Преглед на файлове

simd const operators

wraparound tests + wraparound fix in OpSimdXXArrGeneric
Nikolay Korovaiko преди 8 години
родител
ревизия
a6fed1aab5

+ 11 - 0
lib/Backend/IRBuilderAsmJs.cpp

@@ -2237,6 +2237,17 @@ IRBuilderAsmJs::BuildReg1Int1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegS
     }
 }
 
+// Builds the backend IR for a Simd128_LdC bytecode that carries four int
+// constants: emits a Simd128_LdC instruction whose destination is
+// dstRegSlot (typed TySimd128F4) and whose source is a SIMD constant
+// operand initialized from C1..C4, in that order.
+//
+// newOpcode   - must be Js::OpCodeAsmJs::Simd128_LdC (asserted).
+// offset      - bytecode offset passed through to AddInstr.
+// dstRegSlot  - register slot that receives the constant.
+// C1..C4      - the four int initializers of the SIMD value.
+void
+IRBuilderAsmJs::BuildFloat32x4_IntConst4(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, int C1, int C2, int C3, int C4)
+{
+    Assert(newOpcode == Js::OpCodeAsmJs::Simd128_LdC);
+
+    // Pack the four ints into the SIMD value that backs the constant operand.
+    SIMDValue laneBits{ C1, C2, C3, C4 };
+
+    // Destination register, tagged as a Float32x4 SIMD value.
+    IR::RegOpnd * const dst = BuildDstOpnd(dstRegSlot, TySimd128F4);
+    dst->SetValueType(ValueType::GetSimd128(ObjectType::Simd128Float32x4));
+
+    // Source constant operand, then the load-constant instruction itself.
+    IR::Opnd * const src = IR::Simd128ConstOpnd::New(laneBits, TySimd128F4, m_func);
+    AddInstr(IR::Instr::New(Js::OpCode::Simd128_LdC, dst, src, m_func), offset);
+}
+
 void
 IRBuilderAsmJs::BuildInt1Const1(Js::OpCodeAsmJs newOpcode, uint32 offset, Js::RegSlot dstRegSlot, int constInt)
 {

+ 4 - 4
lib/Backend/LowerMDSharedSimd128.cpp

@@ -785,10 +785,10 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
         IR::Instr* pInstr = nullptr;
         IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
 
-        // cmp      dst, -1
+        // cmp      dst, 0
         pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
         pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func));
-        pInstr->SetSrc2(IR::IntConstOpnd::New(-1, laneType, m_func, true));
+        pInstr->SetSrc2(IR::IntConstOpnd::New(0, laneType, m_func, true));
         instr->InsertBefore(pInstr);
         Legalize(pInstr);
 
@@ -797,8 +797,8 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
         instr->InsertBefore(pInstr);
         Legalize(pInstr);
 
-        // sete     tmp(TyInt8)
-        pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
+        // setne     tmp(TyInt8)
+        pInstr = IR::Instr::New(Js::OpCode::SETNE, tmp, tmp, m_func);
         instr->InsertBefore(pInstr);
         Legalize(pInstr);
 

+ 4 - 22
lib/Backend/amd64/LowererMDArch.cpp

@@ -1153,15 +1153,6 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
             IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
             // MOV tmp, cmpOnd
             Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
-#ifdef ENABLE_WASM
-            if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset())
-            {
-                // ADD tmp, offset
-                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
-                // JB helper
-                Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
-            }
-#endif
             // ADD tmp, dataWidth
             Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
             // JB helper
@@ -1179,10 +1170,10 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
                 // MOV tmp, cmpOnd
                 Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
                 // ADD tmp, offset
-                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel);
                 // JB helper
                 Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
-                lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+                lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
             }
             else
 #endif
@@ -1256,15 +1247,6 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
             IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
             // MOV tmp, cmpOnd
             Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
-#ifdef ENABLE_WASM
-            if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset())
-            {
-                // ADD tmp, offset
-                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
-                // JB helper
-                Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
-            }
-#endif
             // ADD tmp, dataWidth
             Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
             // JB helper
@@ -1282,10 +1264,10 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
                 // MOV tmp, cmpOnd
                 Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
                 // ADD tmp, offset
-                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+                Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel);
                 // JB helper
                 Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
-                lowererMD->m_lowerer->InsertCompareBranch(cmpOpnd, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
+                lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
             }
             else
 #endif

+ 2 - 21
lib/Backend/i386/LowererMDArch.cpp

@@ -976,15 +976,6 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
         IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
         // MOV tmp, cmpOnd
         Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
-#ifdef ENABLE_WASM
-        if (m_func->GetJITFunctionBody()->IsWasmFunction() && src1->AsIndirOpnd()->GetOffset()) //WASM.SIMD
-        {
-            // ADD tmp, offset
-            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
-            // JB helper
-            Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
-        }
-#endif
         // ADD tmp, dataWidth
         Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
         // JB helper
@@ -1001,7 +992,7 @@ LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= fal
             IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
             Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
             // ADD tmp, offset
-            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)src1->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel);
             // JB helper
             Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
             lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);
@@ -1062,16 +1053,6 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
         IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
         // MOV tmp, cmpOnd
         Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
-
-#ifdef ENABLE_WASM
-        if (m_func->GetJITFunctionBody()->IsWasmFunction() && dst->AsIndirOpnd()->GetOffset()) //WASM.SIMD
-        {
-            // ADD tmp, offset
-            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
-            // JB helper
-            Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
-        }
-#endif
         // ADD tmp, dataWidth
         Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dataWidth, tmp->GetType(), m_func, true), helperLabel);
         // JB helper
@@ -1088,7 +1069,7 @@ LowererMDArch::LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore /*= fa
             IR::RegOpnd *tmp = IR::RegOpnd::New(cmpOpnd->GetType(), m_func);
             Lowerer::InsertMove(tmp, cmpOpnd, helperLabel);
             // ADD tmp, offset
-            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func, true), helperLabel);
+            Lowerer::InsertAdd(true, tmp, tmp, IR::IntConstOpnd::New((uint32)dst->AsIndirOpnd()->GetOffset(), tmp->GetType(), m_func), helperLabel);
             // JB helper
             Lowerer::InsertBranch(Js::OpCode::JB, helperLabel, helperLabel);
             lowererMD->m_lowerer->InsertCompareBranch(tmp, instr->UnlinkSrc2(), Js::OpCode::BrGe_A, true, helperLabel, helperLabel);

+ 22 - 0
lib/Runtime/ByteCode/AsmJsByteCodeWriter.cpp

@@ -77,6 +77,23 @@ namespace Js
         }
         return false;
     }
+    
+    // Attempts to encode `op` with the Float32x4_IntConst4 layout under the
+    // given SizePolicy. Fields are assigned in order (F4_0, C1, C2, C3, C4);
+    // the first SizePolicy::Assign that fails stops the attempt and the
+    // function returns false without encoding anything. Returns true once the
+    // instruction has been encoded.
+    template <typename SizePolicy>
+    bool AsmJsByteCodeWriter::TryWriteFloat32x4_IntConst4(OpCodeAsmJs op, RegSlot R0, int C1, int C2, int C3, int C4)
+    {
+        OpLayoutT_Float32x4_IntConst4<SizePolicy> layout;
+
+        const bool allFieldsAssigned =
+            SizePolicy::Assign(layout.F4_0, R0) &&
+            SizePolicy::Assign(layout.C1, C1) &&
+            SizePolicy::Assign(layout.C2, C2) &&
+            SizePolicy::Assign(layout.C3, C3) &&
+            SizePolicy::Assign(layout.C4, C4);
+        if (!allFieldsAssigned)
+        {
+            return false;
+        }
+
+        m_byteCodeData.EncodeT<SizePolicy::LayoutEnum>(op, &layout, sizeof(layout), this);
+        return true;
+    }
     template <typename SizePolicy>
     bool AsmJsByteCodeWriter::TryWriteAsmReg2(OpCodeAsmJs op, RegSlot R0, RegSlot R1)
     {
@@ -500,6 +517,11 @@ namespace Js
         MULTISIZE_LAYOUT_WRITE(AsmReg19, op, R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18);
     }
 
+    void AsmJsByteCodeWriter::WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3) // Writes `op` with destination register R0 and four int constants using the Float32x4_IntConst4 layout.
+    {
+        MULTISIZE_LAYOUT_WRITE(Float32x4_IntConst4, op, R0, C0, C1, C2, C3); // C0..C3 are forwarded in order; presumably this expands to the TryWriteFloat32x4_IntConst4 size-policy attempts (whose parameters are named C1..C4) - confirm against the macro definition.
+    }
+
     void AsmJsByteCodeWriter::AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op)
     {
         CheckOpen();

+ 2 - 0
lib/Runtime/ByteCode/AsmJsByteCodeWriter.h

@@ -51,6 +51,7 @@ namespace Js
         IMP_IWASM void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8,
             RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16);
         IMP_IWASM void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0);
+        IMP_IWASM void WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3);
 
 #ifdef WASM_BYTECODE_WRITER
         // We don't want to expose api not in IWasmByteCodeWriter, but it's easier to compile them anyway
@@ -70,6 +71,7 @@ namespace Js
     private:
         void AsmJsUnsigned1(OpCodeAsmJs op, uint C1);
         template <typename SizePolicy> bool TryWriteAsmReg1(OpCodeAsmJs op, RegSlot R0);
+        template <typename SizePolicy> bool TryWriteFloat32x4_IntConst4(OpCodeAsmJs op, RegSlot R0, int C1, int C2, int C3, int C4);
         template <typename SizePolicy> bool TryWriteAsmReg2(OpCodeAsmJs op, RegSlot R0, RegSlot R1);
         template <typename SizePolicy> bool TryWriteAsmReg3(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2);
         template <typename SizePolicy> bool TryWriteAsmReg4(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3);

+ 1 - 0
lib/Runtime/ByteCode/IWasmByteCodeWriter.h

@@ -35,6 +35,7 @@ namespace Js
         virtual void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8,
             RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16) = 0;
         virtual void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0) = 0;
+        virtual void WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3) = 0;
 
         virtual void AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, int32 slotId) = 0;
         virtual void AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op = OpCodeAsmJs::AsmBr) = 0;

+ 1 - 0
lib/Runtime/ByteCode/LayoutTypesAsmJs.h

@@ -218,6 +218,7 @@ LAYOUT_TYPE_WMS_REG2  ( Int32x4_1Int8x16_1               , Int32x4, Int8x16)
 LAYOUT_TYPE_WMS_REG2  ( Int32x4_1Uint8x16_1              , Int32x4, Uint8x16)
 LAYOUT_TYPE_WMS_REG3  ( Int1Int32x4_1Int1                , Int, Int32x4, Int)
 LAYOUT_TYPE_WMS_REG4  ( Int32x4_2Int2                    , Int32x4, Int32x4, Int, Int)
+LAYOUT_TYPE_WMS_REG5  ( Float32x4_IntConst4              , Float32x4, IntConst, IntConst, IntConst, IntConst)
 // Float64x2
 // Disabled for now
 #if 0

+ 1 - 1
lib/Runtime/ByteCode/OpCodesSimd.h

@@ -209,7 +209,7 @@ MACRO_SIMD_EXTEND_WMS(Simd128_Not_B16         , Bool8x16_2                  , No
 MACRO_SIMD_EXTEND_WMS(Simd128_Neg_U4          , Uint32x4_2                  , None, None, 0)
 MACRO_SIMD_EXTEND_WMS(Simd128_Neg_U8          , Uint16x8_2                  , None, None, 0)
 MACRO_SIMD_EXTEND_WMS(Simd128_Neg_U16         , Uint8x16_2                  , None, None, 0)
-MACRO_SIMD_BACKEND_ONLY(Simd128_LdC           , Empty                       , None, OpCanCSE) // Load Simd128 const stack slot
+MACRO_SIMD_EXTEND_WMS(Simd128_LdC             , Float32x4_IntConst4         , None, OpCanCSE, 0) // Load Simd128 const stack slot
 
 #if 0
 MACRO_SIMD_ASMJS_ONLY_EXTEND_WMS(Simd128_Ld_D2, Float64x2_2, None, None)

+ 1 - 0
lib/Runtime/Language/InterpreterHandlerAsmJs.inl

@@ -614,6 +614,7 @@ EXDEF2_WMS( SIMD_U8_1toI16_1 , Simd128_FromUint16x8Bits_I16   , Js::SIMDUtils::F
 EXDEF2_WMS( SIMD_U16_1toI16_1, Simd128_FromUint8x16Bits_I16   , Js::SIMDUtils::FromSimdBits                   )
 
 EXDEF3_WMS( CUSTOM_ASMJS     , Simd128_FromFloat32x4_U4       , OP_SimdUint32x4FromFloat32x4 , Uint32x4_1Float32x4_1)
+EXDEF3_WMS( CUSTOM_ASMJS     , Simd128_LdC                    , OP_WasmSimdConst             , Float32x4_IntConst4)
 EXDEF2_WMS( SIMD_F4_1toU4_1  , Simd128_FromFloat32x4Bits_U4   , Js::SIMDUtils::FromSimdBits                   )
 EXDEF2_WMS( SIMD_I4_1toU4_1  , Simd128_FromInt32x4Bits_U4     , Js::SIMDUtils::FromSimdBits                   )
 EXDEF2_WMS( SIMD_I8_1toU4_1  , Simd128_FromInt16x8Bits_U4     , Js::SIMDUtils::FromSimdBits                   )

+ 11 - 6
lib/Runtime/Language/InterpreterStackFrame.cpp

@@ -7851,9 +7851,8 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
     template <class T>
     void InterpreterStackFrame::OP_SimdLdArrGeneric(const unaligned T* playout)
     {
-        //Output::Print(_u("accessing offset %d and dataWidth %d\n"), playout->Offset, playout->DataWidth);
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        const uint64 index = ((uint32) GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & ArrayBufferView::ViewMask[playout->ViewType];
+        const uint64 index = ((uint64)(uint32) GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & (int64)(int)ArrayBufferView::ViewMask[playout->ViewType];
         JavascriptArrayBuffer* arr = *(JavascriptArrayBuffer**)GetNonVarReg(AsmJsFunctionMemory::ArrayBufferRegister);
         BYTE* buffer = arr->GetBuffer();
         uint8 dataWidth = playout->DataWidth;
@@ -7863,7 +7862,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         {
             JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("Simd typed array access"));
         }
-        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + (uint32)index);
+        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + index);
         AsmJsSIMDValue value;
 
         value = SIMDUtils::SIMDLdData(data, dataWidth);
@@ -7895,8 +7894,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
     void InterpreterStackFrame::OP_SimdStArrGeneric(const unaligned T* playout)
     {
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        //const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType];
-        const uint64 index = ((uint32)GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & ArrayBufferView::ViewMask[playout->ViewType];
+        const uint64 index = ((uint64)(uint32)GetRegRawInt(playout->SlotIndex) + playout->Offset /* WASM only */) & (int64)(int)ArrayBufferView::ViewMask[playout->ViewType];
         JavascriptArrayBuffer* arr = *(JavascriptArrayBuffer**)GetNonVarReg(AsmJsFunctionMemory::ArrayBufferRegister);
         BYTE* buffer = arr->GetBuffer();
         uint8 dataWidth = playout->DataWidth;
@@ -7906,7 +7904,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         {
             JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("Simd typed array access"));
         }
-        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + (uint32)index);
+        AsmJsSIMDValue *data = (AsmJsSIMDValue*)(buffer + index);
         AsmJsSIMDValue value = GetRegRawSimd(srcReg);
         SIMDUtils::SIMDStData(data, value, dataWidth);
     }
@@ -7962,6 +7960,13 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         SetRegRawSimd(playout->U4_0, result);
     }
 
+    template <class T> // Interpreter handler registered for Simd128_LdC with the Float32x4_IntConst4 layout (see InterpreterHandlerAsmJs.inl).
+    void InterpreterStackFrame::OP_WasmSimdConst(const unaligned T* playout)
+    {
+        AsmJsSIMDValue result{ playout->C1, playout->C2, playout->C3, playout->C4 }; // Build the SIMD value from the four int constants encoded in the instruction, in order.
+        SetRegRawSimd(playout->F4_0, result); // Store it into the destination SIMD register named by the layout.
+    }
+    
     template <class T>
     void InterpreterStackFrame::OP_SimdInt16x8(const unaligned T* playout)
     {

+ 1 - 0
lib/Runtime/Language/InterpreterStackFrame.h

@@ -224,6 +224,7 @@ namespace Js
         template <class T> void OP_SimdStArrConstIndex(const unaligned T* playout);
         template <class T> void OP_SimdInt32x4FromFloat32x4(const unaligned T* playout);
         template <class T> void OP_SimdUint32x4FromFloat32x4(const unaligned T* playout);
+        template <class T> void OP_WasmSimdConst(const unaligned T* playout);
 
         template <class T> void OP_SimdInt16x8(const unaligned T* playout);
         template <class T> void OP_SimdInt8x16(const unaligned T* playout);

+ 1 - 0
lib/WasmReader/EmptyWasmByteCodeWriter.h

@@ -32,6 +32,7 @@ namespace Js
         virtual void AsmReg17(OpCodeAsmJs op, RegSlot R0, RegSlot R1, RegSlot R2, RegSlot R3, RegSlot R4, RegSlot R5, RegSlot R6, RegSlot R7, RegSlot R8,
             RegSlot R9, RegSlot R10, RegSlot R11, RegSlot R12, RegSlot R13, RegSlot R14, RegSlot R15, RegSlot R16) override {}
         virtual void AsmSimdTypedArr(OpCodeAsmJs op, RegSlot value, uint32 slotIndex, uint8 dataWidth, ArrayBufferView::ViewType viewType, uint32 offset = 0) override {};
+        virtual void WasmSimdConst(OpCodeAsmJs op, RegSlot R0, int C0, int C1, int C2, int C3) override {};
 
         virtual void AsmSlot(OpCodeAsmJs op, RegSlot value, RegSlot instance, int32 slotId) override {}
         virtual void AsmBr(ByteCodeLabel labelID, OpCodeAsmJs op = OpCodeAsmJs::AsmBr) override {}

+ 1 - 1
lib/WasmReader/WasmBinaryOpcodesSimd.h

@@ -54,7 +54,7 @@ WASM_SIGNATURE(M128X3, 3, WasmTypes::M128, WasmTypes::M128, WasmTypes::M128)
 WASM_SIGNATURE(M128_I_M128, 3, WasmTypes::M128, WasmTypes::M128, WasmTypes::I32)
 WASM_SIGNATURE(M128_M128, 2, WasmTypes::M128, WasmTypes::M128)
 
-WASM_MISC_OPCODE(M128Const, 0x100, Limit, true)
+WASM_MISC_OPCODE(M128Const, 0x100, Limit, false)
 WASM_SIMD_MEMREAD_OPCODE(M128Load, 0x101, M128_I, Simd128_LdArr_F4, Js::ArrayBufferView::TYPE_FLOAT32, 16, false)
 WASM_SIMD_MEMSTORE_OPCODE(M128Store, 0x102, M128_I, Simd128_StArr_F4, Js::ArrayBufferView::TYPE_FLOAT32, 16, false)
 WASM_MISC_OPCODE(I16Splat, 0x103, Limit, true)

+ 3 - 3
lib/WasmReader/WasmBinaryReader.cpp

@@ -435,8 +435,8 @@ WasmOp WasmBinaryReader::ReadOpCode()
             ThrowDecodingError(_u("WebAssembly SIMD support is not enabled"));
         }
 
-        UINT len;
-        UINT32 extOpCode = LEB128(len);
+        uint32 len;
+        uint32 extOpCode = LEB128(len) + wbM128Const;
         Assert((WasmOp)(extOpCode) == extOpCode);
         op = (WasmOp)extOpCode;
         m_funcState.count += len;
@@ -636,7 +636,7 @@ WasmBinaryReader::BrTableNode()
 void
 WasmBinaryReader::LaneNode()
 {
-    m_currentNode.lane.lane_index = ReadConst<uint8>();
+    m_currentNode.lane.index = ReadConst<uint8>();
     m_funcState.count++;
 }
 

+ 9 - 32
lib/WasmReader/WasmByteCodeGenerator.cpp

@@ -472,10 +472,7 @@ WasmBytecodeGenerator::EnregisterLocals()
                 break;
             case WasmTypes::M128:
             {
-                //@TODO maybe we should introduce REAL simd consts? 
-                EmitInfo arg1 = EmitLoadFloatConstIntoReg(0);
-                m_writer->AsmReg5(Js::OpCodeAsmJs::Simd128_FloatsToF4, m_locals[i].location, arg1.location, arg1.location, arg1.location, arg1.location);
-                ReleaseLocation(&arg1);
+                m_writer->WasmSimdConst(Js::OpCodeAsmJs::Simd128_LdC, m_locals[i].location, 0, 0, 0, 0);
                 break;
             }
             default:
@@ -634,7 +631,6 @@ WasmBytecodeGenerator::EmitExpr(WasmOp op)
         info.type = WasmTypes::Any;
         break;
 
-//WASM_EXTRACTLANE_OPCODE
 #define WASM_EXTRACTLANE_OPCODE(opname, opcode, sig, asmjsop, nyi) \
     case wb##opname: \
         info = EmitExtractLane(Js::OpCodeAsmJs::##asmjsop, WasmOpCodeSignatures::sig); \
@@ -802,22 +798,6 @@ WasmBytecodeGenerator::EmitConst(WasmTypes::WasmType type, WasmConstLitNode cnst
     return dst;
 }
 
-EmitInfo WasmBytecodeGenerator::EmitLoadIntConstIntoReg(uint val)
-{
-    Js::RegSlot tmpReg = GetRegisterSpace(WasmTypes::I32)->AcquireTmpRegister();
-    EmitInfo dst(tmpReg, WasmTypes::I32);
-    m_writer->AsmInt1Const1(Js::OpCodeAsmJs::Ld_IntConst, dst.location, val);
-    return dst;
-}
-
-EmitInfo WasmBytecodeGenerator::EmitLoadFloatConstIntoReg(uint val)
-{
-    Js::RegSlot tmpReg = GetRegisterSpace(WasmTypes::F32)->AcquireTmpRegister();
-    EmitInfo dst(tmpReg, WasmTypes::F32);
-    m_writer->AsmFloat1Const1(Js::OpCodeAsmJs::Ld_FltConst, dst.location, *reinterpret_cast<float *>(&val));
-    return dst;
-}
-
 void
 WasmBytecodeGenerator::EmitLoadConst(EmitInfo dst, WasmConstLitNode cnst)
 {
@@ -837,15 +817,7 @@ WasmBytecodeGenerator::EmitLoadConst(EmitInfo dst, WasmConstLitNode cnst)
         break;
     case WasmTypes::M128:
     {
-        EmitInfo arg1 = EmitLoadFloatConstIntoReg(cnst.v128[0]);
-        EmitInfo arg2 = EmitLoadFloatConstIntoReg(cnst.v128[1]);
-        EmitInfo arg3 = EmitLoadFloatConstIntoReg(cnst.v128[2]);
-        EmitInfo arg4 = EmitLoadFloatConstIntoReg(cnst.v128[3]);
-        m_writer->AsmReg5(Js::OpCodeAsmJs::Simd128_FloatsToF4, dst.location, arg1.location, arg2.location, arg3.location, arg4.location); //@TODO check if the order should be reversed
-        ReleaseLocation(&arg4); //FILO 
-        ReleaseLocation(&arg3);
-        ReleaseLocation(&arg2);
-        ReleaseLocation(&arg1);
+        m_writer->WasmSimdConst(Js::OpCodeAsmJs::Simd128_LdC, dst.location, cnst.v128[0], cnst.v128[1], cnst.v128[2], cnst.v128[3]);
         break;
     }
     default:
@@ -1294,7 +1266,7 @@ WasmBytecodeGenerator::EmitExtractLane(Js::OpCodeAsmJs op, const WasmTypes::Wasm
 {
     WasmTypes::WasmType resultType = signature[0];
     WasmTypes::WasmType simdArgType = signature[1];
-    const uint offset = GetReader()->m_currentNode.lane.lane_index;
+    const uint offset = GetReader()->m_currentNode.lane.index;
 
     if (offset >= numLanes(op)) 
     {
@@ -1313,7 +1285,6 @@ WasmBytecodeGenerator::EmitExtractLane(Js::OpCodeAsmJs op, const WasmTypes::Wasm
     m_writer->AsmReg3(op, resultReg, simdArgInfo.location, indexInfo.location);
     ReleaseLocation(&indexInfo);
     ReleaseLocation(&simdArgInfo);
-    //ReleaseLocation(&resultInfo);
     return resultInfo;
 }
 
@@ -1325,8 +1296,14 @@ WasmBytecodeGenerator::EmitSimdMemAccess(Js::OpCodeAsmJs op, const WasmTypes::Wa
     SetUsesMemory(0);
 
     const uint32 mask = Js::ArrayBufferView::ViewMask[viewType];
+    const uint alignment = GetReader()->m_currentNode.mem.alignment;
     const uint offset = GetReader()->m_currentNode.mem.offset;
 
+    if ((mask << 1) & (1 << alignment))
+    {
+        throw WasmCompilationException(_u("alignment must not be larger than natural"));
+    }
+
     EmitInfo rhsInfo;
     if (isStore)
     {

+ 1 - 4
lib/WasmReader/WasmByteCodeGenerator.h

@@ -150,14 +150,11 @@ namespace Wasm
         EmitInfo EmitBinExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);
         EmitInfo EmitUnaryExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);
         EmitInfo EmitExtractLane(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);
-        
+
         EmitInfo EmitConst(WasmTypes::WasmType type, WasmConstLitNode cnst);
         void EmitLoadConst(EmitInfo dst, WasmConstLitNode cnst);
         WasmConstLitNode GetZeroCnst();
 
-        EmitInfo EmitLoadIntConstIntoReg(uint val);
-        EmitInfo EmitLoadFloatConstIntoReg(uint val);
-
         void EnregisterLocals();
         void ReleaseLocation(EmitInfo* info);
 

+ 1 - 2
lib/WasmReader/WasmParseTree.h

@@ -98,10 +98,9 @@ namespace Wasm
 
     struct WasmLaneNode
     {
-        uint lane_index;
+        uint index;
     };
 
-
     struct WasmVarNode
     {
         uint num;

BIN
test/wasm.simd/const.wasm


+ 104 - 0
test/wasm.simd/const.wast

@@ -0,0 +1,104 @@
+;;-------------------------------------------------------------------------------------------------------
+;; Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+;;-------------------------------------------------------------------------------------------------------
+
+(module
+    (import "dummy" "memory" (memory 1))
+
+        (func (export "m128_const_1") (local $v1 m128) ;; Loads a 16-operand m128 constant into $v1 and stores it to memory; constTests.js expects the first four Uint32 words to read back as [0, 0xFF00ABCC, 0, 0].
+            (set_local $v1 
+            (m128.const 
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0) 
+                (i32.const 0xcc)
+                (i32.const 0xab)
+                (i32.const 0x0)
+                (i32.const 0xff)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0) 
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+            )
+            )
+            (m128.store offset=0 (i32.const 0) (get_local $v1)) ;; Write the constant at address 0 so the JS side can inspect it.
+        )
+
+        (func (export "m128_const_2") (local $v1 m128) ;; Loads a 16-operand m128 constant into $v1 and stores it to memory; constTests.js expects the first four Uint32 words to read back as [0xA100BC00, 0xFFFFFFFF, 0xFF00, 0x1].
+            (set_local $v1 
+            (m128.const 
+                (i32.const 0x0)
+                (i32.const 0xbc)
+                (i32.const 0x0)
+                (i32.const 0xa1)
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff) 
+                (i32.const 0x0)
+                (i32.const 0xff)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x1)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+            )
+            )
+            (m128.store offset=0 (i32.const 0) (get_local $v1)) ;; Write the constant at address 0 so the JS side can inspect it.
+        )
+
+        (func (export "m128_const_3") (local $v1 m128) ;; Loads a 16-operand m128 constant into $v1 and stores it to memory; constTests.js expects the first four Uint32 words to read back as [0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF].
+            (set_local $v1 
+            (m128.const 
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff) 
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff) 
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0) 
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff)
+                (i32.const 0xff)
+            )
+            )
+            (m128.store offset=0 (i32.const 0) (get_local $v1)) ;; Write the constant at address 0 so the JS side can inspect it.
+        )
+
+        (func (export "m128_const_4") (local $v1 m128) ;; Loads an all-zero 16-operand m128 constant into $v1 and stores it to memory; constTests.js expects the first four Uint32 words to read back as [0, 0, 0, 0].
+            (set_local $v1 
+            (m128.const 
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+                (i32.const 0x0)
+            )
+            )
+            (m128.store offset=0 (i32.const 0) (get_local $v1)) ;; Write the constant at address 0 so the JS side can inspect it.
+        )
+)

+ 37 - 0
test/wasm.simd/constTests.js

@@ -0,0 +1,37 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+let passed = true; // Cleared by assertEquals on a mismatch; gates the final "Passed" print.
+
+function assertEquals(expected, actual) {
+    if (expected != actual) {
+        passed = false;
+        throw `Expected ${expected}, received ${actual}`;
+    }
+}
+
+const INITIAL_SIZE = 1; // Initial size handed to WebAssembly.Memory below (the API counts this in WebAssembly pages).
+const memObj = new WebAssembly.Memory({initial:INITIAL_SIZE}); // Memory object supplied to the module as the "dummy"."memory" import.
+const arr = new Uint32Array (memObj.buffer); // 32-bit word view used to read back what the wasm functions store.
+
+const module = new WebAssembly.Module(readbuffer('const.wasm')); // readbuffer is not defined in this file; presumably a host-provided global.
+const instance = new WebAssembly.Instance(module, { "dummy" : { "memory" : memObj } }).exports; // Only the exports object is kept.
+
+let testIntLogicalOps = function (funcname, resultArr) {
+    const len = 4
+    instance[funcname]();
+    for (let i = 0; i < len; i++) {
+        assertEquals(arr[i], resultArr[i]);
+    }
+}
+
+testIntLogicalOps("m128_const_1", [0, 0xFF00ABCC, 0, 0]); // Each expected array lists the four 32-bit words the named function stores at memory offset 0.
+testIntLogicalOps("m128_const_2", [0xA100BC00, 0xFFFFFFFF, 0xFF00, 0x1]);
+testIntLogicalOps("m128_const_3", [0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF]);
+testIntLogicalOps("m128_const_4", [0, 0, 0, 0]);
+
+if (passed) { // assertEquals throws on a mismatch, so this runs only when every check above succeeded.
+    print("Passed");
+}

+ 2 - 0
test/wasm.simd/loadTests.js

@@ -63,6 +63,8 @@ function testLoadOpsForType(funcname, module, laneValues, expectedResults, start
     check(0, "instance.m128_load4", MEM_SIZE_IN_BYTES - 16);
     check("Access index is out of range", "instance.m128_load4", MEM_SIZE_IN_BYTES - 8);
     check("Access index is out of range", "instance.m128_load4", MEM_SIZE_IN_BYTES - 4);
+    check("Access index is out of range", "instance.m128_load4_offset", 0xFFFFFFFC);
+    check("Access index is out of range", "instance.m128_load4_offset", -1);
 
 }
 

BIN
test/wasm.simd/loads.wasm


+ 26 - 17
test/wasm.simd/loads.wast

@@ -1,3 +1,8 @@
+;;-------------------------------------------------------------------------------------------------------
+;; Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+;;-------------------------------------------------------------------------------------------------------
+
 (module
     (import "dummy" "memory" (memory 1))
 
@@ -5,41 +10,45 @@
         (i32x4.extract_lane lane=0 (m128.load offset=0 align=4 (get_local $x)))
     )
 
-    (func (export "m128_load_test") (param $x i32) (local m128)       
+    ;; Loads an m128 from address $x plus a static offset of 16 and returns
+    ;; lane 0 of the loaded value as an i32.
+    (func (export "m128_load4_offset") (param $x i32) (result i32)
+        (i32x4.extract_lane lane=0 (m128.load offset=16 align=4 (get_local $x)))
+    )
+
+    (func (export "m128_load_test") (param $x i32) (local m128)
         (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
-        (i32.store offset=0 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (i32.store offset=0 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1))))
         (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
-        (i32.store offset=4 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (i32.store offset=4 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1))))
         (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
-        (i32.store offset=8 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (i32.store offset=8 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1))))
         (set_local 1 (m128.load offset=0 align=4 (get_local $x)))
-        (i32.store offset=12 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        (i32.store offset=12 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1))))
         ;;
         (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
-        (i32.store offset=20 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (i32.store offset=20 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1))))
         (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
-        (i32.store offset=24 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (i32.store offset=24 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1))))
         (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
-        (i32.store offset=28 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (i32.store offset=28 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1))))
         (set_local 1 (m128.load offset=20 align=4 (get_local $x)))
-        (i32.store offset=32 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        (i32.store offset=32 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1))))
         ;;
         (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
-        (i32.store offset=44 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (i32.store offset=44 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1))))
         (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
-        (i32.store offset=48 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (i32.store offset=48 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1))))
         (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
-        (i32.store offset=52 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (i32.store offset=52 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1))))
         (set_local 1 (m128.load offset=44 align=4 (get_local $x)))
-        (i32.store offset=56 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        (i32.store offset=56 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1))))
         ;;
         (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
-        (i32.store offset=68 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1)))) 
+        (i32.store offset=68 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=0 (get_local 1))))
         (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
-        (i32.store offset=72 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1)))) 
+        (i32.store offset=72 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=1 (get_local 1))))
         (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
-        (i32.store offset=76 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1)))) 
+        (i32.store offset=76 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=2 (get_local 1))))
         (set_local 1 (m128.load offset=68 align=4 (get_local $x)))
-        (i32.store offset=80 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1)))) 
+        (i32.store offset=80 (get_local $x) (i32.popcnt (i32x4.extract_lane lane=3 (get_local 1))))
     )
 )

+ 12 - 0
test/wasm.simd/rlexe.xml

@@ -24,4 +24,16 @@
     <compile-flags> -maic:0 -wasm -wasmsimd</compile-flags>
   </default>
 </test>
+<test>
+  <default>
+    <files>constTests.js</files>
+    <compile-flags> -wasm -wasmsimd</compile-flags>
+  </default>
+</test>
+<test>
+  <default>
+    <files>constTests.js</files>
+    <compile-flags> -wasm -wasmsimd -maic:0</compile-flags>
+  </default>
+</test>
 </regress-exe>

+ 2 - 0
test/wasm.simd/storeTests.js

@@ -60,6 +60,8 @@ const MEM_SIZE_IN_BYTES = 1024 * 64;
 check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 12, 777, 888, 999, 1111);
 check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 8, 777, 888, 999, 1111);
 check("RangeError", "instance.m128_store4", MEM_SIZE_IN_BYTES - 4, 777, 888, 999, 1111);
+check("RangeError", "instance.m128_store4_offset", -1, 777, 888, 999, 1111);
+check("RangeError", "instance.m128_store4_offset", 0xFFFFFFFC, 777, 888, 999, 1111);
 check(undefined, "instance.m128_store4", MEM_SIZE_IN_BYTES - 16, 777, 888, 999, 1111);
 
 if (passed) {

BIN
test/wasm.simd/stores.wasm


+ 11 - 1
test/wasm.simd/stores.wast

@@ -1,8 +1,18 @@
+;;-------------------------------------------------------------------------------------------------------
+;; Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+;; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+;;-------------------------------------------------------------------------------------------------------
+
 (module
     (import "dummy" "memory" (memory 1))
 
-    (func (export "m128_store4") (param i32 i32 i32 i32 i32) (local m128) 
+    (func (export "m128_store4") (param i32 i32 i32 i32 i32) (local m128)
         (set_local 5 (m128.load offset=0 align=4 (i32.const 16)))
         (m128.store offset=0 align=4 (get_local 0) (get_local 5))
     )
+
+    ;; Loads an m128 from address 16 into local 5, then stores it at the address
+    ;; in param 0 plus a static offset of 16. Params 1..4 are not referenced in the body.
+    (func (export "m128_store4_offset") (param i32 i32 i32 i32 i32) (local m128)
+        (set_local 5 (m128.load offset=0 align=4 (i32.const 16)))
+        (m128.store offset=16 align=4 (get_local 0) (get_local 5))
+    )
 )