Преглед на файлове

[MERGE #4470 @Cellule] WASM: Atomic load/store

Merge pull request #4470 from Cellule:wasm/atomics

Implement wasm atomic load/store operations.
Michael Ferris преди 8 години
родител
ревизия
458c86a5e4
променени са 68 файла, в които са добавени 1401 реда и са изтрити 531 реда
  1. 1 0
      lib/Backend/GlobOpt.cpp
  2. 22 0
      lib/Backend/GlobOptExpr.cpp
  3. 37 85
      lib/Backend/IRBuilderAsmJs.cpp
  4. 6 0
      lib/Backend/JnHelperMethod.cpp
  5. 2 0
      lib/Backend/JnHelperMethodList.h
  6. 99 4
      lib/Backend/Lower.cpp
  7. 4 3
      lib/Backend/Lower.h
  8. 50 3
      lib/Backend/LowerMDShared.cpp
  9. 3 2
      lib/Backend/LowerMDShared.h
  10. 25 1
      lib/Backend/amd64/EncoderMD.cpp
  11. 46 1
      lib/Backend/amd64/LowererMDArch.cpp
  12. 3 1
      lib/Backend/amd64/LowererMDArch.h
  13. 6 1
      lib/Backend/amd64/MdOpCodes.h
  14. 2 0
      lib/Backend/amd64/X64Encode.h
  15. 3 2
      lib/Backend/arm/LowerMD.h
  16. 3 1
      lib/Backend/arm64/LowerMD.h
  17. 17 10
      lib/Backend/i386/EncoderMD.cpp
  18. 135 19
      lib/Backend/i386/LowererMDArch.cpp
  19. 3 1
      lib/Backend/i386/LowererMDArch.h
  20. 2 0
      lib/Backend/i386/MdOpCodes.h
  21. 2 0
      lib/Backend/i386/X86Encode.h
  22. 2 0
      lib/Common/ConfigFlagsList.h
  23. 1 0
      lib/Parser/rterrors.h
  24. 2 0
      lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp
  25. 3 0
      lib/Runtime/ByteCode/OpCodes.h
  26. 2 0
      lib/Runtime/ByteCode/OpCodesAsmJs.h
  27. 38 0
      lib/Runtime/Language/AsmJsArrayBufferViews.h
  28. 5 19
      lib/Runtime/Language/AsmJsByteCodeGenerator.cpp
  29. 12 31
      lib/Runtime/Language/AsmJsTypes.h
  30. 1 0
      lib/Runtime/Language/Chakra.Runtime.Language.vcxproj
  31. 3 2
      lib/Runtime/Language/Chakra.Runtime.Language.vcxproj.filters
  32. 2 0
      lib/Runtime/Language/InterpreterHandlerAsmJs.inl
  33. 133 128
      lib/Runtime/Language/InterpreterStackFrame.cpp
  34. 4 6
      lib/Runtime/Language/InterpreterStackFrame.h
  35. 17 1
      lib/Runtime/Language/JavascriptConversion.cpp
  36. 3 0
      lib/Runtime/Language/JavascriptConversion.h
  37. 6 6
      lib/Runtime/Language/WAsmjsUtils.cpp
  38. 2 1
      lib/Runtime/Language/WAsmjsUtils.h
  39. 5 1
      lib/Runtime/Library/AtomicsObject.cpp
  40. 158 0
      lib/Runtime/Library/AtomicsOperations.cpp
  41. 24 0
      lib/Runtime/Library/AtomicsOperations.h
  42. 1 0
      lib/Runtime/Library/CMakeLists.txt
  43. 2 0
      lib/Runtime/Library/Chakra.Runtime.Library.vcxproj
  44. 3 1
      lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters
  45. 8 5
      lib/Runtime/Library/JavascriptFunction.cpp
  46. 63 137
      lib/Runtime/Library/TypedArray.cpp
  47. 24 24
      lib/Runtime/Library/TypedArray.h
  48. 1 0
      lib/Runtime/Library/WabtInterface.cpp
  49. 73 1
      lib/WasmReader/WasmBinaryOpCodes.h
  50. 30 20
      lib/WasmReader/WasmByteCodeGenerator.cpp
  51. 2 1
      lib/WasmReader/WasmByteCodeGenerator.h
  52. 1 1
      lib/wabt/src/validator.cc
  53. 36 0
      pal/inc/pal.h
  54. 1 0
      test/WasmSpec/baselines/atomic_load.baseline
  55. 1 0
      test/WasmSpec/baselines/atomic_store.baseline
  56. 1 0
      test/WasmSpec/baselines/chakra_atomic_load.baseline
  57. 1 0
      test/WasmSpec/baselines/chakra_atomic_store.baseline
  58. 29 0
      test/WasmSpec/chakra/chakra_atomic_load.wast
  59. 28 0
      test/WasmSpec/chakra/chakra_atomic_store.wast
  60. 0 0
      test/WasmSpec/chakra_generated/chakra_extends_i32.wast
  61. 0 0
      test/WasmSpec/chakra_generated/chakra_extends_i64.wast
  62. 0 0
      test/WasmSpec/chakra_generated/chakra_i32.wast
  63. 0 0
      test/WasmSpec/chakra_generated/chakra_i64.wast
  64. 13 1
      test/WasmSpec/convert-test-suite/config.json
  65. 1 1
      test/WasmSpec/convert-test-suite/index.js
  66. 50 0
      test/WasmSpec/features/threads/atomic_load.wast
  67. 68 0
      test/WasmSpec/features/threads/atomic_store.wast
  68. 70 10
      test/WasmSpec/rlexe.xml

+ 1 - 0
lib/Backend/GlobOpt.cpp

@@ -15650,6 +15650,7 @@ swap_srcs:
     case Js::OpCode::TrapIfMinIntOverNegOne:
     case Js::OpCode::TrapIfTruncOverflow:
     case Js::OpCode::TrapIfZero:
+    case Js::OpCode::TrapIfUnalignedAccess:
     case Js::OpCode::FromVar:
     case Js::OpCode::Conv_Prim:
     case Js::OpCode::LdC_A_I4:

+ 22 - 0
lib/Backend/GlobOptExpr.cpp

@@ -435,6 +435,28 @@ GlobOpt::OptimizeChecks(IR::Instr * const instr)
         }
         break;
     }
+    case Js::OpCode::TrapIfUnalignedAccess:
+        if (src1 && src1->IsImmediateOpnd())
+        {
+            int64 val = src1->GetImmediateValue(func);
+            Assert(src2->IsImmediateOpnd());
+            uint32 cmpValue = (uint32)src2->GetImmediateValue(func);
+            uint32 mask = src2->GetSize() - 1;
+            Assert((cmpValue & ~mask) == 0);
+
+            if (((uint32)val & mask) == cmpValue)
+            {
+                instr->FreeSrc2();
+                instr->m_opcode = Js::OpCode::Ld_I4;
+            }
+            else
+            {
+                TransformIntoUnreachable(WASMERR_UnalignedAtomicAccess, instr);
+                InsertByteCodeUses(instr);
+                RemoveCodeAfterNoFallthroughInstr(instr); //remove dead code
+            }
+        }
+        break;
     default:
         return;
     }

+ 37 - 85
lib/Backend/IRBuilderAsmJs.cpp

@@ -1420,64 +1420,20 @@ IRBuilderAsmJs::BuildStartCall(Js::OpCodeAsmJs newOpcode, uint32 offset)
 void
 IRBuilderAsmJs::InitializeMemAccessTypeInfo(Js::ArrayBufferView::ViewType viewType, _Out_ MemAccessTypeInfo * typeInfo)
 {
-    typeInfo->type = TyInt32;
-    typeInfo->valueRegType = WAsmJs::INT32;
-
+    AssertOrFailFast(typeInfo);
+    
     switch (viewType)
     {
-    case Js::ArrayBufferView::TYPE_INT8_TO_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-    case Js::ArrayBufferView::TYPE_INT8:
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Int8Array);
-        typeInfo->type = TyInt8;
-        break;
-    case Js::ArrayBufferView::TYPE_UINT8_TO_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-    case Js::ArrayBufferView::TYPE_UINT8:
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Uint8Array);
-        typeInfo->type = TyUint8;
-        break;
-    case Js::ArrayBufferView::TYPE_INT16_TO_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-    case Js::ArrayBufferView::TYPE_INT16:
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Int16Array);
-        typeInfo->type = TyInt16;
-        break;
-    case Js::ArrayBufferView::TYPE_UINT16_TO_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-    case Js::ArrayBufferView::TYPE_UINT16:
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Uint16Array);
-        typeInfo->type = TyUint16;
-        break;
-    case Js::ArrayBufferView::TYPE_INT32_TO_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-    case Js::ArrayBufferView::TYPE_INT32:
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Int32Array);
-        typeInfo->type = TyInt32;
-        break;
-    case Js::ArrayBufferView::TYPE_UINT32_TO_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-    case Js::ArrayBufferView::TYPE_UINT32:
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Uint32Array);
-        typeInfo->type = TyUint32;
-        break;
-    case Js::ArrayBufferView::TYPE_FLOAT32:
-        typeInfo->valueRegType = WAsmJs::FLOAT32;
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Float32Array);
-        typeInfo->type = TyFloat32;
-        break;
-    case Js::ArrayBufferView::TYPE_FLOAT64:
-        typeInfo->valueRegType = WAsmJs::FLOAT64;
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Float64Array);
-        typeInfo->type = TyFloat64;
-        break;
-    case Js::ArrayBufferView::TYPE_INT64:
-        typeInfo->valueRegType = WAsmJs::INT64;
-        typeInfo->arrayType = ValueType::GetObject(ObjectType::Int64Array);
-        typeInfo->type = TyInt64;
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, irSuffix) \
+    case Js::ArrayBufferView::TYPE_##name: \
+        typeInfo->valueRegType = WAsmJs::FromPrimitiveType<RegType>(); \
+        typeInfo->type = Ty##irSuffix;\
+        typeInfo->arrayType = ValueType::GetObject(ObjectType::##irSuffix##Array); \
+        Assert(TySize[Ty##irSuffix] == (1<<align)); \
         break;
+#include "Language/AsmJsArrayBufferViews.h"
     default:
-        Assume(UNREACHED);
+        AssertOrFailFast(UNREACHED);
     }
 }
 
@@ -1493,11 +1449,15 @@ IRBuilderAsmJs::BuildWasmMemAccess(Js::OpCodeAsmJs newOpcode, uint32 offset)
 void
 IRBuilderAsmJs::BuildWasmMemAccess(Js::OpCodeAsmJs newOpcode, uint32 offset, uint32 slotIndex, Js::RegSlot value, uint32 constOffset, Js::ArrayBufferView::ViewType viewType)
 {
-    bool isLd = newOpcode == Js::OpCodeAsmJs::LdArrWasm;
-    Js::OpCode op = isLd ? Js::OpCode::LdArrViewElemWasm : Js::OpCode::StArrViewElem;
+    bool isAtomic = newOpcode == Js::OpCodeAsmJs::StArrAtomic || newOpcode == Js::OpCodeAsmJs::LdArrAtomic;
+    bool isLd = newOpcode == Js::OpCodeAsmJs::LdArrWasm || newOpcode == Js::OpCodeAsmJs::LdArrAtomic;
+    Js::OpCode op = isAtomic ? 
+        isLd ? Js::OpCode::LdAtomicWasm : Js::OpCode::StAtomicWasm
+        : isLd ? Js::OpCode::LdArrViewElemWasm : Js::OpCode::StArrViewElem;
 
     MemAccessTypeInfo typeInfo;
     InitializeMemAccessTypeInfo(viewType, &typeInfo);
+    const uint32 memAccessSize = TySize[typeInfo.type];
 
     Js::RegSlot valueRegSlot = GetRegSlotFromTypedReg(value, typeInfo.valueRegType);
     IR::Instr * instr = nullptr;
@@ -1506,6 +1466,22 @@ IRBuilderAsmJs::BuildWasmMemAccess(Js::OpCodeAsmJs newOpcode, uint32 offset, uin
 
     Js::RegSlot indexRegSlot = GetRegSlotFromIntReg(slotIndex);
     IR::RegOpnd * indexOpnd = BuildSrcOpnd(indexRegSlot, TyUint32);
+    if (isAtomic && memAccessSize > 1)
+    {
+        const uint32 mask = memAccessSize - 1;
+        // We need (constOffset + index) & mask == 0
+        // Since we know constOffset ahead of time
+        // what we need to check is index & mask == (memAccessSize - (constOffset & mask)) & mask
+        const uint32 offseted = constOffset & mask;
+        // In this IntContOpnd, the value is what the index&mask should be, the type carries the size of the access
+        IR::Opnd* offsetedOpnd = IR::IntConstOpnd::NewFromType((memAccessSize - offseted) & mask, typeInfo.type, m_func);
+        IR::RegOpnd* intermediateIndex = IR::RegOpnd::New(TyUint32, m_func);
+        instr = IR::Instr::New(Js::OpCode::TrapIfUnalignedAccess, intermediateIndex, indexOpnd, offsetedOpnd, m_func);
+        AddInstr(instr, offset);
+
+        // Create dependency between load/store and trap through the index
+        indexOpnd = intermediateIndex;
+    }
     indirOpnd = IR::IndirOpnd::New(BuildSrcOpnd(AsmJsRegSlots::BufferReg, TyVar), constOffset, typeInfo.type, m_func);
     indirOpnd->SetIndexOpnd(indexOpnd);
     indirOpnd->GetBaseOpnd()->SetValueType(typeInfo.arrayType);
@@ -6877,36 +6853,12 @@ IRBuilderAsmJs::BuildAsmSimdTypedArr(Js::OpCodeAsmJs newOpcode, uint32 offset, u
 
     switch (viewType)
     {
-    case Js::ArrayBufferView::TYPE_INT8:
-        arrayType = ValueType::GetObject(ObjectType::Int8Array);
-        break;
-    case Js::ArrayBufferView::TYPE_UINT8:
-        arrayType = ValueType::GetObject(ObjectType::Uint8Array);
-        break;
-    case Js::ArrayBufferView::TYPE_INT16:
-        arrayType = ValueType::GetObject(ObjectType::Int16Array);
-        mask = (uint32)~1;
-        break;
-    case Js::ArrayBufferView::TYPE_UINT16:
-        arrayType = ValueType::GetObject(ObjectType::Uint16Array);
-        mask = (uint32)~1;
-        break;
-    case Js::ArrayBufferView::TYPE_INT32:
-        arrayType = ValueType::GetObject(ObjectType::Int32Array);
-        mask = (uint32)~3;
-        break;
-    case Js::ArrayBufferView::TYPE_UINT32:
-        arrayType = ValueType::GetObject(ObjectType::Uint32Array);
-        mask = (uint32)~3;
-        break;
-    case Js::ArrayBufferView::TYPE_FLOAT32:
-        arrayType = ValueType::GetObject(ObjectType::Float32Array);
-        mask = (uint32)~3;
-        break;
-    case Js::ArrayBufferView::TYPE_FLOAT64:
-        arrayType = ValueType::GetObject(ObjectType::Float64Array);
-        mask = (uint32)~7;
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, irSuffix) \
+    case Js::ArrayBufferView::TYPE_##name: \
+        mask = ARRAYBUFFER_VIEW_MASK(align); \
+        arrayType = ValueType::GetObject(ObjectType::##irSuffix##Array); \
         break;
+#include "Language/AsmJsArrayBufferViews.h"
     default:
         Assert(UNREACHED);
     }

+ 6 - 0
lib/Backend/JnHelperMethod.cpp

@@ -176,6 +176,12 @@ DECLSPEC_GUARDIGNORE  _NOINLINE intptr_t GetNonTableMethodAddress(ThreadContextI
 
     case HelperDirectMath_Tan:
         return ShiftAddr(context, (double(*)(double))__libm_sse2_tan);
+
+    case HelperAtomicStore64:
+        return ShiftAddr(context, (double(*)(double))InterlockedExchange64);
+
+    case HelperMemoryBarrier:
+        return ShiftAddr(context, (void(*)())MemoryBarrier);
 #endif
 
     case HelperDirectMath_FloorDb:

+ 2 - 0
lib/Backend/JnHelperMethodList.h

@@ -582,6 +582,8 @@ HELPERCALL(DirectMath_Int64Rol , (int64(*)(int64,int64)) Wasm::WasmMath::Rol<int
 HELPERCALL(DirectMath_Int64Ror , (int64(*)(int64,int64)) Wasm::WasmMath::Ror<int64>, 0)
 HELPERCALL(DirectMath_Int64Clz , (int64(*)(int64)) Wasm::WasmMath::Clz<int64>, 0)
 HELPERCALL(DirectMath_Int64Ctz , (int64(*)(int64)) Wasm::WasmMath::Ctz<int64>, 0)
+HELPERCALL(AtomicStore64, nullptr, 0)
+HELPERCALL(MemoryBarrier, nullptr, 0)
 #elif defined(_M_X64)
 // AMD64 regular CRT calls -- on AMD64 calling convention is already what we want -- args in XMM0, XMM1 rather than on stack which is slower.
 HELPERCALL(DirectMath_Acos, nullptr, 0)

+ 99 - 4
lib/Backend/Lower.cpp

@@ -1128,6 +1128,9 @@ Lowerer::LowerRange(IR::Instr *instrStart, IR::Instr *instrEnd, bool defaultDoFa
         case Js::OpCode::TrapIfZero:
             LowerTrapIfZero(instr);
             break;
+        case Js::OpCode::TrapIfUnalignedAccess:
+            instrPrev = LowerTrapIfUnalignedAccess(instr);
+            break;
         case Js::OpCode::DivU_I4:
         case Js::OpCode::Div_I4:
             this->LowerDivI4(instr);
@@ -1541,10 +1544,18 @@ Lowerer::LowerRange(IR::Instr *instrStart, IR::Instr *instrEnd, bool defaultDoFa
             instrPrev = LowerLdArrViewElem(instr);
             break;
 
+        case Js::OpCode::StAtomicWasm:
+            instrPrev = LowerStAtomicsWasm(instr);
+            break;
+
         case Js::OpCode::StArrViewElem:
             instrPrev = LowerStArrViewElem(instr);
             break;
 
+        case Js::OpCode::LdAtomicWasm:
+            instrPrev = LowerLdAtomicsWasm(instr);
+            break;
+
         case Js::OpCode::LdArrViewElemWasm:
             instrPrev = LowerLdArrViewElemWasm(instr);
             break;
@@ -9172,7 +9183,7 @@ Lowerer::LowerLdArrViewElem(IR::Instr * instr)
 }
 
 IR::Instr *
-Lowerer::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
+Lowerer::LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd)
 {
     uint32 offset = addrOpnd->AsIndirOpnd()->GetOffset();
 
@@ -9188,7 +9199,7 @@ Lowerer::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
     }
     else
     {
-        return m_lowererMD.LowerWasmMemOp(instr, addrOpnd);
+        return m_lowererMD.LowerWasmArrayBoundsCheck(instr, addrOpnd);
     }
 }
 
@@ -9208,7 +9219,7 @@ Lowerer::LowerLdArrViewElemWasm(IR::Instr * instr)
     Assert(!dst->IsFloat32() || src1->IsFloat32());
     Assert(!dst->IsFloat64() || src1->IsFloat64());
 
-    IR::Instr * done = LowerWasmMemOp(instr, src1);
+    IR::Instr * done = LowerWasmArrayBoundsCheck(instr, src1);
     IR::Instr* newMove = InsertMove(dst, src1, done);
 
 #if ENABLE_FAST_ARRAYBUFFER
@@ -9218,6 +9229,7 @@ Lowerer::LowerLdArrViewElemWasm(IR::Instr * instr)
 #else
     Unused(newMove);
 #endif
+
     instr->Remove();
     return instrPrev;
 #else
@@ -9379,6 +9391,57 @@ Lowerer::LowerMemOp(IR::Instr * instr)
     return instrPrev;
 }
 
+IR::Instr*
+Lowerer::LowerStAtomicsWasm(IR::Instr* instr)
+{
+#ifdef ENABLE_WASM
+    Assert(m_func->GetJITFunctionBody()->IsWasmFunction());
+    Assert(instr);
+    Assert(instr->m_opcode == Js::OpCode::StAtomicWasm);
+
+    IR::Instr * instrPrev = instr->m_prev;
+
+    IR::Opnd * dst = instr->GetDst();
+    IR::Opnd * src1 = instr->GetSrc1();
+
+    Assert(IRType_IsNativeInt(dst->GetType()));
+
+    IR::Instr * done = LowerWasmArrayBoundsCheck(instr, dst);
+    m_lowererMD.LowerAtomicStore(dst, src1, done);
+
+    instr->Remove();
+    return instrPrev;
+#else
+    Assert(UNREACHED);
+    return instr;
+#endif
+}
+
+IR::Instr * Lowerer::LowerLdAtomicsWasm(IR::Instr * instr)
+{
+#ifdef ENABLE_WASM
+    Assert(m_func->GetJITFunctionBody()->IsWasmFunction());
+    Assert(instr);
+    Assert(instr->m_opcode == Js::OpCode::LdAtomicWasm);
+
+    IR::Instr * instrPrev = instr->m_prev;
+
+    IR::Opnd * dst = instr->GetDst();
+    IR::Opnd * src1 = instr->GetSrc1();
+
+    Assert(IRType_IsNativeInt(dst->GetType()));
+
+    IR::Instr * done = LowerWasmArrayBoundsCheck(instr, src1);
+    m_lowererMD.LowerAtomicLoad(dst, src1, done);
+
+    instr->Remove();
+    return instrPrev;
+#else
+    Assert(UNREACHED);
+    return instr;
+#endif
+}
+
 IR::Instr *
 Lowerer::LowerStArrViewElem(IR::Instr * instr)
 {
@@ -9405,7 +9468,7 @@ Lowerer::LowerStArrViewElem(IR::Instr * instr)
 
     if (m_func->GetJITFunctionBody()->IsWasmFunction())
     {
-        done = LowerWasmMemOp(instr, dst);
+        done = LowerWasmArrayBoundsCheck(instr, dst);
     }
     else if (offset < 0)
     {
@@ -25386,6 +25449,38 @@ Lowerer::LowerTrapIfZero(IR::Instr * const instr)
     LowererMD::ChangeToAssign(instr);
 }
 
+IR::Instr*
+Lowerer::LowerTrapIfUnalignedAccess(IR::Instr * const instr)
+{
+    IR::Opnd* src1 = instr->GetSrc1();
+    IR::Opnd* src2 = instr->UnlinkSrc2();
+    Assert(instr);
+    Assert(instr->m_opcode == Js::OpCode::TrapIfUnalignedAccess);
+    Assert(src1 && !src1->IsVar());
+    Assert(src2 && src2->IsImmediateOpnd());
+    Assert(src2->GetSize() > 1);
+
+    uint32 mask = src2->GetSize() - 1;
+    uint32 cmpValue = (uint32)src2->GetImmediateValue(m_func);
+    src2->Free(m_func);
+
+    IR::IntConstOpnd* maskOpnd = IR::IntConstOpnd::New(mask, src1->GetType(), m_func);
+    IR::RegOpnd* maskedOpnd = IR::RegOpnd::New(src1->GetType(), m_func);
+    IR::Instr* newInstr = IR::Instr::New(Js::OpCode::And_I4, maskedOpnd, src1, maskOpnd, m_func);
+    instr->InsertBefore(newInstr);
+
+    IR::IntConstOpnd* cmpOpnd = IR::IntConstOpnd::New(cmpValue, maskedOpnd->GetType(), m_func, true);
+    IR::LabelInstr* alignedLabel = IR::LabelInstr::New(Js::OpCode::Label, m_func);
+    newInstr = IR::BranchInstr::New(Js::OpCode::BrEq_I4, alignedLabel, maskedOpnd, cmpOpnd, m_func);
+    instr->InsertBefore(newInstr);
+    InsertLabel(true, instr);
+    GenerateThrow(IR::IntConstOpnd::NewFromType(SCODE_CODE(WASMERR_UnalignedAtomicAccess), TyInt32, m_func), instr);
+    instr->InsertBefore(alignedLabel);
+
+    instr->m_opcode = Js::OpCode::Ld_I4;
+    return instr;
+}
+
 void
 Lowerer::LowerTrapIfMinIntOverNegOne(IR::Instr * const instr)
 {

+ 4 - 3
lib/Backend/Lower.h

@@ -149,7 +149,6 @@ private:
     void            GenerateDirectFieldStore(IR::Instr* instrStFld, IR::PropertySymOpnd* propertySymOpnd);
     void            GenerateAdjustSlots(IR::Instr * instrStFld, IR::PropertySymOpnd *propertySymOpnd, JITTypeHolder initialType, JITTypeHolder finalType);
     bool            GenerateAdjustBaseSlots(IR::Instr * instrStFld, IR::RegOpnd *baseOpnd, JITTypeHolder initialType, JITTypeHolder finalType);
-    void            GeneratePrototypeCacheInvalidateCheck(IR::PropertySymOpnd *propertySymOpnd, IR::Instr *instrStFld);
     void            PinTypeRef(JITTypeHolder type, void* typeRef, IR::Instr* instr, Js::PropertyId propertyId);
     IR::RegOpnd *   GenerateIsBuiltinRecyclableObject(IR::RegOpnd *regOpnd, IR::Instr *insertInstr, IR::LabelInstr *labelHelper, bool checkObjectAndDynamicObject = true, IR::LabelInstr *labelFastExternal = nullptr, bool isInHelper = false);
     void            GenerateIsDynamicObject(IR::RegOpnd *regOpnd, IR::Instr *insertInstr, IR::LabelInstr *labelHelper, bool fContinueLabel = false);
@@ -198,9 +197,11 @@ private:
     IR::Instr *     LowerMemset(IR::Instr * instr, IR::RegOpnd * helperRet);
     IR::Instr *     LowerMemcopy(IR::Instr * instr, IR::RegOpnd * helperRet);
 
-    IR::Instr *     LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd);
+    IR::Instr *     LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd);
     IR::Instr *     LowerLdArrViewElem(IR::Instr * instr);
     IR::Instr *     LowerStArrViewElem(IR::Instr * instr);
+    IR::Instr *     LowerStAtomicsWasm(IR::Instr * instr);
+    IR::Instr *     LowerLdAtomicsWasm(IR::Instr * instr);
     IR::Instr *     LowerLdArrViewElemWasm(IR::Instr * instr);
     IR::Instr *     LowerArrayDetachedCheck(IR::Instr * instr);
     IR::Instr *     LowerDeleteElemI(IR::Instr *instr, bool strictMode);
@@ -215,7 +216,6 @@ private:
     void            LowerLdSlot(IR::Instr *instr);
     IR::Instr *     LowerChkUndecl(IR::Instr *instr);
     void            GenUndeclChk(IR::Instr *insertInsert, IR::Opnd *opnd);
-    IR::Instr *     LowerStLen(IR::Instr *instr);
     IR::Instr *     LoadPropertySymAsArgument(IR::Instr *instr, IR::Opnd *fieldSrc);
     IR::Instr *     LoadFunctionBodyAsArgument(IR::Instr *instr, IR::IntConstOpnd * functionBodySlotOpnd, IR::RegOpnd * envOpnd);
     IR::Instr *     LoadHelperTemp(IR::Instr * instr, IR::Instr * instrInsert);
@@ -524,6 +524,7 @@ private:
     void            LowerRemI4(IR::Instr * const instr);
     void            LowerTrapIfZero(IR::Instr * const instr);
     void            LowerTrapIfMinIntOverNegOne(IR::Instr * const instr);
+    IR::Instr*      LowerTrapIfUnalignedAccess(IR::Instr * const instr);
     void            LowerDivI4Common(IR::Instr * const instr);
     void            LowerRemR8(IR::Instr * const instr);
     void            LowerRemR4(IR::Instr * const instr);

+ 50 - 3
lib/Backend/LowerMDShared.cpp

@@ -282,9 +282,19 @@ LowererMD::LowerAsmJsCallE(IR::Instr * callInstr)
 }
 
 IR::Instr *
-LowererMD::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
+LowererMD::LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd)
 {
-    return this->lowererMDArch.LowerWasmMemOp(instr, addrOpnd);
+    return this->lowererMDArch.LowerWasmArrayBoundsCheck(instr, addrOpnd);
+}
+
+void LowererMD::LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr)
+{
+    return this->lowererMDArch.LowerAtomicStore(dst, src1, insertBeforeInstr);
+}
+
+void LowererMD::LowerAtomicLoad(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr)
+{
+    return this->lowererMDArch.LowerAtomicLoad(dst, src1, insertBeforeInstr);
 }
 
 IR::Instr *
@@ -1557,6 +1567,43 @@ LowererMD::Legalize(IR::Instr *const instr, bool fPostRegAlloc)
                 L_Reg | L_Mem | L_Imm32);
             break;
 
+        case Js::OpCode::LOCKCMPXCHG8B:
+        case Js::OpCode::CMPXCHG8B:
+        {
+            const auto getRegMask = [](IR::Opnd* opnd)
+            {
+                Assert(opnd->IsListOpnd());
+                return opnd->AsListOpnd()->Reduce(
+                [](int i, IR::Opnd* opnd) {
+                    Assert(opnd->IsRegOpnd());
+                    return 1 << opnd->AsRegOpnd()->GetReg(); 
+                },
+                [](int i, uint32 regmask, uint32 allReg)
+                {
+                    AssertMsg((allReg & regmask) == 0, "Should not have the same register twice");
+                    return allReg | regmask;
+                }, 0);
+            };
+#if _M_IX86
+            const uint32 dstMask = (1 << RegEAX | 1 << RegEDX);
+            const uint32 srcMask = (1 << RegEAX | 1 << RegEBX | 1 << RegECX | 1 << RegEDX);
+#else
+            const uint32 dstMask = (1 << RegRAX | 1 << RegRDX);
+            const uint32 srcMask = (1 << RegRAX | 1 << RegRBX | 1 << RegRCX | 1 << RegRDX);
+#endif
+
+            AssertMsg(!instr->m_func->isPostFinalLower || !instr->GetDst(), "After FinalLower, there should not be a dst");
+            AssertMsg(instr->m_func->isPostFinalLower || getRegMask(instr->GetDst()) == dstMask,
+                "Before FinalLower, instr should have eax,edx as dst");
+            AssertMsg(!instr->m_func->isPostFinalLower || !instr->GetSrc2(), "After FinalLower, there should not be a src2");
+            AssertMsg(instr->m_func->isPostFinalLower || getRegMask(instr->GetSrc2()) == srcMask,
+                "Before FinalLower, instr should have eax,edx,ecx,ebx as src2");
+            LegalizeSrc<verify>(
+                instr,
+                instr->GetSrc1(),
+                L_Mem);
+            break;
+        }
         case Js::OpCode::TEST:
             if((instr->GetSrc1()->IsImmediateOpnd() && !instr->GetSrc2()->IsImmediateOpnd()) ||
                 (instr->GetSrc2()->IsMemoryOpnd() && !instr->GetSrc1()->IsMemoryOpnd()))
@@ -1983,7 +2030,7 @@ void LowererMD::LegalizeSrc(IR::Instr *const instr, IR::Opnd *src, const uint fo
     Assert(src == instr->GetSrc1() || src == instr->GetSrc2());
     Assert(forms);
 #ifndef _M_X64
-    AssertMsg(!src->IsInt64(), "Int64 supported only on x64");
+    AssertMsg(!src->IsInt64() || src->IsMemoryOpnd(), "Int64 supported only on x64");
 #endif
     switch(src->GetKind())
     {

+ 3 - 2
lib/Backend/LowerMDShared.h

@@ -71,7 +71,6 @@ public:
 
     UINT FloatPrefThreshold;
 
-public:
             void            Init(Lowerer *lowerer);
             IR::Opnd *      GenerateMemRef(intptr_t addr, IRType type, IR::Instr *instr, bool dontEncode = false);
             void            GenerateMemInit(IR::RegOpnd * opnd, int32 offset, size_t value, IR::Instr * insertBeforeInstr, bool isZeroed = false);
@@ -102,7 +101,9 @@ public:
             IR::Instr *     LowerCondBranch(IR::Instr * instr);
             IR::Instr *     LoadFunctionObjectOpnd(IR::Instr *instr, IR::Opnd *&functionObjOpnd);
             IR::Instr *     LowerNewScObject(IR::Instr *newObjInstr);
-            IR::Instr *     LowerWasmMemOp(IR::Instr *instr, IR::Opnd *addrOpnd);
+            IR::Instr *     LowerWasmArrayBoundsCheck(IR::Instr *instr, IR::Opnd *addrOpnd);
+            void            LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr);
+            void            LowerAtomicLoad(IR::Opnd* dst, IR::Opnd* src1, IR::Instr* insertBeforeInstr);
             void            ForceDstToReg(IR::Instr *instr);
 
 public:

+ 25 - 1
lib/Backend/amd64/EncoderMD.cpp

@@ -685,6 +685,11 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
 
     instrRestart = instrStart = m_pc;
 
+    // Emit the lock byte first if needed
+    if (opdope & DLOCK)
+    {
+        *instrRestart++ = 0xf0;
+    }
 
     // put out 16bit override if any
     if (instrSize == 2 && (opdope & (DNO16 | DFLT)) == 0)
@@ -727,7 +732,7 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
     prexByte = instrRestart;
 
     // This is a heuristic to determine whether we really need to have the Rex bytes
-    // This heuristics is always correct for instrSize == 8
+    // This heuristics is almost always correct for instrSize == 8
     // For instrSize < 8, we might use extended registers and we will have to adjust in EmitRexByte
     bool reservedRexByte = (instrSize == 8);
     if (reservedRexByte)
@@ -1026,6 +1031,25 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
                     rexByte |= this->EmitModRM(instr, src1, this->GetRegEncode(src2->AsRegOpnd()));
                 }
                 break;
+
+            case Js::OpCode::CMPXCHG8B:
+            case Js::OpCode::LOCKCMPXCHG8B:
+            {
+                if (instrSize == 8)
+                {
+                    skipRexByte = true;
+                    Assert(!opr2);
+                    BYTE byte2 = (this->GetOpcodeByte2(instr) >> 3);
+                    this->EmitModRM(instr, opr1, byte2);
+                }
+                else
+                {
+                    Assert(instrSize == 16);
+                    continue;
+                }
+                break;
+            }
+
             case Js::OpCode::SHLD:
                 /*
                  *       0F A4   SHLD r/m32, r32, imm8

+ 46 - 1
lib/Backend/amd64/LowererMDArch.cpp

@@ -1080,7 +1080,7 @@ LowererMDArch::LowerAsmJsCallI(IR::Instr * callInstr)
 }
 
 IR::Instr *
-LowererMDArch::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
+LowererMDArch::LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd)
 {
     IR::IndirOpnd * indirOpnd = addrOpnd->AsIndirOpnd();
     IR::RegOpnd * indexOpnd = indirOpnd->GetIndexOpnd();
@@ -1126,6 +1126,45 @@ LowererMDArch::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
     return doneLabel;
 }
 
+void
+LowererMDArch::LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr)
+{
+    Assert(IRType_IsNativeInt(dst->GetType()));
+    Assert(IRType_IsNativeInt(src1->GetType()));
+    IR::RegOpnd* tmpSrc = IR::RegOpnd::New(dst->GetType(), m_func);
+    Lowerer::InsertMove(tmpSrc, src1, insertBeforeInstr);
+
+    // Put src1 as dst to make sure we know that register is modified
+    IR::Instr* xchgInstr = IR::Instr::New(Js::OpCode::XCHG, tmpSrc, tmpSrc, dst, insertBeforeInstr->m_func);
+    insertBeforeInstr->InsertBefore(xchgInstr);
+}
+
+void
+LowererMDArch::LowerAtomicLoad(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr)
+{
+    Assert(IRType_IsNativeInt(dst->GetType()));
+    Assert(IRType_IsNativeInt(src1->GetType()));
+    IR::Instr* newMove = Lowerer::InsertMove(dst, src1, insertBeforeInstr);
+
+#if ENABLE_FAST_ARRAYBUFFER
+    // We need to have an AV when accessing out of bounds memory even if the dst is not used
+    // Make sure LinearScan doesn't dead store this instruction
+    newMove->hasSideEffects = true;
+#endif
+
+    // Need to add Memory Barrier before the load
+    // MemoryBarrier is implemented with `lock or [rsp], 0` on x64
+    IR::IndirOpnd* stackTop = IR::IndirOpnd::New(
+        IR::RegOpnd::New(nullptr, RegRSP, TyMachReg, m_func),
+        0,
+        TyMachReg,
+        m_func
+    );
+    IR::IntConstOpnd* zero = IR::IntConstOpnd::New(0, TyMachReg, m_func);
+    IR::Instr* memoryBarrier = IR::Instr::New(Js::OpCode::LOCKOR, stackTop, stackTop, zero, m_func);
+    newMove->InsertBefore(memoryBarrier);
+}
+
 IR::Instr*
 LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= false*/, bool checkEndOffset /*= false*/)
 {
@@ -3334,6 +3373,12 @@ LowererMDArch::FinalLower()
                 instr->FreeSrc2();
             }
             break;
+        case Js::OpCode::LOCKCMPXCHG8B:
+        case Js::OpCode::CMPXCHG8B:
+            // Get rid of the deps and srcs
+            instr->FreeDst();
+            instr->FreeSrc2();
+            break;
         }
     } NEXT_INSTR_BACKWARD_EDITING_IN_RANGE;
 }

+ 3 - 1
lib/Backend/amd64/LowererMDArch.h

@@ -91,7 +91,9 @@ public:
     IR::Instr *         LowerStartCall(IR::Instr * instr);
     IR::Instr *         LowerAsmJsCallI(IR::Instr * callInstr);
     IR::Instr *         LowerAsmJsCallE(IR::Instr * callInstr);
-    IR::Instr *         LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd);
+    IR::Instr *         LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd);
+    void                LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr);
+    void                LowerAtomicLoad(IR::Opnd* dst, IR::Opnd* src1, IR::Instr* insertBeforeInstr);
     IR::Instr *         LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad = false, bool checkEndOffset = false);
     IR::Instr *         LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore = false, bool checkEndOffset = false);
 

+ 6 - 1
lib/Backend/amd64/MdOpCodes.h

@@ -67,6 +67,9 @@ MACRO(CMPLEPD,    Empty,    None,          RNON,   f(MODRM),   o(CMPPD),   D66|D
 MACRO(CMPEQPD,    Empty,    None,          RNON,   f(MODRM),   o(CMPPD),   D66|DSSE,                OLB_0F)
 MACRO(CMPNEQPD,   Empty,    None,          RNON,   f(MODRM),   o(CMPPD),   D66|DSSE,                OLB_0F)
 
+MACRO(CMPXCHG8B,  Reg1,     OpSideEffect,  R001,   f(SPMOD),   o(CMPXCHG8B), DNO16|DSETCC,          OLB_0F)
+MACRO(LOCKCMPXCHG8B, Reg1,  OpSideEffect,  R001,   f(SPMOD),   o(CMPXCHG8B), DNO16|DSETCC|DLOCK,    OLB_0F)
+
 MACRO(COMISD,   Empty,  OpSideEffect,  RNON,   f(MODRM),   o(COMISD),  DNO16|D66|DSETCC,            OLB_0F)
 MACRO(COMISS,   Empty,  OpSideEffect,  RNON,   f(MODRM),   o(COMISS),  DNO16|DSETCC,                OLB_0F)
 MACRO(CVTDQ2PD, Reg2,   None,          RNON,   f(MODRM),   o(CVTDQ2PD),DDST|DNO16|DF3,              OLB_0F)
@@ -124,6 +127,8 @@ MACRO(MAXPS,    Reg2,       None,           RNON,   f(MODRM),   o(MAXPS),   DNO1
 MACRO(MINPD,    Reg2,       None,           RNON,   f(MODRM),   o(MINPD),   DNO16|DOPEQ|D66,        OLB_0F)
 MACRO(MINPS,    Reg2,       None,           RNON,   f(MODRM),   o(MINPS),   DNO16|DOPEQ,            OLB_0F)
 
+MACRO(LOCKOR,   Reg2,   OpSideEffect,  R001,   f(BINOP),   o(OR),      DOPEQ|DSETCC|DCOMMOP|DLOCK,  OLB_NONE)
+
 MACRO(LZCNT,    Reg2,   None,          RNON,   f(MODRM),   o(LZCNT),   DF3|DSETCC|DDST,             OLB_0F)
 
 MACRO(MOV,      Reg2,   None,          R000,   f(MOV),     o(MOV),     DDST|DMOV,                   OLB_NONE)
@@ -263,7 +268,7 @@ MACRO(TZCNT,    Reg2,   None,          RNON,   f(MODRM),   o(TZCNT),   DF3|DSETC
 
 MACRO(UCOMISD,  Empty,  None,          RNON,   f(MODRM),   o(UCOMISD), DNO16|D66|DSETCC,            OLB_0F)
 MACRO(UCOMISS,  Empty,  None,          RNON,   f(MODRM),   o(UCOMISS), DNO16|DSETCC,                OLB_0F)
-MACRO(XCHG,     Reg2,   None,          R000,   f(XCHG),    o(XCHG),    DOPEQ,                       OLB_NONE)
+MACRO(XCHG,     Reg2,   OpSideEffect,  R000,   f(XCHG),    o(XCHG),    DOPEQ,                       OLB_NONE)
 MACRO(XOR,      Reg2,   OpSideEffect,  R110,   f(BINOP),   o(XOR),     DOPEQ|DSETCC|DCOMMOP,        OLB_NONE)
 MACRO(XORPS,    Reg3,   None,          RNON,   f(MODRM),   o(XORPS),   DNO16|DOPEQ|DCOMMOP,         OLB_0F)
 MACRO(PINSRW,   Reg2,   None,          RNON,   f(MODRM),   o(PINSRW),  DDST|DNO16|DSSE|D66,         OLB_0F)

+ 2 - 0
lib/Backend/amd64/X64Encode.h

@@ -43,6 +43,7 @@
 #define D66     0x100000 // 0x66 0x0F style WNI form (usually 128-bit DP FP)
 #define DF2     0x200000 /* 0xF2 0x0F style WNI form (usually 64-bit DP FP) */
 #define DREXSRC  0x400000 /* Use src1's size to generate REX byte */
+#define DLOCK   0x800000 /* Prefix the instruction with the lock byte (0xf0) */
 
 // 2nd 3 bits is options
 #define SBIT 0x20
@@ -168,6 +169,7 @@ enum Forms : BYTE
 
 #define OPBYTE_CMPPD    {0xc2}                  // modrm
 #define OPBYTE_CMPPS    {0xc2}                  // modrm
+#define OPBYTE_CMPXCHG8B {0xC7,0xC7}            // special, modrm
 
 #define OPBYTE_COMISD   {0x2F}                   // modrm
 #define OPBYTE_COMISS   {0x2F}                   // modrm

+ 3 - 2
lib/Backend/arm/LowerMD.h

@@ -161,7 +161,9 @@ public:
             IR::Instr *         LowerStartCall(IR::Instr * instr);
             IR::Instr *         LowerAsmJsCallI(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
             IR::Instr *         LowerAsmJsCallE(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
-            IR::Instr *         LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd) { Assert(UNREACHED); return nullptr; }
+            IR::Instr *         LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd) { Assert(UNREACHED); return nullptr; }
+            void                LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr) { Assert(UNREACHED); }
+            void                LowerAtomicLoad(IR::Opnd* dst, IR::Opnd* src1, IR::Instr* insertBeforeInstr) { Assert(UNREACHED); }
             IR::Instr *         LowerAsmJsStElemHelper(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
             IR::Instr *         LowerAsmJsLdElemHelper(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
             IR::Instr *         LowerCallIDynamic(IR::Instr *callInstr, IR::Instr*saveThisArgOutInstr, IR::Opnd *argsLength, ushort callFlags, IR::Instr * insertBeforeInstrForCFG = nullptr);
@@ -272,5 +274,4 @@ protected:
     IR::Opnd *          helperCallArgs[MaxArgumentsToHelper];
 
     void                FlipHelperCallArgsOrder();
-
 };

+ 3 - 1
lib/Backend/arm64/LowerMD.h

@@ -160,7 +160,9 @@ public:
             IR::Instr *         LowerStartCall(IR::Instr * instr);
             IR::Instr *         LowerAsmJsCallI(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
             IR::Instr *         LowerAsmJsCallE(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
-            IR::Instr *         LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd) { Assert(UNREACHED); return nullptr; }
+            IR::Instr *         LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd) { Assert(UNREACHED); return nullptr; }
+            void                LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr) { Assert(UNREACHED); }
+            void                LowerAtomicLoad(IR::Opnd* dst, IR::Opnd* src1, IR::Instr* insertBeforeInstr) { Assert(UNREACHED); }
             IR::Instr *         LowerAsmJsStElemHelper(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
             IR::Instr *         LowerAsmJsLdElemHelper(IR::Instr * callInstr) { Assert(UNREACHED); return nullptr; }
             IR::Instr *         LowerCallIDynamic(IR::Instr *callInstr, IR::Instr*saveThisArgOutInstr, IR::Opnd *argsLength, ushort callFlags, IR::Instr * insertBeforeInstrForCFG = nullptr);

+ 17 - 10
lib/Backend/i386/EncoderMD.cpp

@@ -630,8 +630,9 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
     IR::Opnd  *opr1;
     IR::Opnd  *opr2;
 
+    uint32 opdope = this->GetOpdope(instr);
     // Canonicalize operands.
-    if (this->GetOpdope(instr) & DDST)
+    if (opdope & DDST)
     {
         opr1 = dst;
         opr2 = src1;
@@ -649,31 +650,37 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
     const uint32 leadIn = EncoderMD::GetLeadIn(instr);
     instrRestart = instrStart = m_pc;
 
-    if (instrSize == 2 && (this->GetOpdope(instr) & (DNO16|DFLT)) == 0)
+    // Emit the lock byte first if needed
+    if (opdope & DLOCK)
+    {
+        *instrRestart++ = 0xf0;
+    }
+
+    if (instrSize == 2 && (opdope & (DNO16|DFLT)) == 0)
     {
         *instrRestart++ = 0x66;
     }
-    if (this->GetOpdope(instr) & D66EX)
+    if (opdope & D66EX)
     {
         if (opr1->IsFloat64() || opr2->IsFloat64())
         {
             *instrRestart++ = 0x66;
         }
     }
-    if (this->GetOpdope(instr) & (DZEROF|DF2|DF3|D66))
+    if (opdope & (DZEROF|DF2|DF3|D66))
     {
-        if (this->GetOpdope(instr) & DZEROF)
+        if (opdope & DZEROF)
         {
         }
-        else if (this->GetOpdope(instr) & DF2)
+        else if (opdope & DF2)
         {
             *instrRestart++ = 0xf2;
         }
-        else if (this->GetOpdope(instr) & DF3)
+        else if (opdope & DF3)
         {
             *instrRestart++ = 0xf3;
         }
-        else if (this->GetOpdope(instr) & D66)
+        else if (opdope & D66)
         {
             *instrRestart++ = 0x66;
         }
@@ -1233,13 +1240,13 @@ modrm:
         }
 
         // if instr has W bit, set it appropriately
-        if ((*form & WBIT) && !(this->GetOpdope(instr) & DFLT) && instrSize != 1)
+        if ((*form & WBIT) && !(opdope & DFLT) && instrSize != 1)
         {
             *opcodeByte |= 0x1; // set WBIT
         }
 
         AssertMsg(m_pc - instrStart <= MachMaxInstrSize, "MachMaxInstrSize not set correctly");
-        if (this->GetOpdope(instr) & DSSE)
+        if (opdope & DSSE)
         {
             // extra imm8 byte for SSE instructions.
             uint valueImm = 0;

+ 135 - 19
lib/Backend/i386/LowererMDArch.cpp

@@ -888,7 +888,7 @@ LowererMDArch::LowerAsmJsCallI(IR::Instr * callInstr)
 }
 
 IR::Instr *
-LowererMDArch::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
+LowererMDArch::LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd)
 {
     IR::IndirOpnd * indirOpnd = addrOpnd->AsIndirOpnd();
     IR::RegOpnd * indexOpnd = indirOpnd->GetIndexOpnd();
@@ -922,6 +922,132 @@ LowererMDArch::LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd)
     return doneLabel;
 }
 
+void
+LowererMDArch::LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr)
+{
+    Assert(IRType_IsNativeInt(dst->GetType()));
+    Assert(IRType_IsNativeInt(src1->GetType()));
+    Func* func = insertBeforeInstr->m_func;
+
+    // Move src1 to a register of the same type as dst
+    IR::RegOpnd* tmpSrc = IR::RegOpnd::New(dst->GetType(), func);
+    Lowerer::InsertMove(tmpSrc, src1, insertBeforeInstr);
+    if (dst->IsInt64())
+    {
+        // todo:: Could do a better implementation than InterlockedExchange64 with the following
+        /*
+        mov ebx, tmpSrc.low;
+        mov ecx, tmpSrc.high;
+        ;; Load old value first
+        mov eax, [buffer];
+        mov edx, [buffer+4];
+    tryAgain:
+        ;; CMPXCHG8B doc:
+            ;; Compare EDX:EAX with m64. If equal, set ZF
+            ;; and load ECX:EBX into m64. Else, clear ZF and
+            ;; load m64 into EDX:EAX.
+        lock CMPXCHG8B [buffer]
+        jnz tryAgain
+
+        ;; ZF was set, this means the old value hasn't changed between the load and the CMPXCHG8B
+        ;; so we correctly stored our value atomically
+        
+        // NOTE: The following is a failed attempt at implementing the approach above;
+        // it is kept as a comment for reference by a future reimplementation.
+        IR::RegOpnd* ecx = IR::RegOpnd::New(RegECX, TyMachReg, func);
+        IR::RegOpnd* ebx = IR::RegOpnd::New(RegEBX, TyMachReg, func);
+        IR::RegOpnd* eax = IR::RegOpnd::New(RegEAX, TyMachReg, func);
+        IR::RegOpnd* edx = IR::RegOpnd::New(RegEDX, TyMachReg, func);
+        auto dstPair = func->FindOrCreateInt64Pair(dst);
+        auto srcPair = func->FindOrCreateInt64Pair(tmpSrc);
+        Lowerer::InsertMove(ebx, srcPair.low, insertBeforeInstr);
+        Lowerer::InsertMove(ecx, srcPair.high, insertBeforeInstr);
+        Lowerer::InsertMove(eax, dstPair.low, insertBeforeInstr);
+        Lowerer::InsertMove(edx, dstPair.high, insertBeforeInstr);
+
+        IR::LabelInstr* startLoop = IR::LabelInstr::New(Js::OpCode::Label, func);
+        startLoop->m_isLoopTop = true;
+        Loop *loop = JitAnew(this->m_func->m_alloc, Loop, this->m_func->m_alloc, this->m_func);
+        startLoop->SetLoop(loop);
+        loop->SetLoopTopInstr(startLoop);
+        loop->regAlloc.liveOnBackEdgeSyms = JitAnew(func->m_alloc, BVSparse<JitArenaAllocator>, func->m_alloc);
+        loop->regAlloc.liveOnBackEdgeSyms->Set(ebx->m_sym->m_id);
+        loop->regAlloc.liveOnBackEdgeSyms->Set(ecx->m_sym->m_id);
+        loop->regAlloc.liveOnBackEdgeSyms->Set(eax->m_sym->m_id);
+        loop->regAlloc.liveOnBackEdgeSyms->Set(edx->m_sym->m_id);
+        insertBeforeInstr->InsertBefore(startLoop);
+
+        insertBeforeInstr->InsertBefore(IR::Instr::New(Js::OpCode::CMPXCHG8B, nullptr, dstPair.low, func));
+        insertBeforeInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, startLoop, func));
+        */
+
+        //////
+        IR::RegOpnd* bufferAddress = IR::RegOpnd::New(TyMachReg, func);
+        IR::Instr* lea = IR::Instr::New(Js::OpCode::LEA, bufferAddress, dst, func);
+        insertBeforeInstr->InsertBefore(lea);
+
+        LoadInt64HelperArgument(insertBeforeInstr, tmpSrc);
+        LoadHelperArgument(insertBeforeInstr, bufferAddress);
+
+        IR::Instr* callInstr = IR::Instr::New(Js::OpCode::Call, func);
+        insertBeforeInstr->InsertBefore(callInstr);
+        lowererMD->ChangeToHelperCall(callInstr, IR::HelperAtomicStore64);
+    }
+    else
+    {
+        // Put tmpSrc as dst to make sure we know that register is modified
+        IR::Instr* xchgInstr = IR::Instr::New(Js::OpCode::XCHG, tmpSrc, tmpSrc, dst, insertBeforeInstr->m_func);
+        insertBeforeInstr->InsertBefore(xchgInstr);
+    }
+}
+
+void
+LowererMDArch::LowerAtomicLoad(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr)
+{
+    Assert(IRType_IsNativeInt(dst->GetType()));
+    Assert(IRType_IsNativeInt(src1->GetType()));
+    Func* func = insertBeforeInstr->m_func;
+
+    if (src1->IsInt64())
+    {
+        /*
+        ;; Zero out all the relevant registers
+        xor ebx, ebx;
+        xor ecx, ecx;
+        xor eax, eax;
+        xor edx, edx;
+        lock CMPXCHG8B [buffer]
+        ;; The value in the buffer is in EDX:EAX
+        */
+
+        IR::RegOpnd* ecx = IR::RegOpnd::New(RegECX, TyMachReg, func);
+        IR::RegOpnd* ebx = IR::RegOpnd::New(RegEBX, TyMachReg, func);
+        IR::RegOpnd* eax = IR::RegOpnd::New(RegEAX, TyMachReg, func);
+        IR::RegOpnd* edx = IR::RegOpnd::New(RegEDX, TyMachReg, func);
+
+        IR::IntConstOpnd* zero = IR::IntConstOpnd::New(0, TyMachReg, func);
+        Lowerer::InsertMove(ebx, zero, insertBeforeInstr);
+        Lowerer::InsertMove(ecx, zero, insertBeforeInstr);
+        Lowerer::InsertMove(eax, zero, insertBeforeInstr);
+        Lowerer::InsertMove(edx, zero, insertBeforeInstr);
+
+        IR::ListOpnd* deps = IR::ListOpnd::New(func, eax, ebx, ecx, edx);
+        IR::ListOpnd* dsts = IR::ListOpnd::New(func, eax, edx);
+        IR::Instr* cmpxchg = IR::Instr::New(Js::OpCode::LOCKCMPXCHG8B, dsts, src1, deps, func);
+        insertBeforeInstr->InsertBefore(cmpxchg);
+        Int64RegPair dstPair = func->FindOrCreateInt64Pair(dst);
+        Lowerer::InsertMove(dstPair.low, eax, insertBeforeInstr);
+        Lowerer::InsertMove(dstPair.high, edx, insertBeforeInstr);
+    }
+    else
+    {
+        IR::Instr* callInstr = IR::Instr::New(Js::OpCode::Call, func);
+        insertBeforeInstr->InsertBefore(callInstr);
+        lowererMD->ChangeToHelperCall(callInstr, IR::HelperMemoryBarrier);
+        Lowerer::InsertMove(dst, src1, insertBeforeInstr);
+    }
+}
+
 IR::Instr*
 LowererMDArch::LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad /*= false*/, bool checkEndOffset /*= false*/)
 {
@@ -1391,25 +1517,9 @@ LowererMDArch::LoadDynamicArgument(IR::Instr * instr, uint argNumber /*ignore fo
 IR::Instr *
 LowererMDArch::LoadInt64HelperArgument(IR::Instr * instrInsert, IR::Opnd * opndArg)
 {
-    IR::RegOpnd * espOpnd = IR::RegOpnd::New(nullptr, this->GetRegStackPointer(), TyMachReg, this->m_func);
-
-    IR::Opnd * opnd = IR::IndirOpnd::New(espOpnd, -8, TyMachReg, this->m_func);
-    IR::Instr * instrPrev = IR::Instr::New(Js::OpCode::LEA, espOpnd, opnd, this->m_func);
-    instrInsert->InsertBefore(instrPrev);
-
     Int64RegPair argPair = m_func->FindOrCreateInt64Pair(opndArg);
-
-    opnd = IR::IndirOpnd::New(espOpnd, 0, TyInt32, this->m_func);
-    IR::Instr * instr = IR::Instr::New(Js::OpCode::MOV, opnd, argPair.low, this->m_func);
-    instrInsert->InsertBefore(instr);
-    LowererMD::Legalize(instr);
-
-    opnd = IR::IndirOpnd::New(espOpnd, 4, TyInt32, this->m_func);
-    instr = IR::Instr::New(Js::OpCode::MOV, opnd, argPair.high, this->m_func);
-    instrInsert->InsertBefore(instr);
-    LowererMD::Legalize(instr);
-
-    return instrPrev;
+    LoadHelperArgument(instrInsert, argPair.high);
+    return LoadHelperArgument(instrInsert, argPair.low);
 }
 
 IR::Instr *
@@ -3973,6 +4083,12 @@ LowererMDArch::FinalLower()
                 instr->FreeSrc2();
             }
             break;
+        case Js::OpCode::LOCKCMPXCHG8B:
+        case Js::OpCode::CMPXCHG8B:
+            // Get rid of the dsts list (dst) and deps list (src2)
+            instr->FreeDst();
+            instr->FreeSrc2();
+            break;
         }
     }
     NEXT_INSTR_BACKWARD_EDITING_IN_RANGE;

+ 3 - 1
lib/Backend/i386/LowererMDArch.h

@@ -48,7 +48,9 @@ public:
             IR::Instr *         LowerInt64CallDst(IR::Instr * callInstr);
             IR::Instr *         LowerAsmJsCallI(IR::Instr * callInstr);
             IR::Instr *         LowerAsmJsCallE(IR::Instr * callInstr);
-            IR::Instr *         LowerWasmMemOp(IR::Instr * instr, IR::Opnd *addrOpnd);
+            IR::Instr *         LowerWasmArrayBoundsCheck(IR::Instr * instr, IR::Opnd *addrOpnd);
+            void                LowerAtomicStore(IR::Opnd * dst, IR::Opnd * src1, IR::Instr * insertBeforeInstr);
+            void                LowerAtomicLoad(IR::Opnd* dst, IR::Opnd* src1, IR::Instr* insertBeforeInstr);
             IR::Instr *         LowerAsmJsLdElemHelper(IR::Instr * instr, bool isSimdLoad = false, bool checkEndOffset = false);
             IR::Instr *         LowerAsmJsStElemHelper(IR::Instr * instr, bool isSimdStore = false, bool checkEndOffset = false);
 

+ 2 - 0
lib/Backend/i386/MdOpCodes.h

@@ -63,6 +63,8 @@ MACRO(CMPLTPD,  Empty,      None,           RNON,   f(MODRM),   o(CMPPD),   D66|
 MACRO(CMPLEPD,  Empty,      None,           RNON,   f(MODRM),   o(CMPPD),   D66|DSSE,                   OLB_NONE)
 MACRO(CMPEQPD,  Empty,      None,           RNON,   f(MODRM),   o(CMPPD),   D66|DSSE,                   OLB_NONE)
 MACRO(CMPNEQPD, Empty,      None,           RNON,   f(MODRM),   o(CMPPD),   D66|DSSE,                   OLB_NONE)
+MACRO(CMPXCHG8B, Reg1,      OpSideEffect,   R001,   f(MODRM),   o(CMPXCHG8B), DNO16|DZEROF|DSETCC,      OLB_NONE)
+MACRO(LOCKCMPXCHG8B, Reg1,  OpSideEffect,   R001,   f(MODRM),   o(CMPXCHG8B), DNO16|DZEROF|DSETCC|DLOCK,OLB_NONE)
 
 MACRO(COMISD,   Empty,      OpSideEffect,   RNON,   f(MODRM),   o(COMISD),  DNO16|D66|DSETCC,           OLB_NONE)
 MACRO(COMISS,   Empty,      OpSideEffect,   RNON,   f(MODRM),   o(COMISS),  DNO16|DZEROF|DSETCC,        OLB_NONE)

+ 2 - 0
lib/Backend/i386/X86Encode.h

@@ -44,6 +44,7 @@
 #define DF3     0x400000 // 0xF3 0x0F style KNI opcodes
 #define NDPinc  0x800000    /* instruction incs stack level by 1 */
 #define NDPdec  0x1000000    /* instruction decs stack level by 1 */
+#define DLOCK   0x2000000 /* Prefix the instruction with the lock byte (0xf0) */
 
 // 2nd 3 bits is options
 #define SBIT 0x20
@@ -164,6 +165,7 @@ enum Forms : BYTE
 
 #define OPBYTE_CMPPD    {0xc2}                  // modrm
 #define OPBYTE_CMPPS    {0xc2}                  // modrm
+#define OPBYTE_CMPXCHG8B {0xC7}                 // modrm
 
 #define OPBYTE_COMISD   {0x2F}                  // modrm
 #define OPBYTE_COMISS   {0x2F}                  // modrm

+ 2 - 0
lib/Common/ConfigFlagsList.h

@@ -403,6 +403,7 @@ PHASE(All)
 #define DEFAULT_CONFIG_WasmMaxTableSize     (10000000)
 #define DEFAULT_CONFIG_WasmSimd             (false)
 #define DEFAULT_CONFIG_WasmSignExtends      (false)
+#define DEFAULT_CONFIG_WasmThreads          (false)
 #define DEFAULT_CONFIG_WasmMultiValue       (false)
 #define DEFAULT_CONFIG_BgJitDelayFgBuffer   (0)
 #define DEFAULT_CONFIG_BgJitPendingFuncCap  (31)
@@ -898,6 +899,7 @@ FLAGNR(Boolean, WasmFold              , "Enable i32/i64 const folding", DEFAULT_
 FLAGNR(Boolean, WasmIgnoreResponse    , "Ignore the type of the Response object", DEFAULT_CONFIG_WasmIgnoreResponse)
 FLAGNR(Number,  WasmMaxTableSize      , "Maximum size allowed to the WebAssembly.Table", DEFAULT_CONFIG_WasmMaxTableSize)
 FLAGNR(Boolean, WasmSignExtends       , "Use new WebAssembly sign extension operators", DEFAULT_CONFIG_WasmSignExtends)
+FLAGNR(Boolean, WasmThreads           , "Enable WebAssembly threads feature", DEFAULT_CONFIG_WasmThreads)
 FLAGNR(Boolean, WasmMultiValue        , "Use new WebAssembly multi-value", DEFAULT_CONFIG_WasmMultiValue)
 #ifdef ENABLE_WASM_SIMD
 FLAGNR(Boolean, WasmSimd              , "Enable SIMD in WebAssembly", DEFAULT_CONFIG_WasmSimd)

+ 1 - 0
lib/Parser/rterrors.h

@@ -404,6 +404,7 @@ RT_ERROR_MSG(WASMERR_InvalidImportModule, 7026, "Import module '%s' is invalid",
 RT_ERROR_MSG(WASMERR_InvalidImport, 7027, "Import '%s.%s' is invalid. Expected type %s", "Import is invalid", kjstTypeError, 0)
 RT_ERROR_MSG(WASMERR_InvalidInitialSize, 7028, "Imported %s initial size (%u) is smaller than declared (%u)", "Invalid initial size", kjstWebAssemblyLinkError, 0)
 RT_ERROR_MSG(WASMERR_InvalidMaximumSize, 7029, "Imported %s maximum size (%u) is larger than declared (%u)", "Invalid initial size", kjstWebAssemblyLinkError, 0)
+RT_ERROR_MSG(WASMERR_UnalignedAtomicAccess, 7030, "", "Atomic memory access is unaligned", kjstWebAssemblyRuntimeError, 0)
 
 // Wabt Errors
 RT_ERROR_MSG(WABTERR_WabtError, 7200, "%s", "Wabt Error.", kjstTypeError, 0)

+ 2 - 0
lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp

@@ -580,8 +580,10 @@ namespace Js
         switch (op)
         {
         case OpCodeAsmJs::LdArrWasm:
+        case OpCodeAsmJs::LdArrAtomic:
             Output::Print(_u(" %c%d = %s[I%d + %d]"), tag.valueTag, data->Value, tag.heapTag, data->SlotIndex, data->Offset); break;
         case OpCodeAsmJs::StArrWasm:
+        case OpCodeAsmJs::StArrAtomic:
             Output::Print(_u(" %s[I%d + %d] = %c%d"), tag.heapTag, data->SlotIndex, data->Offset, tag.valueTag, data->Value); break;
         default:
             Assume(UNREACHED);

+ 3 - 0
lib/Runtime/ByteCode/OpCodes.h

@@ -428,6 +428,8 @@ MACRO_WMS_PROFILED(     StElemI_A_Strict,       ElementI,       OpSideEffect|OpH
 MACRO_BACKEND_ONLY(     StArrViewElem,          ElementI,       OpSideEffect        )       // Store into typed array view
 MACRO_BACKEND_ONLY(     LdArrViewElem,          ElementI,       OpCanCSE            )       // Load from typed array view
 MACRO_BACKEND_ONLY(     LdArrViewElemWasm,      ElementI,       OpSideEffect        )       // Load from wasm array
+MACRO_BACKEND_ONLY(     StAtomicWasm,           ElementI,       OpSideEffect        )       // Atomic store into typed array view
+MACRO_BACKEND_ONLY(     LdAtomicWasm,           ElementI,       OpSideEffect        )       // Atomic load from typed array view
 MACRO_BACKEND_ONLY(     Memset,                 ElementI,       OpSideEffect)
 MACRO_BACKEND_ONLY(     Memcopy,                ElementI,       OpSideEffect)
 MACRO_BACKEND_ONLY(     ArrayDetachedCheck,     Reg1,           None)   // ensures that an ArrayBuffer has not been detached
@@ -749,6 +751,7 @@ MACRO_BACKEND_ONLY(     Nearest_A,          Empty,          OpTempNumberSources|
 MACRO_BACKEND_ONLY(     ThrowRuntimeError,  Empty,          OpSideEffect)
 MACRO_BACKEND_ONLY(     TrapIfMinIntOverNegOne, Reg3,       OpSideEffect)
 MACRO_BACKEND_ONLY(     TrapIfZero,         Reg3,           OpSideEffect)
+MACRO_BACKEND_ONLY(     TrapIfUnalignedAccess, Reg3,        OpSideEffect)
 
 // All SIMD ops are backend only for non-asmjs.
 #define MACRO_SIMD(opcode, asmjsLayout, opCodeAttrAsmJs, OpCodeAttr, ...) MACRO_BACKEND_ONLY(opcode, Empty, OpCodeAttr)

+ 2 - 0
lib/Runtime/ByteCode/OpCodesAsmJs.h

@@ -144,6 +144,8 @@ MACRO_WMS       ( CheckSignature             , Reg1IntConst1   , None
 // Array Buffer manipulations
 MACRO_WMS       ( LdArrWasm                  , WasmMemAccess   , None            )
 MACRO_WMS       ( StArrWasm                  , WasmMemAccess   , None            )
+MACRO_EXTEND_WMS( LdArrAtomic                , WasmMemAccess   , None            )
+MACRO_EXTEND_WMS( StArrAtomic                , WasmMemAccess   , None            )
 MACRO_WMS       ( LdArr                      , AsmTypedArr     , None            )
 MACRO_WMS       ( LdArrConst                 , AsmTypedArr     , None            )
 MACRO_WMS       ( StArr                      , AsmTypedArr     , None            )

+ 38 - 0
lib/Runtime/Language/AsmJsArrayBufferViews.h

@@ -0,0 +1,38 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+// Portions of this file are copyright 2014 Mozilla Foundation, available under the Apache 2.0 license.
+//-------------------------------------------------------------------------------------------------------
+
+#ifndef ARRAYBUFFER_VIEW
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, irSuffix)
+#endif
+
+#ifndef ARRAYBUFFER_VIEW_INT
+#define ARRAYBUFFER_VIEW_INT(name, align, RegType, MemType, irSuffix) ARRAYBUFFER_VIEW(name, align, RegType, MemType, irSuffix)
+#endif
+
+#ifndef ARRAYBUFFER_VIEW_FLT
+#define ARRAYBUFFER_VIEW_FLT(name, align, RegType, MemType, irSuffix) ARRAYBUFFER_VIEW(name, align, RegType, MemType, irSuffix)
+#endif
+
+//                  (Name            , Align , RegType, MemType , irSuffix )
+ARRAYBUFFER_VIEW_INT(INT8            , 0     , int32  , int8    , Int8     )
+ARRAYBUFFER_VIEW_INT(UINT8           , 0     , int32  , uint8   , Uint8    )
+ARRAYBUFFER_VIEW_INT(INT16           , 1     , int32  , int16   , Int16    )
+ARRAYBUFFER_VIEW_INT(UINT16          , 1     , int32  , uint16  , Uint16   )
+ARRAYBUFFER_VIEW_INT(INT32           , 2     , int32  , int32   , Int32    )
+ARRAYBUFFER_VIEW_INT(UINT32          , 2     , int32  , uint32  , Uint32   )
+ARRAYBUFFER_VIEW_FLT(FLOAT32         , 2     , float  , float   , Float32  )
+ARRAYBUFFER_VIEW_FLT(FLOAT64         , 3     , double , double  , Float64  )
+ARRAYBUFFER_VIEW_INT(INT64           , 3     , int64  , int64   , Int64    )
+ARRAYBUFFER_VIEW_INT(INT8_TO_INT64   , 0     , int64  , int8    , Int8     )
+ARRAYBUFFER_VIEW_INT(UINT8_TO_INT64  , 0     , int64  , uint8   , Uint8    )
+ARRAYBUFFER_VIEW_INT(INT16_TO_INT64  , 1     , int64  , int16   , Int16    )
+ARRAYBUFFER_VIEW_INT(UINT16_TO_INT64 , 1     , int64  , uint16  , Uint16   )
+ARRAYBUFFER_VIEW_INT(INT32_TO_INT64  , 2     , int64  , int32   , Int32    )
+ARRAYBUFFER_VIEW_INT(UINT32_TO_INT64 , 2     , int64  , uint32  , Uint32   )
+
+#undef ARRAYBUFFER_VIEW
+#undef ARRAYBUFFER_VIEW_INT
+#undef ARRAYBUFFER_VIEW_FLT

+ 5 - 19
lib/Runtime/Language/AsmJsByteCodeGenerator.cpp

@@ -1572,26 +1572,12 @@ namespace Js
                 }
                 switch (viewType)
                 {
-                case Js::ArrayBufferView::TYPE_INT8:
-                case Js::ArrayBufferView::TYPE_UINT8:
-                    val = 0;
-                    mask = (uint32)~0;
-                    break;
-                case Js::ArrayBufferView::TYPE_INT16:
-                case Js::ArrayBufferView::TYPE_UINT16:
-                    val = 1;
-                    mask = (uint32)~1;
-                    break;
-                case Js::ArrayBufferView::TYPE_INT32:
-                case Js::ArrayBufferView::TYPE_UINT32:
-                case Js::ArrayBufferView::TYPE_FLOAT32:
-                    val = 2;
-                    mask = (uint32)~3;
-                    break;
-                case Js::ArrayBufferView::TYPE_FLOAT64:
-                    val = 3;
-                    mask = (uint32)~7;
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, irSuffix) \
+                case Js::ArrayBufferView::TYPE_##name:\
+                    val = align;\
+                    mask = ARRAYBUFFER_VIEW_MASK(align);\
                     break;
+                #include "AsmJsArrayBufferViews.h"
                 default:
                     Assume(UNREACHED);
                 }

+ 12 - 31
lib/Runtime/Language/AsmJsTypes.h

@@ -59,41 +59,22 @@ namespace Js
     {
         enum ViewType: uint8
         {
-            TYPE_INT8 = 0,
-            TYPE_UINT8,
-            TYPE_INT16,
-            TYPE_UINT16,
-            TYPE_INT32,
-            TYPE_UINT32,
-            TYPE_FLOAT32,
-            TYPE_FLOAT64,
-            TYPE_INT64,
-            TYPE_INT8_TO_INT64,
-            TYPE_UINT8_TO_INT64,
-            TYPE_INT16_TO_INT64,
-            TYPE_UINT16_TO_INT64,
-            TYPE_INT32_TO_INT64,
-            TYPE_UINT32_TO_INT64,
+#define ARRAYBUFFER_VIEW(name, ...) TYPE_##name,
+#include "AsmJsArrayBufferViews.h"
             TYPE_COUNT
         };
 
+        const uint32 NaturalAlignment[ArrayBufferView::TYPE_COUNT] =
+        {
+#define ARRAYBUFFER_VIEW(name, align, ...) align,
+#include "AsmJsArrayBufferViews.h"
+        };
+
+#define ARRAYBUFFER_VIEW_MASK(align) ((uint32)~((1 << align) - 1))
         const uint32 ViewMask[] =
         {
-            (uint32)~0 //TYPE_INT8
-            , (uint32)~0 //TYPE_UINT8
-            , (uint32)~1 //TYPE_INT16
-            , (uint32)~1 //TYPE_UINT16
-            , (uint32)~3 //TYPE_INT32
-            , (uint32)~3 //TYPE_UINT32
-            , (uint32)~3 //TYPE_FLOAT32
-            , (uint32)~7 //TYPE_FLOAT64
-            , (uint32)~7 //TYPE_INT64
-            , (uint32)~0 //TYPE_INT8_TO_INT64
-            , (uint32)~0 //TYPE_UINT8_TO_UINT64
-            , (uint32)~1 //TYPE_INT16_TO_INT64
-            , (uint32)~1 //TYPE_UINT16_TO_UINT64
-            , (uint32)~3 //TYPE_INT32_TO_INT64
-            , (uint32)~3 //TYPE_UINT32_TO_UINT64
+#define ARRAYBUFFER_VIEW(name, align, ...) ARRAYBUFFER_VIEW_MASK(align),
+#include "AsmJsArrayBufferViews.h"
         };
 
     } /* namespace ArrayBufferView */
@@ -805,7 +786,7 @@ namespace Js
         inline void SetDeferred() { mDeferred = true; }
         inline bool IsDeferred()const { return mDeferred; }
         template<typename T> inline AsmJsRegisterSpace<T>& GetRegisterSpace() {
-            return *(AsmJsRegisterSpace<T>*)mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::RegisterSpace::GetRegisterSpaceType<T>());
+            return *(AsmJsRegisterSpace<T>*)mTypedRegisterAllocator.GetRegisterSpace(WAsmJs::FromPrimitiveType<T>());
         }
         const WAsmJs::TypedRegisterAllocator& GetTypedRegisterAllocator() const { return mTypedRegisterAllocator; }
 

+ 1 - 0
lib/Runtime/Language/Chakra.Runtime.Language.vcxproj

@@ -188,6 +188,7 @@
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
     </ClInclude>
     <ClInclude Include="AsmJs.h" />
+    <ClInclude Include="AsmJsArrayBufferViews.h" />
     <ClInclude Include="AsmJsByteCodeGenerator.h" />
     <ClInclude Include="AsmJsCodeGenerator.h" />
     <ClInclude Include="AsmJsEncoder.h">

+ 3 - 2
lib/Runtime/Language/Chakra.Runtime.Language.vcxproj.filters

@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup>
     <ClCompile Include="$(MsBuildThisFileDirectory)AsmJS.cpp" />
@@ -149,6 +149,7 @@
     <ClInclude Include="ModuleNamespaceEnumerator.h" />
     <ClInclude Include="WebAssemblySource.h" />
     <ClInclude Include="ConstructorCache.h" />
+    <ClInclude Include="AsmJsArrayBufferViews.h" />
   </ItemGroup>
   <ItemGroup>
     <MASM Include="$(MSBuildThisFileDirectory)amd64\amd64_Thunks.asm">
@@ -215,4 +216,4 @@
       <Filter>arm64</Filter>
     </ARMASM>
   </ItemGroup>
-</Project>
+</Project>

+ 2 - 0
lib/Runtime/Language/InterpreterHandlerAsmJs.inl

@@ -52,6 +52,8 @@ EXDEF2    (NOPASMJS          , InvalidOpCode, Empty
   DEF3_WMS( CUSTOM_ASMJS     , LdArr        , OP_LdArrGeneric              , AsmTypedArr         )
   DEF3_WMS( CUSTOM_ASMJS     , LdArrWasm    , OP_LdArrWasm                 , WasmMemAccess       )
   DEF3_WMS( CUSTOM_ASMJS     , StArrWasm    , OP_StArrWasm                 , WasmMemAccess       )
+EXDEF3_WMS( CUSTOM_ASMJS     , LdArrAtomic  , OP_LdArrAtomic               , WasmMemAccess       )
+EXDEF3_WMS( CUSTOM_ASMJS     , StArrAtomic  , OP_StArrAtomic               , WasmMemAccess       )
   DEF3_WMS( CUSTOM_ASMJS     , LdArrConst   , OP_LdArrConstIndex           , AsmTypedArr         )
   DEF3_WMS( CUSTOM_ASMJS     , StArr        , OP_StArrGeneric              , AsmTypedArr         )
   DEF3_WMS( CUSTOM_ASMJS     , StArrConst   , OP_StArrConstIndex           , AsmTypedArr         )

+ 133 - 128
lib/Runtime/Language/InterpreterStackFrame.cpp

@@ -14,6 +14,7 @@
 #include "Language/InterpreterStackFrame.h"
 #include "Library/JavascriptGeneratorFunction.h"
 #include "Library/ForInObjectEnumerator.h"
+#include "Library/AtomicsOperations.h"
 #include "../../WasmReader/WasmParseTree.h"
 ///----------------------------------------------------------------------------
 ///
@@ -929,83 +930,6 @@
 
 namespace Js
 {
-#ifdef ASMJS_PLAT
-
-    typedef void(InterpreterStackFrame::*ArrFunc)(uint32, RegSlot);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT8 == 0);
-    CompileAssert(Js::ArrayBufferView::TYPE_UINT8 == 1);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT16 == 2);
-    CompileAssert(Js::ArrayBufferView::TYPE_UINT16 == 3);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT32 == 4);
-    CompileAssert(Js::ArrayBufferView::TYPE_UINT32 == 5);
-    CompileAssert(Js::ArrayBufferView::TYPE_FLOAT32 == 6);
-    CompileAssert(Js::ArrayBufferView::TYPE_FLOAT64 == 7);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT64 == 8);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT8_TO_INT64 == 9);
-    CompileAssert(Js::ArrayBufferView::TYPE_UINT8_TO_INT64 == 10);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT16_TO_INT64 == 11);
-    CompileAssert(Js::ArrayBufferView::TYPE_UINT16_TO_INT64 == 12);
-    CompileAssert(Js::ArrayBufferView::TYPE_INT32_TO_INT64 == 13);
-    CompileAssert(Js::ArrayBufferView::TYPE_UINT32_TO_INT64 == 14);
-
-    const InterpreterStackFrame::ArrFunc InterpreterStackFrame::StArrFunc[] =
-    {
-        &InterpreterStackFrame::OP_StArr<int8, int32>,
-        &InterpreterStackFrame::OP_StArr<uint8, int32>,
-        &InterpreterStackFrame::OP_StArr<int16, int32>,
-        &InterpreterStackFrame::OP_StArr<uint16, int32>,
-        &InterpreterStackFrame::OP_StArr<int32>,
-        &InterpreterStackFrame::OP_StArr<uint32, int32>,
-        &InterpreterStackFrame::OP_StArr<float>,
-        &InterpreterStackFrame::OP_StArr<double>,
-        &InterpreterStackFrame::OP_StArr<int64>,
-        &InterpreterStackFrame::OP_StArr<int8, int64>,
-        &InterpreterStackFrame::OP_StArr<uint8, int64>,
-        &InterpreterStackFrame::OP_StArr<int16, int64>,
-        &InterpreterStackFrame::OP_StArr<uint16, int64>,
-        &InterpreterStackFrame::OP_StArr<int32, int64>,
-        &InterpreterStackFrame::OP_StArr<uint32, int64>,
-    };
-
-    const InterpreterStackFrame::ArrFunc InterpreterStackFrame::LdArrFunc[] =
-    {
-        &InterpreterStackFrame::OP_LdArr<int8, int32>,
-        &InterpreterStackFrame::OP_LdArr<uint8, int32>,
-        &InterpreterStackFrame::OP_LdArr<int16, int32>,
-        &InterpreterStackFrame::OP_LdArr<uint16, int32>,
-        &InterpreterStackFrame::OP_LdArr<int32>,
-        &InterpreterStackFrame::OP_LdArr<uint32, int32>,
-        &InterpreterStackFrame::OP_LdArr<float>,
-        &InterpreterStackFrame::OP_LdArr<double>,
-        &InterpreterStackFrame::OP_LdArr<int64>,
-        &InterpreterStackFrame::OP_LdArr<int8, int64>,
-        &InterpreterStackFrame::OP_LdArr<uint8, int64>,
-        &InterpreterStackFrame::OP_LdArr<int16, int64>,
-        &InterpreterStackFrame::OP_LdArr<uint16, int64>,
-        &InterpreterStackFrame::OP_LdArr<int32, int64>,
-        &InterpreterStackFrame::OP_LdArr<uint32, int64>,
-    };
-
-    const int InterpreterStackFrame::TypeToSizeMap[] =
-    {
-        /*int8*/ 1,
-        /*uint8*/ 1,
-        /*int16*/ 2,
-        /*uint16*/ 2,
-        /*int32*/ 4,
-        /*uint32*/ 4,
-        /*float*/ 4,
-        /*double*/ 8,
-        /*int64*/ 8,
-        /*int8*/ 1,
-        /*uint8*/ 1,
-        /*int16*/ 2,
-        /*uint16*/ 2,
-        /*int32*/ 4,
-        /*uint32*/ 4,
-    };
-#endif
-
     Var InterpreterStackFrame::InnerScopeFromRegSlot(RegSlot reg) const
     {
         return InnerScopeFromIndex(reg - m_functionBody->GetFirstInnerScopeRegister());
@@ -8526,7 +8450,6 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId)
     template <typename ArrayType, typename RegType>
     void InterpreterStackFrame::OP_StArr(uint32 index, RegSlot regSlot)
     {
-        CompileAssert(Js::ArrayBufferView::TYPE_COUNT == (sizeof(InterpreterStackFrame::StArrFunc) / sizeof(InterpreterStackFrame::ArrFunc)));
         JavascriptArrayBuffer* arr = GetAsmJsBuffer();
         if (index < arr->GetByteLength())
         {
@@ -8607,7 +8530,6 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId)
     template <typename ArrayType, typename RegType>
     void InterpreterStackFrame::OP_LdArr(uint32 index, RegSlot regSlot)
     {
-        CompileAssert(Js::ArrayBufferView::TYPE_COUNT == (sizeof(InterpreterStackFrame::LdArrFunc) / sizeof(InterpreterStackFrame::ArrFunc)));
         JavascriptArrayBuffer* arr = GetAsmJsBuffer();
         BYTE* buffer = arr->GetBuffer();
         ArrayType val = index < (arr->GetByteLength()) ? *(ArrayType*)(buffer + index) : GetArrayViewOverflowVal<ArrayType>();
@@ -8640,10 +8562,40 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId)
     template <class T>
     void InterpreterStackFrame::OP_LdArrGeneric(const unaligned T* playout)
     {
-        Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType];
-        (this->*LdArrFunc[playout->ViewType])(index, playout->Value);
+        const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex);
+        switch (playout->ViewType)
+        {
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: \
+            OP_LdArr<MemType, RegType>(index & ARRAYBUFFER_VIEW_MASK(align), playout->Value); \
+            return;
+#include "AsmJsArrayBufferViews.h"
+        default:Assert(UNREACHED);
+        }
+    }
+
+    template<typename MemType>
+    void InterpreterStackFrame::WasmArrayBoundsCheck(uint64 index, uint32 byteLength)
+    {
+        if (index + sizeof(MemType) > byteLength)
+        {
+            JavascriptError::ThrowWebAssemblyRuntimeError(scriptContext, WASMERR_ArrayIndexOutOfRange);
+        }
+    }
+
+    template<typename MemType>
+    MemType* InterpreterStackFrame::WasmAtomicsArrayBoundsCheck(byte* buffer, uint64 index, uint32 byteLength)
+    {
+        MemType* readBuffer = (MemType*)(buffer + index);
+        // Do the alignment check first, to be consistent with the order in which the JIT performs the checks
+        if (!::Math::IsAligned<intptr_t>((intptr_t)readBuffer, sizeof(MemType)))
+        {
+            JavascriptError::ThrowWebAssemblyRuntimeError(scriptContext, WASMERR_UnalignedAtomicAccess);
+        }
+        WasmArrayBoundsCheck<MemType>(index, byteLength);
+        return readBuffer;
     }
+
     template <class T>
     void InterpreterStackFrame::OP_LdArrWasm(const unaligned T* playout)
     {
@@ -8651,49 +8603,107 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId)
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
         const uint64 index = playout->Offset + (uint64)(uint32)GetRegRawInt(playout->SlotIndex);
         WebAssemblyArrayBuffer* arr = GetWebAssemblyMemory()->GetBuffer();
-        if (index + TypeToSizeMap[playout->ViewType] > arr->GetByteLength())
+
+        uint32 byteLength = arr->GetByteLength();
+        BYTE* buffer = arr->GetBuffer();
+        switch (playout->ViewType)
         {
-            JavascriptError::ThrowWebAssemblyRuntimeError(scriptContext, WASMERR_ArrayIndexOutOfRange);
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: \
+            WasmArrayBoundsCheck<MemType>(index, byteLength); \
+            SetRegRaw<RegType>(playout->Value, (RegType)*(MemType*)(buffer + index)); \
+            return;
+#include "AsmJsArrayBufferViews.h"
+        default:Assert(UNREACHED);
         }
+#else
+        Assert(UNREACHED);
+#endif
+    }
 
+    template <class T>
+    void InterpreterStackFrame::OP_LdArrAtomic(const unaligned T* playout)
+    {
+#ifdef ENABLE_WASM
+        Assert(CONFIG_FLAG(WasmThreads));
+        Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
+        const uint64 index = playout->Offset + (uint64)(uint32)GetRegRawInt(playout->SlotIndex);
+        WebAssemblyArrayBuffer* arr = GetWebAssemblyMemory()->GetBuffer();
+
+        uint32 byteLength = arr->GetByteLength();
         BYTE* buffer = arr->GetBuffer();
         switch (playout->ViewType)
         {
-        case ArrayBufferView::ViewType::TYPE_INT8: SetRegRaw<int32>(playout->Value, (int32)*(int8*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_UINT8 : SetRegRaw<int32>(playout->Value, (int32)*(uint8*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_INT16 : SetRegRaw<int32>(playout->Value, (int32)*(int16*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_UINT16 : SetRegRaw<int32>(playout->Value, (int32)*(uint16*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_INT32 : SetRegRaw<int32>(playout->Value, (int32)*(int32*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_UINT32 : SetRegRaw<int32>(playout->Value, (int32)*(uint32*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_FLOAT32 : SetRegRaw<float>(playout->Value, (float)*(float*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_FLOAT64 : SetRegRaw<double>(playout->Value, (double)*(double*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(int64*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_INT8_TO_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(int8*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_UINT8_TO_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(uint8*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_INT16_TO_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(int16*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_UINT16_TO_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(uint16*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_INT32_TO_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(int32*)(buffer + index)); return;
-        case ArrayBufferView::ViewType::TYPE_UINT32_TO_INT64 : SetRegRaw<int64>(playout->Value, (int64)*(uint32*)(buffer + index)); return;
+#define ARRAYBUFFER_VIEW_INT(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: {\
+            MemType* readBuffer = WasmAtomicsArrayBoundsCheck<MemType>(buffer, index, byteLength); \
+            MemType value = AtomicsOperations::Load<MemType>(readBuffer); \
+            SetRegRaw<RegType>(playout->Value, (RegType)value); \
+            return; \
+        }
+#include "AsmJsArrayBufferViews.h"
         default:Assert(UNREACHED);
         }
-        CompileAssert(ArrayBufferView::ViewType::TYPE_COUNT == 15);
 #else
         Assert(UNREACHED);
 #endif
     }
+
     template <class T>
-    void InterpreterStackFrame::OP_LdArrConstIndex(const unaligned T* playout)
+    void InterpreterStackFrame::OP_StArrAtomic(const unaligned T* playout)
     {
-        const uint32 index = playout->SlotIndex;
+#ifdef ENABLE_WASM
+        Assert(CONFIG_FLAG(WasmThreads));
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        (this->*LdArrFunc[playout->ViewType])(index, playout->Value);
+        const uint64 index = playout->Offset + (uint64)(uint32)GetRegRawInt(playout->SlotIndex);
+        WebAssemblyArrayBuffer* arr = GetWebAssemblyMemory()->GetBuffer();
+
+        uint32 byteLength = arr->GetByteLength();
+        BYTE* buffer = arr->GetBuffer();
+        switch (playout->ViewType)
+        {
+#define ARRAYBUFFER_VIEW_INT(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: {\
+            MemType* readBuffer = WasmAtomicsArrayBoundsCheck<MemType>(buffer, index, byteLength); \
+            MemType value = (MemType)GetRegRaw<RegType>(playout->Value); \
+            MemType storedValue = AtomicsOperations::Store<MemType>(readBuffer, value); \
+            Assert(storedValue == value); \
+            return; \
+        }
+#include "AsmJsArrayBufferViews.h"
+        default:Assert(UNREACHED);
+        }
+#else
+        Assert(UNREACHED);
+#endif
+    }
+
+    template <class T>
+    void InterpreterStackFrame::OP_LdArrConstIndex(const unaligned T* playout)
+    {
+        switch (playout->ViewType)
+        {
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: \
+            OP_LdArr<MemType, RegType>(playout->SlotIndex, playout->Value); \
+            return;
+#include "AsmJsArrayBufferViews.h"
+        default:Assert(UNREACHED);
+        }
     }
     template <class T>
     void InterpreterStackFrame::OP_StArrGeneric(const unaligned T* playout)
     {
-        Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex) & ArrayBufferView::ViewMask[playout->ViewType];
-        (this->*StArrFunc[playout->ViewType])(index, playout->Value);
+        const uint32 index = (uint32)GetRegRawInt(playout->SlotIndex);
+        switch (playout->ViewType)
+        {
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: \
+            OP_StArr<MemType, RegType>(index & ARRAYBUFFER_VIEW_MASK(align), playout->Value); \
+            return;
+#include "AsmJsArrayBufferViews.h"
+        default:Assert(UNREACHED);
+        }
     }
     template <class T>
     void InterpreterStackFrame::OP_StArrWasm(const unaligned T* playout)
@@ -8702,28 +8712,17 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId)
         Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
         const uint64 index = playout->Offset + (uint64)(uint32)GetRegRawInt(playout->SlotIndex);
         WebAssemblyArrayBuffer* arr = GetWebAssemblyMemory()->GetBuffer();
-        if (index + TypeToSizeMap[playout->ViewType] > arr->GetByteLength())
-        {
-            JavascriptError::ThrowWebAssemblyRuntimeError(scriptContext, WASMERR_ArrayIndexOutOfRange);
-        }
+
+        uint32 byteLength = arr->GetByteLength();
         BYTE* buffer = arr->GetBuffer();
         switch (playout->ViewType)
         {
-        case ArrayBufferView::ViewType::TYPE_INT8: *(int8*)(buffer + index) = (int8) (GetRegRaw<int32>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_UINT8: *(uint8*)(buffer + index) = (uint8) (GetRegRaw<int32>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_INT16: *(int16*)(buffer + index) = (int16) (GetRegRaw<int32>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_UINT16: *(uint16*)(buffer + index) = (uint16) (GetRegRaw<int32>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_INT32: *(int32*)(buffer + index) = (int32) (GetRegRaw<int32>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_UINT32: *(uint32*)(buffer + index) = (uint32) (GetRegRaw<int32>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_FLOAT32: *(float*)(buffer + index) = (float) (GetRegRaw<float>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_FLOAT64: *(double*)(buffer + index) = (double) (GetRegRaw<double>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_INT64: *(int64*)(buffer + index) = (int64) (GetRegRaw<int64>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_INT8_TO_INT64: *(int8*)(buffer + index) = (int8) (GetRegRaw<int64>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_UINT8_TO_INT64: *(uint8*)(buffer + index) = (uint8) (GetRegRaw<int64>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_INT16_TO_INT64: *(int16*)(buffer + index) = (int16) (GetRegRaw<int64>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_UINT16_TO_INT64: *(uint16*)(buffer + index) = (uint16) (GetRegRaw<int64>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_INT32_TO_INT64: *(int32*)(buffer + index) = (int32) (GetRegRaw<int64>(playout->Value)); break;
-        case ArrayBufferView::ViewType::TYPE_UINT32_TO_INT64: *(uint32*)(buffer + index) = (uint32) (GetRegRaw<int64>(playout->Value)); break;
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: \
+            WasmArrayBoundsCheck<MemType>(index, byteLength); \
+            *(MemType*)(buffer + index) = (MemType)(GetRegRaw<RegType>(playout->Value)); \
+            break;
+#include "AsmJsArrayBufferViews.h"
         default:Assert(UNREACHED);
         }
         CompileAssert(ArrayBufferView::ViewType::TYPE_COUNT == 15);
@@ -8741,9 +8740,15 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(uint loopId)
     template <class T>
     void InterpreterStackFrame::OP_StArrConstIndex(const unaligned T* playout)
     {
-        const uint32 index = playout->SlotIndex;
-        Assert(playout->ViewType < Js::ArrayBufferView::TYPE_COUNT);
-        (this->*StArrFunc[playout->ViewType])(index, playout->Value);
+        switch (playout->ViewType)
+        {
+#define ARRAYBUFFER_VIEW(name, align, RegType, MemType, ...) \
+        case ArrayBufferView::ViewType::TYPE_##name: \
+            OP_StArr<MemType, RegType>(playout->SlotIndex, playout->Value); \
+            return;
+#include "AsmJsArrayBufferViews.h"
+        default:Assert(UNREACHED);
+        }
     }
 #endif
 

+ 4 - 6
lib/Runtime/Language/InterpreterStackFrame.h

@@ -188,12 +188,6 @@ namespace Js
 
         static const int LocalsThreshold = 32 * 1024; // Number of locals vars we'll allocate on the frame.
                                                       // If there are more, we'll use an arena.
-#ifndef TEMP_DISABLE_ASMJS
-        typedef void(InterpreterStackFrame::*ArrFunc)(uint32, RegSlot);
-        static const ArrFunc StArrFunc[15];
-        static const ArrFunc LdArrFunc[15];
-        static const int     TypeToSizeMap[15];
-#endif
 
         //This class must have an empty ctor (otherwise it will break the code in InterpreterStackFrame::InterpreterThunk
         inline InterpreterStackFrame() { }
@@ -690,6 +684,10 @@ namespace Js
         template <class T> inline void OP_StArrGeneric   ( const unaligned T* playout );
         template <class T> inline void OP_StArrWasm      ( const unaligned T* playout );
         template <class T> inline void OP_StArrConstIndex( const unaligned T* playout );
+        template <class T> inline void OP_LdArrAtomic    ( const unaligned T* playout );
+        template <class T> inline void OP_StArrAtomic    ( const unaligned T* playout );
+        template<typename MemType> void WasmArrayBoundsCheck(uint64 index, uint32 byteLength);
+        template<typename MemType> MemType* WasmAtomicsArrayBoundsCheck(byte* buffer, uint64 index, uint32 byteLength);
         inline Var OP_LdSlot(Var instance, int32 slotIndex);
         inline Var OP_LdObjSlot(Var instance, int32 slotIndex);
         inline Var OP_LdFrameDisplaySlot(Var instance, int32 slotIndex);

+ 17 - 1
lib/Runtime/Language/JavascriptConversion.cpp

@@ -1340,7 +1340,7 @@ CommonNumber:
         }
     }
 
-    inline uint16 JavascriptConversion::ToUInt16(double T1)
+    uint16 JavascriptConversion::ToUInt16(double T1)
     {
         //
         // VC does the right thing here, if we first convert to uint32 and then to uint16
@@ -1360,6 +1360,22 @@ CommonNumber:
         return (uint16) result;
     }
 
+    int16 JavascriptConversion::ToInt16(double aValue)
+    {
+        return (int16)ToInt32(aValue);
+    }
+
+    int8 JavascriptConversion::ToInt8(double aValue)
+    {
+        return (int8)ToInt32(aValue);
+    }
+
+    uint8 JavascriptConversion::ToUInt8(double aValue)
+    {
+        return (uint8)ToUInt32(aValue);
+    }
+
+
     JavascriptString * JavascriptConversion::ToPrimitiveString(Var aValue, ScriptContext * scriptContext)
     {
         return ToString(ToPrimitive(aValue, JavascriptHint::None, scriptContext), scriptContext);

+ 3 - 0
lib/Runtime/Language/JavascriptConversion.h

@@ -49,9 +49,12 @@ namespace Js {
         static int32 ToInt32_Full(Var aValue, ScriptContext* scriptContext);
 
         static int8 ToInt8(Var aValue, ScriptContext* scriptContext);
+        static int8 ToInt8(double aValue);
         static uint8 ToUInt8(Var aValue, ScriptContext* scriptContext);
+        static uint8 ToUInt8(double aValue);
         static uint8 ToUInt8Clamped(Var aValue, ScriptContext* scriptContext);
         static int16 ToInt16(Var aValue, ScriptContext* scriptContext);
+        static int16 ToInt16(double aValue);
         static float ToFloat(Var aValue, ScriptContext* scriptContext);
 
         static uint32 ToUInt32(Var aValue, ScriptContext* scriptContext);

+ 6 - 6
lib/Runtime/Language/WAsmjsUtils.cpp

@@ -10,12 +10,6 @@
 namespace WAsmJs
 {
 
-template<> Types RegisterSpace::GetRegisterSpaceType<int32>(){return WAsmJs::INT32;}
-template<> Types RegisterSpace::GetRegisterSpaceType<int64>(){return WAsmJs::INT64;}
-template<> Types RegisterSpace::GetRegisterSpaceType<float>(){return WAsmJs::FLOAT32;}
-template<> Types RegisterSpace::GetRegisterSpaceType<double>(){return WAsmJs::FLOAT64;}
-template<> Types RegisterSpace::GetRegisterSpaceType<AsmJsSIMDValue>(){return WAsmJs::SIMD;}
-
 #ifdef ENABLE_DEBUG_CONFIG_OPTIONS
     namespace Tracing
     {
@@ -184,6 +178,12 @@ template<> Types RegisterSpace::GetRegisterSpaceType<AsmJsSIMDValue>(){return WA
         return WAsmJs::LIMIT;
     }
 
+    template<> Types FromPrimitiveType<int32>() { return WAsmJs::INT32; }
+    template<> Types FromPrimitiveType<int64>() { return WAsmJs::INT64; }
+    template<> Types FromPrimitiveType<float>() { return WAsmJs::FLOAT32; }
+    template<> Types FromPrimitiveType<double>() { return WAsmJs::FLOAT64; }
+    template<> Types FromPrimitiveType<AsmJsSIMDValue>() { return WAsmJs::SIMD; }
+
 #if DBG_DUMP
     void RegisterSpace::GetTypeDebugName(Types type, char16* buf, uint bufsize, bool shortName)
     {

+ 2 - 1
lib/Runtime/Language/WAsmjsUtils.h

@@ -65,6 +65,8 @@ namespace WAsmJs
     const Types LastType = (Types)(LIMIT - 1);
     uint32 GetTypeByteSize(Types type);
     Types FromIRType(IRType irType);
+    template<typename T>
+    Types FromPrimitiveType();
 
     /// Register space for const, parameters, variables and tmp values
     ///     --------------------------------------------------------
@@ -237,7 +239,6 @@ namespace WAsmJs
             return false;
         }
 
-        template<typename T> static Types GetRegisterSpaceType();
 #if DBG_DUMP
         // Used for debugging
         Types mType;

+ 5 - 1
lib/Runtime/Library/AtomicsObject.cpp

@@ -43,7 +43,11 @@ namespace Js
 
         TypedArrayBase *typedArrayBase = TypedArrayBase::UnsafeFromVar(typedArray);
         ArrayBufferBase* arrayBuffer = typedArrayBase->GetArrayBuffer();
-        if (arrayBuffer == nullptr || !ArrayBufferBase::Is(arrayBuffer) || !arrayBuffer->IsSharedArrayBuffer())
+
+        // todo:: Allow WebAssemblySharedArrayBuffer
+        // Since WebAssemblySharedArrayBuffer can be detached (through grow_memory), we need to revalidate the buffer
+        // after JavascriptConversion on index and values, because {valueOf} can detach the buffer
+        if (arrayBuffer == nullptr || !ArrayBufferBase::Is(arrayBuffer) || !arrayBuffer->IsSharedArrayBuffer() || arrayBuffer->IsWebAssemblyArrayBuffer())
         {
             JavascriptError::ThrowTypeError(scriptContext, JSERR_NeedSharedArrayBufferObject);
         }

+ 158 - 0
lib/Runtime/Library/AtomicsOperations.cpp

@@ -0,0 +1,158 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+#include "RuntimeLibraryPch.h"
+#include "AtomicsOperations.h"
+
+#ifdef _WIN32
+#define InterlockedExchangeAdd8 _InterlockedExchangeAdd8
+#define InterlockedExchangeAdd16 _InterlockedExchangeAdd16
+
+#define InterlockedAnd8 _InterlockedAnd8
+#define InterlockedAnd16 _InterlockedAnd16
+
+#define InterlockedOr8 _InterlockedOr8
+#define InterlockedOr16 _InterlockedOr16
+
+#define InterlockedXor8 _InterlockedXor8
+#define InterlockedXor16 _InterlockedXor16
+
+#define InterlockedCompareExchange8 _InterlockedCompareExchange8
+#define InterlockedCompareExchange16 _InterlockedCompareExchange16
+
+#define InterlockedExchange8 _InterlockedExchange8
+#define InterlockedExchange16 _InterlockedExchange16
+#endif
+
+#define InterlockedExchangeAdd32 InterlockedExchangeAdd
+#define InterlockedAnd32 InterlockedAnd
+#define InterlockedOr32 InterlockedOr
+#define InterlockedXor32 InterlockedXor
+#define InterlockedCompareExchange32 InterlockedCompareExchange
+#define InterlockedExchange32 InterlockedExchange
+
+template<typename T> struct ConvertType {};
+template<> struct ConvertType<int8> { typedef char _t; };
+template<> struct ConvertType<uint8> { typedef char _t; };
+template<> struct ConvertType<int16> { typedef short _t; };
+template<> struct ConvertType<uint16> { typedef short _t; };
+template<> struct ConvertType<int32> { typedef LONG _t; };
+template<> struct ConvertType<uint32> { typedef LONG _t; };
+template<> struct ConvertType<int64> { typedef LONGLONG _t; };
+
+#define MakeInterLockArgDef1(type) type value
+#define MakeInterLockArgDef2(type) type v1, type v2
+#define MakeInterLockArgUse1 value
+#define MakeInterLockArgUse2 v1, v2
+#define _MakeInterlockTemplate(op, argDef, argUse) \
+template<typename T> T Interlocked##op##_t(T* target, argDef(T));\
+template<> char Interlocked##op##_t(char* target, argDef(char))   { return Interlocked##op##8 (target, argUse); }\
+template<> short Interlocked##op##_t(short* target, argDef(short)){ return Interlocked##op##16(target, argUse); }\
+template<> LONG Interlocked##op##_t(LONG* target, argDef(LONG))   { return Interlocked##op##32(target, argUse); }\
+template<> LONGLONG Interlocked##op##_t(LONGLONG* target, argDef(LONGLONG)) { return Interlocked##op##64(target, argUse); }
+#define MakeInterlockTemplate(op, nArgs) _MakeInterlockTemplate(op, MakeInterLockArgDef##nArgs, MakeInterLockArgUse##nArgs)
+MakeInterlockTemplate(ExchangeAdd, 1)
+MakeInterlockTemplate(And, 1)
+MakeInterlockTemplate(Or, 1)
+MakeInterlockTemplate(Xor, 1)
+MakeInterlockTemplate(Exchange, 1)
+MakeInterlockTemplate(CompareExchange, 2)
+
+namespace Js
+{
+template<typename T> T AtomicsOperations::Load(T* buffer)
+{
+    // MemoryBarrier only works when the memory size is not greater than the register size
+    CompileAssert(sizeof(T) <= sizeof(size_t));
+    MemoryBarrier();
+    T result = (T)*buffer;
+    return result;
+}
+
+#if TARGET_32
+template<> int64 AtomicsOperations::Load(int64* buffer)
+{
+    CompileAssert(sizeof(size_t) == 4);
+    // Implement 64-bit atomic load on 32-bit platforms with a CompareExchange
+    // It is slower, but at least it is guaranteed to be an atomic operation
+    return CompareExchange<int64>(buffer, 0, 0);
+}
+#endif
+
+template<typename T> T AtomicsOperations::Store(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    InterlockedExchange_t<convertType>((convertType*)buffer, (convertType)value);
+    return value;
+}
+
+template<typename T> T AtomicsOperations::Add(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedExchangeAdd_t<convertType>((convertType*)buffer, (convertType)value);
+    return result;
+}
+
+template<typename T> T AtomicsOperations::And(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedAnd_t<convertType>((convertType*)buffer, (convertType)value);
+    return result;
+}
+
+template<typename T> T AtomicsOperations::CompareExchange(T* buffer, T comparand, T replacementValue)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedCompareExchange_t<convertType>((convertType*)buffer, (convertType)replacementValue, (convertType)comparand);
+    return result;
+}
+
+template<typename T> T AtomicsOperations::Exchange(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedExchange_t<convertType>((convertType*)buffer, (convertType)value);
+    return result;
+}
+
+template<typename T> T AtomicsOperations::Or(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedOr_t<convertType>((convertType*)buffer, (convertType)value);
+    return result;
+}
+
+template<typename T> T AtomicsOperations::Sub(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedExchangeAdd_t<convertType>((convertType*)buffer, -(convertType)value);
+    return result;
+}
+
+template<typename T> T AtomicsOperations::Xor(T* buffer, T value)
+{
+    typedef typename ConvertType<T>::_t convertType;
+    T result = (T)InterlockedXor_t<convertType>((convertType*)buffer, (convertType)value);
+    return result;
+}
+
+#define ExplicitImplementation(type) \
+    template type AtomicsOperations::Load(type*); \
+    template type AtomicsOperations::Store(type*, type); \
+    template type AtomicsOperations::Add(type*, type); \
+    template type AtomicsOperations::And(type*, type); \
+    template type AtomicsOperations::CompareExchange(type*, type, type); \
+    template type AtomicsOperations::Exchange(type*, type); \
+    template type AtomicsOperations::Or(type*, type); \
+    template type AtomicsOperations::Sub(type*, type); \
+    template type AtomicsOperations::Xor(type*, type); \
+
+ExplicitImplementation(int8);
+ExplicitImplementation(uint8);
+ExplicitImplementation(int16);
+ExplicitImplementation(uint16);
+ExplicitImplementation(int32);
+ExplicitImplementation(uint32);
+ExplicitImplementation(int64);
+
+};

+ 24 - 0
lib/Runtime/Library/AtomicsOperations.h

@@ -0,0 +1,24 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+//  Implements Atomics according to http://tc39.github.io/ecmascript_sharedmem/shmem.html
+//----------------------------------------------------------------------------
+
+#pragma once
+namespace Js
+{
+    class AtomicsOperations
+    {
+    public:
+        template<typename T> static T Load(T* buffer);
+        template<typename T> static T Store(T* buffer, T value);
+        template<typename T> static T Add(T* buffer, T value);
+        template<typename T> static T And(T* buffer, T value);
+        template<typename T> static T CompareExchange(T* buffer, T comparand, T replacementValue);
+        template<typename T> static T Exchange(T* buffer, T value);
+        template<typename T> static T Or(T* buffer, T value);
+        template<typename T> static T Sub(T* buffer, T value);
+        template<typename T> static T Xor(T* buffer, T value);
+    };
+}

+ 1 - 0
lib/Runtime/Library/CMakeLists.txt

@@ -25,6 +25,7 @@ set(CRLIB_SOURCE_CODES
     ArgumentsObject.cpp
     ArgumentsObjectEnumerator.cpp
     ArrayBuffer.cpp
+    AtomicsOperations.cpp
     AtomicsObject.cpp
     BoundFunction.cpp
     BufferStringBuilder.cpp

+ 2 - 0
lib/Runtime/Library/Chakra.Runtime.Library.vcxproj

@@ -145,6 +145,7 @@
     <ClCompile Include="$(MSBuildThisFileDirectory)CustomExternalIterator.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)JavascriptExceptionMetadata.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)VerifyMarkFalseReference.cpp" />
+    <ClCompile Include="$(MSBuildThisFileDirectory)AtomicsOperations.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)JSONStringBuilder.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)JSONStringifier.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)LazyJSONString.cpp" />
@@ -157,6 +158,7 @@
     <ClInclude Include="..\RuntimeCommon.h" />
     <ClInclude Include="..\SerializableFunctionFields.h" />
     <ClInclude Include="AtomicsObject.h" />
+    <ClInclude Include="AtomicsOperations.h" />
     <ClInclude Include="PropertyRecordUsageCache.h" />
     <ClInclude Include="CustomExternalIterator.h" />
     <ClInclude Include="JsBuiltInEngineInterfaceExtensionObject.h" />

+ 3 - 1
lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters

@@ -97,6 +97,7 @@
     <ClCompile Include="$(MSBuildThisFileDirectory)WabtInterface.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)CustomExternalIterator.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)VerifyMarkFalseReference.cpp" />
+    <ClCompile Include="$(MSBuildThisFileDirectory)AtomicsOperations.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)JsBuiltInEngineInterfaceExtensionObject.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)LazyJSONString.cpp" />
     <ClCompile Include="$(MSBuildThisFileDirectory)JSONStringifier.cpp" />
@@ -215,6 +216,7 @@
     <ClInclude Include="WabtInterface.h" />
     <ClInclude Include="CustomExternalIterator.h" />
     <ClInclude Include="JavascriptExceptionMetadata.h" />
+    <ClInclude Include="AtomicsOperations.h" />
     <ClInclude Include="..\DetachedStateBase.h" />
     <ClInclude Include="LazyJSONString.h" />
     <ClInclude Include="JSONStringifier.h" />
@@ -224,7 +226,7 @@
     <ClInclude Include="JsBuiltIn\JsBuiltIn.js.bc.64b.h" />
     <ClInclude Include="JsBuiltIn\JsBuiltIn.js.nojit.bc.32b.h" />
     <ClInclude Include="JsBuiltIn\JsBuiltIn.js.nojit.bc.64b.h" />
-    <ClInclude Include="PropertyRecordUsageCache.h" />
+    <ClInclude Include="PropertyRecordUsageCache.h" />
     <ClInclude Include="..\LibraryFunction.h" />
   </ItemGroup>
   <ItemGroup>

+ 8 - 5
lib/Runtime/Library/JavascriptFunction.cpp

@@ -2324,6 +2324,14 @@ LABEL1:
             {
                 return false;
             }
+
+            if (isWasmOnly)
+            {
+                // It is possible to get an A/V on instructions other than load/store (e.g. xchg for atomics),
+                // which we don't decode at this time.
+                // We've confirmed the A/V occurred in the Virtual Memory, so just throw now
+                JavascriptError::ThrowWebAssemblyRuntimeError(func->GetScriptContext(), WASMERR_ArrayIndexOutOfRange);
+            }
         }
         else
         {
@@ -2363,11 +2371,6 @@ LABEL1:
             return false;
         }
 
-        if (isWasmOnly)
-        {
-            JavascriptError::ThrowWebAssemblyRuntimeError(func->GetScriptContext(), WASMERR_ArrayIndexOutOfRange);
-        }
-
         // SIMD loads/stores do bounds checks.
         if (instrData.isSimd)
         {

+ 63 - 137
lib/Runtime/Library/TypedArray.cpp

@@ -7,6 +7,7 @@
 // can share the same array buffer.
 //----------------------------------------------------------------------------
 #include "RuntimeLibraryPch.h"
+#include "AtomicsOperations.h"
 
 #define INSTANTIATE_BUILT_IN_ENTRYPOINTS(typeName) \
     template Var typeName::NewInstance(RecyclableObject* function, CallInfo callInfo, ...); \
@@ -2932,160 +2933,85 @@ namespace Js
     DIRECT_GET_VAR_CHECK_NO_DETACH_CHECK(Float64Array);
     DIRECT_GET_VAR_CHECK_NO_DETACH_CHECK(Float64VirtualArray);
 
-#define TypedArrayBeginStub(type) \
+#define TypedArrayBeginStub(TypedArrayName) \
         Assert(GetArrayBuffer() || GetArrayBuffer()->GetBuffer()); \
-        Assert(index < GetLength()); \
+        Assert(accessIndex < GetLength()); \
         ScriptContext *scriptContext = GetScriptContext(); \
-        type *buffer = (type*)this->buffer + index;
+        typedef TypedArrayName::TypedArrayType type; \
+        type *buffer = (type*)this->buffer + accessIndex;
 
-#ifdef _WIN32
-#define InterlockedExchangeAdd8 _InterlockedExchangeAdd8
-#define InterlockedExchangeAdd16 _InterlockedExchangeAdd16
-
-#define InterlockedAnd8 _InterlockedAnd8
-#define InterlockedAnd16 _InterlockedAnd16
-
-#define InterlockedOr8 _InterlockedOr8
-#define InterlockedOr16 _InterlockedOr16
-
-#define InterlockedXor8 _InterlockedXor8
-#define InterlockedXor16 _InterlockedXor16
-
-#define InterlockedCompareExchange8 _InterlockedCompareExchange8
-#define InterlockedCompareExchange16 _InterlockedCompareExchange16
-
-#define InterlockedExchange8 _InterlockedExchange8
-#define InterlockedExchange16 _InterlockedExchange16
-#endif
-
-#define InterlockedExchangeAdd32 InterlockedExchangeAdd
-#define InterlockedAnd32 InterlockedAnd
-#define InterlockedOr32 InterlockedOr
-#define InterlockedXor32 InterlockedXor
-#define InterlockedCompareExchange32 InterlockedCompareExchange
-#define InterlockedExchange32 InterlockedExchange
-
-#define TypedArrayAddOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedAdd(__in uint32 index, __in Var second) \
-    { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedExchangeAdd##bit((convertType*)buffer, (convertType)convertFn(second, scriptContext)); \
-        return JavascriptNumber::ToVar(result, scriptContext); \
-    }
-
-#define TypedArrayAndOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedAnd(__in uint32 index, __in Var second) \
-    { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedAnd##bit((convertType*)buffer, (convertType)convertFn(second, scriptContext)); \
-        return JavascriptNumber::ToVar(result, scriptContext); \
-    }
-
-#define TypedArrayCompareExchangeOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedCompareExchange(__in uint32 index, __in Var comparand, __in Var replacementValue) \
-    { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedCompareExchange##bit((convertType*)buffer, (convertType)convertFn(replacementValue, scriptContext), (convertType)convertFn(comparand, scriptContext)); \
-        return JavascriptNumber::ToVar(result, scriptContext); \
-    }
-
-#define TypedArrayExchangeOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedExchange(__in uint32 index, __in Var second) \
+#define TypedArrayStore(TypedArrayName, fnName, convertFn) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var value) \
     { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedExchange##bit((convertType*)buffer, (convertType)convertFn(second, scriptContext)); \
-        return JavascriptNumber::ToVar(result, scriptContext); \
+        TypedArrayBeginStub(TypedArrayName); \
+        double retVal = JavascriptConversion::ToInteger(value, scriptContext); \
+        AtomicsOperations::fnName(buffer, convertFn(retVal)); \
+        return JavascriptNumber::ToVarWithCheck(retVal, scriptContext); \
     }
 
-#define TypedArrayLoadOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedLoad(__in uint32 index) \
+#define TypedArrayOp1(TypedArrayName, fnName, convertFn) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex) \
     { \
-        TypedArrayBeginStub(type); \
-        MemoryBarrier(); \
-        type result = (type)*buffer; \
+        TypedArrayBeginStub(TypedArrayName); \
+        type result = AtomicsOperations::fnName(buffer); \
         return JavascriptNumber::ToVar(result, scriptContext); \
     }
 
-#define TypedArrayOrOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedOr(__in uint32 index, __in Var second) \
+#define TypedArrayOp2(TypedArrayName, fnName, convertFn) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var value) \
     { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedOr##bit((convertType*)buffer, (convertType)convertFn(second, scriptContext)); \
+        TypedArrayBeginStub(TypedArrayName); \
+        type result = AtomicsOperations::fnName(buffer, convertFn(value, scriptContext)); \
         return JavascriptNumber::ToVar(result, scriptContext); \
     }
 
-    // Currently the TypedStore is just using the InterlockedExchange to store the value in the buffer.
-    // TODO The InterlockedExchange will have the sequential consistency any way, not sure why do we need the Memory barrier or std::atomic::store to perform this.
-
-#define TypedArrayStoreOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedStore(__in uint32 index, __in Var second) \
+#define TypedArrayOp3(TypedArrayName, fnName, convertFn) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var first, __in Var value) \
     { \
-        TypedArrayBeginStub(type); \
-        double d = JavascriptConversion::ToInteger(second, scriptContext); \
-        convertType s = (convertType)JavascriptConversion::ToUInt32(d); \
-        InterlockedExchange##bit((convertType*)buffer, s); \
-        return JavascriptNumber::ToVarWithCheck(d, scriptContext); \
-    }
-
-#define TypedArraySubOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedSub(__in uint32 index, __in Var second) \
-    { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedExchangeAdd##bit((convertType*)buffer, - (convertType)convertFn(second, scriptContext)); \
-        return JavascriptNumber::ToVar(result, scriptContext); \
-    }
-
-#define TypedArrayXorOp(TypedArrayName, bit, type, convertType, convertFn) \
-    template<> \
-    inline Var TypedArrayName##::TypedXor(__in uint32 index, __in Var second) \
-    { \
-        TypedArrayBeginStub(type); \
-        type result = (type)InterlockedXor##bit((convertType*)buffer, (convertType)convertFn(second, scriptContext)); \
+        TypedArrayBeginStub(TypedArrayName); \
+        type result = AtomicsOperations::fnName(buffer, convertFn(first, scriptContext), convertFn(value, scriptContext)); \
         return JavascriptNumber::ToVar(result, scriptContext); \
     }
 
 #define GenerateNotSupportedStub1(TypedArrayName, fnName) \
-    template<> \
-    inline Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex) \
     { \
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray); \
     }
 
 #define GenerateNotSupportedStub2(TypedArrayName, fnName) \
-    template<> \
-    inline Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var value) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var value) \
     { \
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray); \
     }
 
 #define GenerateNotSupportedStub3(TypedArrayName, fnName) \
-    template<> \
-    inline Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var first, __in Var value) \
+    template<>\
+    Var TypedArrayName##::Typed##fnName(__in uint32 accessIndex, __in Var first, __in Var value) \
     { \
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray); \
     }
 
+
 #define GENERATE_FOREACH_TYPEDARRAY(TYPEDARRAY_DEF, NOTSUPPORTEDSTUB, OP) \
-        TYPEDARRAY_DEF(Int8Array, 8, int8, char, JavascriptConversion::ToInt8); \
-        TYPEDARRAY_DEF(Int8VirtualArray, 8, int8, char, JavascriptConversion::ToInt8); \
-        TYPEDARRAY_DEF(Uint8Array, 8, uint8, char, JavascriptConversion::ToUInt8); \
-        TYPEDARRAY_DEF(Uint8VirtualArray, 8, uint8, char, JavascriptConversion::ToUInt8); \
-        TYPEDARRAY_DEF(Int16Array, 16, int16, short, JavascriptConversion::ToInt16); \
-        TYPEDARRAY_DEF(Int16VirtualArray, 16, int16, short, JavascriptConversion::ToInt16); \
-        TYPEDARRAY_DEF(Uint16Array, 16, uint16, short, JavascriptConversion::ToUInt16); \
-        TYPEDARRAY_DEF(Uint16VirtualArray, 16, uint16, short, JavascriptConversion::ToUInt16); \
-        TYPEDARRAY_DEF(Int32Array, 32, int32, LONG, JavascriptConversion::ToInt32); \
-        TYPEDARRAY_DEF(Int32VirtualArray, 32, int32, LONG, JavascriptConversion::ToInt32); \
-        TYPEDARRAY_DEF(Uint32Array, 32, uint32, LONG, JavascriptConversion::ToUInt32); \
-        TYPEDARRAY_DEF(Uint32VirtualArray, 32, uint32, LONG, JavascriptConversion::ToUInt32); \
+        TYPEDARRAY_DEF(Int8Array, OP, JavascriptConversion::ToInt8); \
+        TYPEDARRAY_DEF(Int8VirtualArray, OP, JavascriptConversion::ToInt8); \
+        TYPEDARRAY_DEF(Uint8Array, OP, JavascriptConversion::ToUInt8); \
+        TYPEDARRAY_DEF(Uint8VirtualArray, OP, JavascriptConversion::ToUInt8); \
+        TYPEDARRAY_DEF(Int16Array, OP, JavascriptConversion::ToInt16); \
+        TYPEDARRAY_DEF(Int16VirtualArray, OP, JavascriptConversion::ToInt16); \
+        TYPEDARRAY_DEF(Uint16Array, OP, JavascriptConversion::ToUInt16); \
+        TYPEDARRAY_DEF(Uint16VirtualArray, OP, JavascriptConversion::ToUInt16); \
+        TYPEDARRAY_DEF(Int32Array, OP, JavascriptConversion::ToInt32); \
+        TYPEDARRAY_DEF(Int32VirtualArray, OP, JavascriptConversion::ToInt32); \
+        TYPEDARRAY_DEF(Uint32Array, OP, JavascriptConversion::ToUInt32); \
+        TYPEDARRAY_DEF(Uint32VirtualArray, OP, JavascriptConversion::ToUInt32); \
         NOTSUPPORTEDSTUB(Float32Array, OP); \
         NOTSUPPORTEDSTUB(Float32VirtualArray, OP); \
         NOTSUPPORTEDSTUB(Float64Array, OP); \
@@ -3096,15 +3022,15 @@ namespace Js
         NOTSUPPORTEDSTUB(Uint8ClampedVirtualArray, OP); \
         NOTSUPPORTEDSTUB(BoolArray, OP);
 
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayAddOp, GenerateNotSupportedStub2, Add)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayAndOp, GenerateNotSupportedStub2, And)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayCompareExchangeOp, GenerateNotSupportedStub3, CompareExchange)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayExchangeOp, GenerateNotSupportedStub2, Exchange)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayLoadOp, GenerateNotSupportedStub1, Load)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOrOp, GenerateNotSupportedStub2, Or)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayStoreOp, GenerateNotSupportedStub2, Store)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArraySubOp, GenerateNotSupportedStub2, Sub)
-    GENERATE_FOREACH_TYPEDARRAY(TypedArrayXorOp, GenerateNotSupportedStub2, Xor)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp2, GenerateNotSupportedStub2, Add)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp2, GenerateNotSupportedStub2, And)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp3, GenerateNotSupportedStub3, CompareExchange)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp2, GenerateNotSupportedStub2, Exchange)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp1, GenerateNotSupportedStub1, Load)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp2, GenerateNotSupportedStub2, Or)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayStore, GenerateNotSupportedStub2, Store)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp2, GenerateNotSupportedStub2, Sub)
+    GENERATE_FOREACH_TYPEDARRAY(TypedArrayOp2, GenerateNotSupportedStub2, Xor)
 
     template<>
     VTableValue Int8Array::DummyVirtualFunctionToHinderLinkerICF()
@@ -3700,22 +3626,22 @@ namespace Js
         return DirectGetItem(index);
     }
 
-    Var CharArray::TypedAdd(__in uint32 index, Var second)
+    Var CharArray::TypedAdd(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedAnd(__in uint32 index, Var second)
+    Var CharArray::TypedAnd(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedCompareExchange(__in uint32 index, Var comparand, Var replacementValue)
+    Var CharArray::TypedCompareExchange(__in uint32 index, __in Var comparand, __in Var replacementValue)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedExchange(__in uint32 index, Var second)
+    Var CharArray::TypedExchange(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
@@ -3725,22 +3651,22 @@ namespace Js
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedOr(__in uint32 index, Var second)
+    Var CharArray::TypedOr(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedStore(__in uint32 index, Var second)
+    Var CharArray::TypedStore(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedSub(__in uint32 index, Var second)
+    Var CharArray::TypedSub(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }
 
-    Var CharArray::TypedXor(__in uint32 index, Var second)
+    Var CharArray::TypedXor(__in uint32 index, __in Var second)
     {
         JavascriptError::ThrowTypeError(GetScriptContext(), JSERR_InvalidOperationOnTypedArray);
     }

+ 24 - 24
lib/Runtime/Library/TypedArray.h

@@ -147,15 +147,15 @@ namespace Js
         virtual BOOL DirectSetItemNoDetachCheck(__in uint32 index, __in Js::Var value) = 0;
         virtual Var  DirectGetItemNoDetachCheck(__in uint32 index) = 0;
 
-        virtual Var TypedAdd(__in uint32 index, Var second) = 0;
-        virtual Var TypedAnd(__in uint32 index, Var second) = 0;
+        virtual Var TypedAdd(__in uint32 index, __in Var second) = 0;
+        virtual Var TypedAnd(__in uint32 index, __in Var second) = 0;
         virtual Var TypedLoad(__in uint32 index) = 0;
-        virtual Var TypedOr(__in uint32 index, Var second) = 0;
-        virtual Var TypedStore(__in uint32 index, Var second) = 0;
-        virtual Var TypedSub(__in uint32 index, Var second) = 0;
-        virtual Var TypedXor(__in uint32 index, Var second) = 0;
-        virtual Var TypedExchange(__in uint32 index, Var second) = 0;
-        virtual Var TypedCompareExchange(__in uint32 index, Var comparand, Var replacementValue) = 0;
+        virtual Var TypedOr(__in uint32 index, __in Var second) = 0;
+        virtual Var TypedStore(__in uint32 index, __in Var second) = 0;
+        virtual Var TypedSub(__in uint32 index, __in Var second) = 0;
+        virtual Var TypedXor(__in uint32 index, __in Var second) = 0;
+        virtual Var TypedExchange(__in uint32 index, __in Var second) = 0;
+        virtual Var TypedCompareExchange(__in uint32 index, __in Var comparand, __in Var replacementValue) = 0;
 
         uint32 GetByteLength() const { return length * BYTES_PER_ELEMENT; }
         uint32 GetByteOffset() const { return byteOffset; }
@@ -485,15 +485,15 @@ namespace Js
         virtual Var  DirectGetItem(__in uint32 index) override sealed;
         virtual BOOL DirectSetItemNoDetachCheck(__in uint32 index, __in Js::Var value) override sealed;
         virtual Var  DirectGetItemNoDetachCheck(__in uint32 index) override sealed;
-        virtual Var TypedAdd(__in uint32 index, Var second) override;
-        virtual Var TypedAnd(__in uint32 index, Var second) override;
+        virtual Var TypedAdd(__in uint32 index, __in Var second) override;
+        virtual Var TypedAnd(__in uint32 index, __in Var second) override;
         virtual Var TypedLoad(__in uint32 index) override;
-        virtual Var TypedOr(__in uint32 index, Var second) override;
-        virtual Var TypedStore(__in uint32 index, Var second) override;
-        virtual Var TypedSub(__in uint32 index, Var second) override;
-        virtual Var TypedXor(__in uint32 index, Var second) override;
-        virtual Var TypedExchange(__in uint32 index, Var second) override;
-        virtual Var TypedCompareExchange(__in uint32 index, Var comparand, Var replacementValue) override;
+        virtual Var TypedOr(__in uint32 index, __in Var second) override;
+        virtual Var TypedStore(__in uint32 index, __in Var second) override;
+        virtual Var TypedSub(__in uint32 index, __in Var second) override;
+        virtual Var TypedXor(__in uint32 index, __in Var second) override;
+        virtual Var TypedExchange(__in uint32 index, __in Var second) override;
+        virtual Var TypedCompareExchange(__in uint32 index, __in Var comparand, __in Var replacementValue) override;
 
         static BOOL DirectSetItem(__in TypedArray* arr, __in uint32 index, __in Js::Var value)
         {
@@ -554,15 +554,15 @@ namespace Js
         virtual BOOL DirectSetItemNoDetachCheck(__in uint32 index, __in Js::Var value) override;
         virtual Var  DirectGetItemNoDetachCheck(__in uint32 index) override;
 
-        virtual Var TypedAdd(__in uint32 index, Var second) override;
-        virtual Var TypedAnd(__in uint32 index, Var second) override;
+        virtual Var TypedAdd(__in uint32 index, __in Var second) override;
+        virtual Var TypedAnd(__in uint32 index, __in Var second) override;
         virtual Var TypedLoad(__in uint32 index) override;
-        virtual Var TypedOr(__in uint32 index, Var second) override;
-        virtual Var TypedStore(__in uint32 index, Var second) override;
-        virtual Var TypedSub(__in uint32 index, Var second) override;
-        virtual Var TypedXor(__in uint32 index, Var second) override;
-        virtual Var TypedExchange(__in uint32 index, Var second) override;
-        virtual Var TypedCompareExchange(__in uint32 index, Var comparand, Var replacementValue) override;
+        virtual Var TypedOr(__in uint32 index, __in Var second) override;
+        virtual Var TypedStore(__in uint32 index, __in Var second) override;
+        virtual Var TypedSub(__in uint32 index, __in Var second) override;
+        virtual Var TypedXor(__in uint32 index, __in Var second) override;
+        virtual Var TypedExchange(__in uint32 index, __in Var second) override;
+        virtual Var TypedCompareExchange(__in uint32 index, __in Var comparand, __in Var replacementValue) override;
 
     protected:
         CompareElementsFunction GetCompareElementsFunction()

+ 1 - 0
lib/Runtime/Library/WabtInterface.cpp

@@ -152,6 +152,7 @@ Js::Var WabtInterface::EntryConvertWast2Wasm(RecyclableObject* function, CallInf
         wabtCtx.user_data = &context;
         wabtCtx.createBuffer = CreateBuffer;
         wabtCtx.features.sign_extends = CONFIG_FLAG(WasmSignExtends);
+        wabtCtx.features.threads = CONFIG_FLAG(WasmThreads);
         if (isSpecText)
         {
             wabtCtx.spec = &spec;

+ 73 - 1
lib/WasmReader/WasmBinaryOpCodes.h

@@ -60,7 +60,10 @@
 #define WASM_PREFIX(prefixname, op, imp, errorMsg)
 #endif
 
+#define WASM_PREFIX_THREADS 0xfe
 #define WASM_PREFIX_TRACING 0xf0
+
+WASM_PREFIX(Threads, WASM_PREFIX_THREADS, CONFIG_FLAG(WasmThreads), "WebAssembly Threads support is not enabled")
 #if ENABLE_DEBUG_CONFIG_OPTIONS
 // We won't even look at that prefix in release builds
 // Mark the prefix as not implemented so we don't allow it in the binary buffer
@@ -326,6 +329,74 @@ WASM_UNARY__OPCODE(I64Extend8_s , 0xc2, L_L, I64Extend8_s , CONFIG_FLAG(WasmSign
 WASM_UNARY__OPCODE(I64Extend16_s, 0xc3, L_L, I64Extend16_s, CONFIG_FLAG(WasmSignExtends), "i64.extend16_s")
 WASM_UNARY__OPCODE(I64Extend32_s, 0xc4, L_L, I64Extend32_s, CONFIG_FLAG(WasmSignExtends), "i64.extend32_s")
 
+#define __has_atomics CONFIG_FLAG(WasmThreads)
+#define __prefix (WASM_PREFIX_THREADS << 8)
+WASM_ATOMICREAD_OPCODE (I32AtomicLoad          , __prefix | 0x10, I_I  , __has_atomics, Js::ArrayBufferView::TYPE_INT32, "i32.atomic.load")
+WASM_ATOMICREAD_OPCODE (I64AtomicLoad          , __prefix | 0x11, L_I  , __has_atomics, Js::ArrayBufferView::TYPE_INT64, "i64.atomic.load")
+WASM_ATOMICREAD_OPCODE (I32AtomicLoad8U        , __prefix | 0x12, I_I  , __has_atomics, Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.load8_u")
+WASM_ATOMICREAD_OPCODE (I32AtomicLoad16U       , __prefix | 0x13, I_I  , __has_atomics, Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.load16_u")
+WASM_ATOMICREAD_OPCODE (I64AtomicLoad8U        , __prefix | 0x14, L_I  , __has_atomics, Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.load8_u")
+WASM_ATOMICREAD_OPCODE (I64AtomicLoad16U       , __prefix | 0x15, L_I  , __has_atomics, Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.load16_u")
+WASM_ATOMICREAD_OPCODE (I64AtomicLoad32U       , __prefix | 0x16, L_I  , __has_atomics, Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.load32_u")
+WASM_ATOMICSTORE_OPCODE(I32AtomicStore         , __prefix | 0x17, I_II , __has_atomics, Js::ArrayBufferView::TYPE_INT32, "i32.atomic.store")
+WASM_ATOMICSTORE_OPCODE(I64AtomicStore         , __prefix | 0x18, L_IL , __has_atomics, Js::ArrayBufferView::TYPE_INT64, "i64.atomic.store")
+WASM_ATOMICSTORE_OPCODE(I32AtomicStore8        , __prefix | 0x19, I_II , __has_atomics, Js::ArrayBufferView::TYPE_INT8, "i32.atomic.store8")
+WASM_ATOMICSTORE_OPCODE(I32AtomicStore16       , __prefix | 0x1a, I_II , __has_atomics, Js::ArrayBufferView::TYPE_INT16, "i32.atomic.store16")
+WASM_ATOMICSTORE_OPCODE(I64AtomicStore8        , __prefix | 0x1b, L_IL , __has_atomics, Js::ArrayBufferView::TYPE_INT8_TO_INT64, "i64.atomic.store8")
+WASM_ATOMICSTORE_OPCODE(I64AtomicStore16       , __prefix | 0x1c, L_IL , __has_atomics, Js::ArrayBufferView::TYPE_INT16_TO_INT64, "i64.atomic.store16")
+WASM_ATOMICSTORE_OPCODE(I64AtomicStore32       , __prefix | 0x1d, L_IL , __has_atomics, Js::ArrayBufferView::TYPE_INT32_TO_INT64, "i64.atomic.store32")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwAdd        , __prefix | 0x1e, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.add")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwAdd        , __prefix | 0x1f, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.add")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8UAdd      , __prefix | 0x20, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.add")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16UAdd     , __prefix | 0x21, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.add")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8UAdd      , __prefix | 0x22, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.add")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16UAdd     , __prefix | 0x23, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.add")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32UAdd     , __prefix | 0x24, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.add")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwSub        , __prefix | 0x25, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.sub")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwSub        , __prefix | 0x26, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.sub")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8USub      , __prefix | 0x27, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.sub")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16USub     , __prefix | 0x28, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.sub")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8USub      , __prefix | 0x29, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.sub")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16USub     , __prefix | 0x2a, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.sub")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32USub     , __prefix | 0x2b, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.sub")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwAnd        , __prefix | 0x2c, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.and")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwAnd        , __prefix | 0x2d, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.and")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8UAnd      , __prefix | 0x2e, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.and")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16UAnd     , __prefix | 0x2f, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.and")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8UAnd      , __prefix | 0x30, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.and")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16UAnd     , __prefix | 0x31, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.and")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32UAnd     , __prefix | 0x32, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.and")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwOr         , __prefix | 0x33, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.or")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwOr         , __prefix | 0x34, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.or")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8UOr       , __prefix | 0x35, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.or")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16UOr      , __prefix | 0x36, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.or")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8UOr       , __prefix | 0x37, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.or")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16UOr      , __prefix | 0x38, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.or")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32UOr      , __prefix | 0x39, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.or")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwXor        , __prefix | 0x3a, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.xor")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwXor        , __prefix | 0x3b, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.xor")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8UXor      , __prefix | 0x3c, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.xor")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16UXor     , __prefix | 0x3d, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.xor")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8UXor      , __prefix | 0x3e, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.xor")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16UXor     , __prefix | 0x3f, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.xor")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32UXor     , __prefix | 0x40, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.xor")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwXchg       , __prefix | 0x41, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.xchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwXchg       , __prefix | 0x42, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.xchg")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8UXchg     , __prefix | 0x43, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.xchg")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16UXchg    , __prefix | 0x44, I_II , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.xchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8UXchg     , __prefix | 0x45, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.xchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16UXchg    , __prefix | 0x46, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.xchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32UXchg    , __prefix | 0x47, L_IL , (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.xchg")
+WASM_ATOMIC_OPCODE     (I32AtomicRmwCmpxchg    , __prefix | 0x48, I_III, (false && __has_atomics), Js::ArrayBufferView::TYPE_INT32, "i32.atomic.rmw.cmpxchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmwCmpxchg    , __prefix | 0x49, L_ILL, (false && __has_atomics), Js::ArrayBufferView::TYPE_INT64, "i64.atomic.rmw.cmpxchg")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw8UCmpxchg  , __prefix | 0x4a, I_III, (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8, "i32.atomic.rmw8_u.cmpxchg")
+WASM_ATOMIC_OPCODE     (I32AtomicRmw16UCmpxchg , __prefix | 0x4b, I_III, (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16, "i32.atomic.rmw16_u.cmpxchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw8UCmpxchg  , __prefix | 0x4c, L_ILL, (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT8_TO_INT64, "i64.atomic.rmw8_u.cmpxchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw16UCmpxchg , __prefix | 0x4d, L_ILL, (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT16_TO_INT64, "i64.atomic.rmw16_u.cmpxchg")
+WASM_ATOMIC_OPCODE     (I64AtomicRmw32UCmpxchg , __prefix | 0x4e, L_ILL, (false && __has_atomics), Js::ArrayBufferView::TYPE_UINT32_TO_INT64, "i64.atomic.rmw32_u.cmpxchg")
+#undef __prefix
+#undef __has_atomics
+
 #if ENABLE_DEBUG_CONFIG_OPTIONS
 #define __prefix (WASM_PREFIX_TRACING << 8)
 WASM_UNARY__OPCODE(PrintFuncName    , __prefix | 0x00, V_I , PrintFuncName    , true, "")
@@ -345,6 +416,7 @@ WASM_UNARY__OPCODE(PrintF64         , __prefix | 0x0f, D_D , PrintF64         ,
 #include "WasmBinaryOpcodesSimd.h"
 #endif
 
+#undef WASM_PREFIX_THREADS
 #undef WASM_PREFIX_TRACING
 #undef WASM_PREFIX
 #undef WASM_OPCODE
@@ -359,4 +431,4 @@ WASM_UNARY__OPCODE(PrintF64         , __prefix | 0x0f, D_D , PrintF64         ,
 #undef WASM_ATOMICSTORE_OPCODE
 #undef WASM_UNARY__OPCODE
 #undef WASM_BINARY_OPCODE
-#undef WASM_EMPTY__OPCODE
+#undef WASM_EMPTY__OPCODE

+ 30 - 20
lib/WasmReader/WasmByteCodeGenerator.cpp

@@ -814,12 +814,22 @@ void WasmBytecodeGenerator::EmitExpr(WasmOp op)
 #define WASM_MEMREAD_OPCODE(opname, opcode, sig, imp, viewtype, wat) \
     case wb##opname: \
         Assert(WasmOpCodeSignatures::n##sig > 0);\
-        info = EmitMemAccess(wb##opname, WasmOpCodeSignatures::sig, viewtype, false); \
+        info = EmitMemAccess<false, false>(wb##opname, WasmOpCodeSignatures::sig, viewtype); \
+        break;
+#define WASM_ATOMICREAD_OPCODE(opname, opcode, sig, imp, viewtype, wat) \
+    case wb##opname: \
+        Assert(WasmOpCodeSignatures::n##sig > 0);\
+        info = EmitMemAccess<false, true>(wb##opname, WasmOpCodeSignatures::sig, viewtype); \
         break;
 #define WASM_MEMSTORE_OPCODE(opname, opcode, sig, imp, viewtype, wat) \
     case wb##opname: \
         Assert(WasmOpCodeSignatures::n##sig > 0);\
-        info = EmitMemAccess(wb##opname, WasmOpCodeSignatures::sig, viewtype, true); \
+        info = EmitMemAccess<true, false>(wb##opname, WasmOpCodeSignatures::sig, viewtype); \
+        break;
+#define WASM_ATOMICSTORE_OPCODE(opname, opcode, sig, imp, viewtype, wat) \
+    case wb##opname: \
+        Assert(WasmOpCodeSignatures::n##sig > 0);\
+        info = EmitMemAccess<true, true>(wb##opname, WasmOpCodeSignatures::sig, viewtype); \
         break;
 #define WASM_SIMD_MEMREAD_OPCODE(opname, opcode, sig, asmjsop, viewtype, dataWidth, ...) \
     case wb##opname: \
@@ -1654,47 +1664,47 @@ EmitInfo WasmBytecodeGenerator::EmitSimdMemAccess(Js::OpCodeAsmJs op, const Wasm
 }
 #endif
 
-EmitInfo WasmBytecodeGenerator::EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, bool isStore)
+template<bool isStore, bool isAtomic>
+EmitInfo WasmBytecodeGenerator::EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType)
 {
+    Assert(!isAtomic || CONFIG_FLAG(WasmThreads));
     WasmTypes::WasmType type = signature[0];
     SetUsesMemory(0);
 
-    const uint32 mask = Js::ArrayBufferView::ViewMask[viewType];
+    const uint32 naturalAlignment = Js::ArrayBufferView::NaturalAlignment[viewType];
     const uint32 alignment = GetReader()->m_currentNode.mem.alignment;
     const uint32 offset = GetReader()->m_currentNode.mem.offset;
 
-    if ((mask << 1) & (1 << alignment))
+    if (alignment > naturalAlignment)
     {
         throw WasmCompilationException(_u("alignment must not be larger than natural"));
     }
-
-    EmitInfo rhsInfo;
-    if (isStore)
+    if (isAtomic && alignment != naturalAlignment)
     {
-        rhsInfo = PopEvalStack(type, _u("Invalid type for store op"));
+        throw WasmCompilationException(_u("invalid alignment for atomic RW. Expected %u, got %u"), naturalAlignment, alignment);
     }
-    EmitInfo exprInfo = PopEvalStack(WasmTypes::I32, _u("Index expression must be of type i32"));
 
-    if (isStore) // Stores
+    // Stores
+    if (isStore)
     {
-        m_writer->WasmMemAccess(Js::OpCodeAsmJs::StArrWasm, rhsInfo.location, exprInfo.location, offset, viewType);
+        EmitInfo rhsInfo = PopEvalStack(type, _u("Invalid type for store op"));
+        EmitInfo exprInfo = PopEvalStack(WasmTypes::I32, _u("Index expression must be of type i32"));
+        Js::OpCodeAsmJs op = isAtomic ? Js::OpCodeAsmJs::StArrAtomic : Js::OpCodeAsmJs::StArrWasm;
+        m_writer->WasmMemAccess(op, rhsInfo.location, exprInfo.location, offset, viewType);
         ReleaseLocation(&rhsInfo);
         ReleaseLocation(&exprInfo);
 
         return EmitInfo();
     }
 
+    // Loads
+    EmitInfo exprInfo = PopEvalStack(WasmTypes::I32, _u("Index expression must be of type i32"));
     ReleaseLocation(&exprInfo);
     Js::RegSlot resultReg = GetRegisterSpace(type)->AcquireTmpRegister();
-    m_writer->WasmMemAccess(Js::OpCodeAsmJs::LdArrWasm, resultReg, exprInfo.location, offset, viewType);
+    Js::OpCodeAsmJs op = isAtomic ? Js::OpCodeAsmJs::LdArrAtomic : Js::OpCodeAsmJs::LdArrWasm;
+    m_writer->WasmMemAccess(op, resultReg, exprInfo.location, offset, viewType);
 
-    EmitInfo yieldInfo;
-    if (!isStore)
-    {
-        // Yield only on load
-        yieldInfo = EmitInfo(resultReg, type);
-    }
-    return yieldInfo;
+    return EmitInfo(resultReg, type);
 }
 
 void WasmBytecodeGenerator::EmitReturnExpr(PolymorphicEmitInfo* explicitRetInfo)

+ 2 - 1
lib/WasmReader/WasmByteCodeGenerator.h

@@ -237,7 +237,8 @@ namespace Wasm
         void EmitBr();
         PolymorphicEmitInfo EmitBrIf();
 
-        EmitInfo EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, bool isStore);
+        template<bool isStore, bool isAtomic>
+        EmitInfo EmitMemAccess(WasmOp wasmOp, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType);
         EmitInfo EmitSimdMemAccess(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature, Js::ArrayBufferView::ViewType viewType, uint8 dataWidth, bool isStore);
         EmitInfo EmitBinExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);
         EmitInfo EmitUnaryExpr(Js::OpCodeAsmJs op, const WasmTypes::WasmType* signature);

+ 1 - 1
lib/wabt/src/validator.cc

@@ -449,7 +449,7 @@ void Validator::CheckBlockSig(const Location* loc,
 template <typename T>
 void Validator::CheckAtomicExpr(const T* expr,
                                 Result (TypeChecker::*func)(Opcode)) {
-  CheckHasSharedMemory(&expr->loc, expr->opcode);
+  CheckHasMemory(&expr->loc, expr->opcode);
   CheckAtomicAlign(&expr->loc, expr->align,
                    get_opcode_natural_alignment(expr->opcode));
   (typechecker_.*func)(expr->opcode);

+ 36 - 0
pal/inc/pal.h

@@ -5512,6 +5512,18 @@ InterlockedAnd(
     return __sync_fetch_and_and(Destination, Value);
 }
 
+EXTERN_C
+PALIMPORT
+inline
+LONGLONG
+PALAPI
+InterlockedAnd64(
+    IN OUT LONGLONG volatile *Destination,
+    IN LONGLONG Value)
+{
+    return __sync_fetch_and_and(Destination, Value);
+}
+
 EXTERN_C
 PALIMPORT
 inline
@@ -5548,6 +5560,18 @@ InterlockedOr(
     return __sync_fetch_and_or(Destination, Value);
 }
 
+EXTERN_C
+PALIMPORT
+inline
+LONGLONG
+PALAPI
+InterlockedOr64(
+    IN OUT LONGLONG volatile *Destination,
+    IN LONGLONG Value)
+{
+    return __sync_fetch_and_or(Destination, Value);
+}
+
 EXTERN_C
 PALIMPORT
 inline
@@ -5584,6 +5608,18 @@ InterlockedXor(
     return __sync_fetch_and_xor(Destination, Value);
 }
 
+EXTERN_C
+PALIMPORT
+inline
+LONGLONG
+PALAPI
+InterlockedXor64(
+    IN OUT LONGLONG volatile *Destination,
+    IN LONGLONG Value)
+{
+    return __sync_fetch_and_xor(Destination, Value);
+}
+
 #define BITS_IN_BYTE 8
 #define BITS_IN_LONG (sizeof(LONG) * BITS_IN_BYTE)
 

+ 1 - 0
test/WasmSpec/baselines/atomic_load.baseline

@@ -0,0 +1 @@
+13/13 tests passed.

+ 1 - 0
test/WasmSpec/baselines/atomic_store.baseline

@@ -0,0 +1 @@
+13/13 tests passed.

+ 1 - 0
test/WasmSpec/baselines/chakra_atomic_load.baseline

@@ -0,0 +1 @@
+18/18 tests passed.

+ 1 - 0
test/WasmSpec/baselines/chakra_atomic_store.baseline

@@ -0,0 +1 @@
+18/18 tests passed.

+ 29 - 0
test/WasmSpec/chakra/chakra_atomic_load.wast

@@ -0,0 +1,29 @@
+(module
+  ;; todo make this a shared memory
+  (memory 1)
+  (data (i32.const 16) "\ff\ff\ff\ff\ff\ff\ff\ff")
+  (data (i32.const 24) "\12\34\56\78\00\00\ce\41")
+
+  (func (export "load") (param i32) (result i64)
+    (i64.atomic.load32_u offset=15 (get_local 0))
+  )
+)
+
+(assert_return (invoke "load" (i32.const 1)) (i64.const 0xffffffff))
+(assert_return (invoke "load" (i32.const 5)) (i64.const 0xffffffff))
+(assert_return (invoke "load" (i32.const 9)) (i64.const 0x78563412))
+(assert_return (invoke "load" (i32.const 13)) (i64.const 0x41ce0000))
+(assert_trap (invoke "load" (i32.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 2)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 3)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 4)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 6)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 7)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 8)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 10)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 11)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 12)) "atomic memory access is unaligned")
+(assert_trap (invoke "load" (i32.const 14)) "atomic memory access is unaligned")
+
+(assert_return (invoke "load" (i32.const 65517)) (i64.const 0))
+(assert_trap (invoke "load" (i32.const 65521)) "out of bounds memory access")

+ 28 - 0
test/WasmSpec/chakra/chakra_atomic_store.wast

@@ -0,0 +1,28 @@
+(module
+  ;; todo make this a shared memory
+  (memory 1)
+
+  (func (export "store") (param i32 i64) (result i32)
+    (i64.atomic.store offset=15 (get_local 0) (get_local 1))
+    (i32.atomic.load offset=19 (get_local 0))
+  )
+)
+
+(assert_return (invoke "store" (i32.const 1)  (i64.const 0xffffffff41ce0000)) (i32.const 0xffffffff))
+(assert_return (invoke "store" (i32.const 9)  (i64.const 0x78563412ffffffff)) (i32.const 0x78563412))
+(assert_trap (invoke "store" (i32.const 0)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 2)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 3)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 4)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 5)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 6)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 7)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 8)  (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 10) (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 11) (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 12) (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 13) (i64.const 0)) "atomic memory access is unaligned")
+(assert_trap (invoke "store" (i32.const 14) (i64.const 0)) "atomic memory access is unaligned")
+
+(assert_return (invoke "store" (i32.const 65513) (i64.const 0x78563412ffffffff)) (i32.const 0x78563412))
+(assert_trap (invoke "store" (i32.const 65521) (i64.const 0)) "out of bounds memory access")

+ 0 - 0
test/WasmSpec/chakra/chakra_extends_i32.wast → test/WasmSpec/chakra_generated/chakra_extends_i32.wast


+ 0 - 0
test/WasmSpec/chakra/chakra_extends_i64.wast → test/WasmSpec/chakra_generated/chakra_extends_i64.wast


+ 0 - 0
test/WasmSpec/chakra/chakra_i32.wast → test/WasmSpec/chakra_generated/chakra_i32.wast


+ 0 - 0
test/WasmSpec/chakra/chakra_i64.wast → test/WasmSpec/chakra_generated/chakra_i64.wast


+ 13 - 1
test/WasmSpec/convert-test-suite/config.json

@@ -1,10 +1,12 @@
 {
   "folders": [
     "chakra",
+    "chakra_generated",
     "testsuite/core",
     "testsuite/js-api",
 
-    "features/extends"
+    "features/extends",
+    "features/threads"
   ],
   "features": [{
     "flags": ["-wasmfastarray-"],
@@ -37,6 +39,16 @@
       "chakra_extends_i32",
       "chakra_extends_i64"
     ]
+  }, {
+    "required": true,
+    "flags": ["-WasmThreads"],
+    "folders": [
+      "features/threads"
+    ],
+    "files": [
+      "chakra_atomic_load",
+      "chakra_atomic_store"
+    ]
   }],
   "excludes": [
     "names"

+ 1 - 1
test/WasmSpec/convert-test-suite/index.js

@@ -52,7 +52,7 @@ function removePossiblyEmptyFolder(folder) {
 }
 
 function generateChakraTests() {
-  const chakraTestsDestination = path.join(rlRoot, "chakra");
+  const chakraTestsDestination = path.join(rlRoot, "chakra_generated");
 
   const chakraTests = require("./generateTests");
   return removePossiblyEmptyFolder(chakraTestsDestination)

+ 50 - 0
test/WasmSpec/features/threads/atomic_load.wast

@@ -0,0 +1,50 @@
+(module
+  ;; todo make this a shared memory
+  (memory 1)
+  (data (i32.const 0) "\ff\ff\ff\ff")
+  (data (i32.const 4) "\00\00\ce\41")
+  (data (i32.const 8) "\00\00\00\00\00\ff\8f\40")
+
+  (func (export "i32.atomic.load8_u") (result i32)
+    i32.const 1 i32.atomic.load8_u)
+  (func (export "i32.atomic.load16_u") (result i32)
+    i32.const 2 i32.atomic.load16_u)
+  (func (export "i32.atomic.load") (result i32)
+    i32.const 4 i32.atomic.load)
+
+  (func (export "i64.atomic.load8_u") (result i64)
+    i32.const 1 i64.atomic.load8_u)
+  (func (export "i64.atomic.load16_u") (result i64)
+    i32.const 2 i64.atomic.load16_u)
+  (func (export "i64.atomic.load32_u") (result i64)
+    i32.const 4 i64.atomic.load32_u)
+  (func (export "i64.atomic.load") (result i64)
+    i32.const 0 i64.atomic.load)
+
+  ;; Test bad alignment
+
+  (func (export "bad.align-i32.atomic.load16_u") (result i32)
+    i32.const 1 i32.atomic.load16_u)
+  (func (export "bad.align-i32.atomic.load") (result i32)
+    i32.const 2 i32.atomic.load)
+
+  (func (export "bad.align-i64.atomic.load16_u") (result i64)
+    i32.const 1 i64.atomic.load16_u)
+  (func (export "bad.align-i64.atomic.load32_u") (result i64)
+    i32.const 2 i64.atomic.load32_u)
+  (func (export "bad.align-i64.atomic.load") (result i64)
+    i32.const 4 i64.atomic.load)
+)
+
+(assert_return (invoke "i32.atomic.load8_u") (i32.const 255))
+(assert_return (invoke "i32.atomic.load16_u") (i32.const 65535))
+(assert_return (invoke "i32.atomic.load") (i32.const 1104019456))
+(assert_return (invoke "i64.atomic.load8_u") (i64.const 255))
+(assert_return (invoke "i64.atomic.load16_u") (i64.const 65535))
+(assert_return (invoke "i64.atomic.load32_u") (i64.const 1104019456))
+(assert_return (invoke "i64.atomic.load") (i64.const 4741727461962678271))
+(assert_trap (invoke "bad.align-i32.atomic.load16_u") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i32.atomic.load") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i64.atomic.load16_u") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i64.atomic.load32_u") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i64.atomic.load") "atomic memory access is unaligned")

+ 68 - 0
test/WasmSpec/features/threads/atomic_store.wast

@@ -0,0 +1,68 @@
+(module
+  ;; todo make this a shared memory
+  (memory 1)
+
+  (func (export "i32.atomic.store8") (result i32)
+      i32.const 0 i32.const 0xfb i32.atomic.store8
+      i32.const 1 i32.const 0xfc i32.atomic.store8
+      i32.const 2 i32.const 0xfd i32.atomic.store8
+      i32.const 3 i32.const 0xfe i32.atomic.store8
+      i32.const 0 i32.load)
+
+  (func (export "i32.atomic.store16") (result i32)
+      i32.const 0 i32.const 0xcac9 i32.atomic.store16
+      i32.const 2 i32.const 0xcccb i32.atomic.store16
+      i32.const 0 i32.load)
+
+  (func (export "i32.atomic.store") (result i32)
+      i32.const 0 i32.const -123456 i32.atomic.store
+      i32.const 0 i32.load)
+
+  (func (export "i64.atomic.store8") (result i32)
+      i32.const 0 i64.const 0xeeeeeeeeeeeeeefb i64.atomic.store8
+      i32.const 1 i64.const 0xeeeeeeeeeeeeeefc i64.atomic.store8
+      i32.const 2 i64.const 0xeeeeeeeeeeeeeefd i64.atomic.store8
+      i32.const 3 i64.const 0xeeeeeeeeeeeeeefe i64.atomic.store8
+      i32.const 0 i32.load)
+
+  (func (export "i64.atomic.store16") (result i32)
+      i32.const 0 i64.const 0xeeeeeeeeeeeecac9 i64.atomic.store16
+      i32.const 2 i64.const 0xeeeeeeeeeeeecccb i64.atomic.store16
+      i32.const 0 i32.load)
+
+  (func (export "i64.atomic.store32") (result i32)
+      i32.const 0 i64.const -123456 i64.atomic.store32
+      i32.const 0 i32.load)
+
+  (func (export "i64.atomic.store") (result i64)
+      i32.const 0 i64.const 0xbaddc0de600dd00d i64.atomic.store
+      i32.const 0 i64.load)
+
+  ;; Test bad alignment
+
+  (func (export "bad.align-i32.atomic.store16")
+      i32.const 1 i32.const 0 i32.atomic.store16)
+  (func (export "bad.align-i32.atomic.store")
+      i32.const 2 i32.const 0 i32.atomic.store)
+
+  (func (export "bad.align-i64.atomic.store16")
+      i32.const 1 i64.const 0 i64.atomic.store16)
+  (func (export "bad.align-i64.atomic.store32")
+      i32.const 2 i64.const 0 i64.atomic.store32)
+  (func (export "bad.align-i64.atomic.store")
+      i32.const 4 i64.const 0 i64.atomic.store)
+
+)
+
+(assert_return (invoke "i32.atomic.store8") (i32.const 4278058235))
+(assert_return (invoke "i32.atomic.store16") (i32.const 3435907785))
+(assert_return (invoke "i32.atomic.store") (i32.const 4294843840))
+(assert_return (invoke "i64.atomic.store8") (i32.const 4278058235))
+(assert_return (invoke "i64.atomic.store16") (i32.const 3435907785))
+(assert_return (invoke "i64.atomic.store32") (i32.const 4294843840))
+(assert_return (invoke "i64.atomic.store") (i64.const 13465130522234441741))
+(assert_trap (invoke "bad.align-i32.atomic.store16") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i32.atomic.store") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i64.atomic.store16") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i64.atomic.store32") "atomic memory access is unaligned")
+(assert_trap (invoke "bad.align-i64.atomic.store") "atomic memory access is unaligned")

+ 70 - 10
test/WasmSpec/rlexe.xml

@@ -1,18 +1,48 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Auto Generated by convert-test-suite -->
 <regress-exe>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/chakra_atomic_load.baseline</baseline>
+      <compile-flags>-wasm -args chakra/chakra_atomic_load.wast -endargs -WasmThreads</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/chakra_atomic_load.baseline</baseline>
+      <compile-flags>-wasm -args chakra/chakra_atomic_load.wast -endargs -nonative -WasmThreads</compile-flags>
+      <tags>exclude_dynapogo</tags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/chakra_atomic_store.baseline</baseline>
+      <compile-flags>-wasm -args chakra/chakra_atomic_store.wast -endargs -WasmThreads</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/chakra_atomic_store.baseline</baseline>
+      <compile-flags>-wasm -args chakra/chakra_atomic_store.wast -endargs -nonative -WasmThreads</compile-flags>
+      <tags>exclude_dynapogo</tags>
+    </default>
+  </test>
   <test>
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_extends_i32.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_extends_i32.wast -endargs -WasmSignExtends</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_extends_i32.wast -endargs -WasmSignExtends</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_extends_i32.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_extends_i32.wast -endargs -nonative -WasmSignExtends</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_extends_i32.wast -endargs -nonative -WasmSignExtends</compile-flags>
       <tags>exclude_dynapogo</tags>
     </default>
   </test>
@@ -20,14 +50,14 @@
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_extends_i64.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_extends_i64.wast -endargs -WasmSignExtends</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_extends_i64.wast -endargs -WasmSignExtends</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_extends_i64.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_extends_i64.wast -endargs -nonative -WasmSignExtends</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_extends_i64.wast -endargs -nonative -WasmSignExtends</compile-flags>
       <tags>exclude_dynapogo</tags>
     </default>
   </test>
@@ -35,14 +65,14 @@
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_i32.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_i32.wast -endargs</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_i32.wast -endargs</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_i32.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_i32.wast -endargs -nonative</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_i32.wast -endargs -nonative</compile-flags>
       <tags>exclude_dynapogo</tags>
     </default>
   </test>
@@ -50,21 +80,21 @@
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_i32.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_i32.wast -endargs -wasmMathExFilter</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_i32.wast -endargs -wasmMathExFilter</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_i64.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_i64.wast -endargs</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_i64.wast -endargs</compile-flags>
     </default>
   </test>
   <test>
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_i64.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_i64.wast -endargs -nonative</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_i64.wast -endargs -nonative</compile-flags>
       <tags>exclude_dynapogo</tags>
     </default>
   </test>
@@ -72,7 +102,7 @@
     <default>
       <files>spec.js</files>
       <baseline>baselines/chakra_i64.baseline</baseline>
-      <compile-flags>-wasm -args chakra/chakra_i64.wast -endargs -wasmMathExFilter</compile-flags>
+      <compile-flags>-wasm -args chakra_generated/chakra_i64.wast -endargs -wasmMathExFilter</compile-flags>
     </default>
   </test>
   <test>
@@ -1216,4 +1246,34 @@
       <tags>exclude_dynapogo</tags>
     </default>
   </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/atomic_load.baseline</baseline>
+      <compile-flags>-wasm -args features/threads/atomic_load.wast -endargs -WasmThreads</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/atomic_load.baseline</baseline>
+      <compile-flags>-wasm -args features/threads/atomic_load.wast -endargs -nonative -WasmThreads</compile-flags>
+      <tags>exclude_dynapogo</tags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/atomic_store.baseline</baseline>
+      <compile-flags>-wasm -args features/threads/atomic_store.wast -endargs -WasmThreads</compile-flags>
+    </default>
+  </test>
+  <test>
+    <default>
+      <files>spec.js</files>
+      <baseline>baselines/atomic_store.baseline</baseline>
+      <compile-flags>-wasm -args features/threads/atomic_store.wast -endargs -nonative -WasmThreads</compile-flags>
+      <tags>exclude_dynapogo</tags>
+    </default>
+  </test>
 </regress-exe>