فهرست منبع

JIT: enable JIT on Linux

Primary changes to JIT on Linux.

Custom calling convention
-------------------------

Calls to native helpers conform to Sys V AMD64 ABI.
    - Change all code to use correct arg registers.

Calls to JsMethod and native helpers that depend on
ARGUMENTS/RUNTIME_ARGUMENTS use custom calling convention:
    - Caller home arg registers onto stack.

Implemented all needed assembly thunks.

Unwind data
-----------

Emit .eh_frame data for dynamic interpreter thunks and all jit functions.
    - For less noise, the new code hides behind existing
        PrologEncoder/PData/XData.

Misc
----

ARGUMENTS/RUNTIME_ARGUMENTS:
    - Extended to support all runtime usages.
    - Limitation: known arg list must end with "callInfo".

Int32Math:
    - Use builtins to test signed int overflow. Original code does not work
        due to clang optimization.

TODO
    - AsmJs: missing ArrayBuffer out of bound access recovery.
    - Simd.
Jianchun Xu 9 سال پیش
والد
کامیت
960ec9a5a6
41 فایل‌های تغییر یافته به همراه 2309 افزوده شده و 401 حذف شده
  1. 11 0
      bin/ch/ChakraRtInterface.cpp
  2. 4 0
      lib/Backend/Backend.cpp
  3. 1 1
      lib/Backend/CodeGenWorkItem.h
  4. 226 0
      lib/Backend/EhFrame.cpp
  5. 206 0
      lib/Backend/EhFrame.h
  6. 29 0
      lib/Backend/EhFrameCFI.inc
  7. 7 4
      lib/Backend/Encoder.cpp
  8. 41 2
      lib/Backend/InterpreterThunkEmitter.cpp
  9. 29 29
      lib/Backend/LowerMDShared.cpp
  10. 13 24
      lib/Backend/NativeCodeData.cpp
  11. 34 29
      lib/Backend/NativeCodeData.h
  12. 24 1
      lib/Backend/PDataManager.cpp
  13. 131 2
      lib/Backend/PrologEncoder.cpp
  14. 49 2
      lib/Backend/PrologEncoder.h
  15. 27 29
      lib/Backend/amd64/LinearScanMD.cpp
  16. 139 0
      lib/Backend/amd64/LinearScanMdA.S
  17. 158 75
      lib/Backend/amd64/LowererMDArch.cpp
  18. 4 1
      lib/Backend/amd64/LowererMDArch.h
  19. 6 13
      lib/Backend/amd64/PeepsMD.cpp
  20. 22 2
      lib/Backend/amd64/Reg.h
  21. 83 0
      lib/Backend/amd64/RegList.h
  22. 94 0
      lib/Backend/amd64/Thunks.S
  23. 14 0
      lib/Common/Common/Int32Math.cpp
  24. 6 2
      lib/Common/CommonDefines.h
  25. 1 3
      lib/Common/Memory/CustomHeap.h
  26. 8 6
      lib/Common/Memory/PageAllocator.cpp
  27. 19 0
      lib/Common/Memory/amd64/XDataAllocator.cpp
  28. 28 3
      lib/Common/Memory/amd64/XDataAllocator.h
  29. 100 0
      lib/JITClient/JITManager.h
  30. 3 3
      lib/Runtime/Base/CallInfo.h
  31. 37 17
      lib/Runtime/Base/FunctionBody.cpp
  32. 3 3
      lib/Runtime/Base/ThreadContextInfo.cpp
  33. 58 15
      lib/Runtime/Language/Arguments.h
  34. 61 49
      lib/Runtime/Language/InterpreterStackFrame.cpp
  35. 13 58
      lib/Runtime/Language/JavascriptOperators.cpp
  36. 42 0
      lib/Runtime/Language/JavascriptOperators.inl
  37. 2 2
      lib/Runtime/Language/ProfilingHelpers.cpp
  38. 23 23
      lib/Runtime/Language/amd64/JavascriptOperatorsA.S
  39. 382 0
      lib/Runtime/Language/amd64/amd64_Thunks.S
  40. 1 1
      lib/Runtime/Library/JavascriptFunction.cpp
  41. 170 2
      lib/Runtime/Library/amd64/JavascriptFunctionA.S

+ 11 - 0
bin/ch/ChakraRtInterface.cpp

@@ -180,7 +180,18 @@ void ChakraRTInterface::UnloadChakraDll(HINSTANCE library)
     {
         pDllCanUnloadNow();
     }
+#ifdef _WIN32
     UnloadChakraCore(library);
+#else  // !_WIN32
+    // PAL thread shutdown needs more time after execution completion.
+    // Do not FreeLibrary. Invoke DllMain(DLL_PROCESS_DETACH) directly.
+    typedef BOOL (__stdcall *PDLLMAIN)(HINSTANCE, DWORD, LPVOID);
+    PDLLMAIN pDllMain = (PDLLMAIN) GetChakraCoreSymbol(library, "DllMain");
+    if (pDllMain)
+    {
+        pDllMain(library, DLL_PROCESS_DETACH, NULL);
+    }
+#endif
 #endif
 }
 

+ 4 - 0
lib/Backend/Backend.cpp

@@ -3,3 +3,7 @@
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #include "Backend.h"
+
+#if !ENABLE_OOP_NATIVE_CODEGEN
+JITManager JITManager::s_jitManager; // dummy object when OOP JIT disabled
+#endif

+ 1 - 1
lib/Backend/CodeGenWorkItem.h

@@ -227,7 +227,7 @@ public:
     void GetEntryPointAddress(void** entrypoint, ptrdiff_t *size) override
     {
          Assert(entrypoint);
-         *entrypoint = this->GetEntryPoint()->jsMethod;
+         *entrypoint = (void*)this->GetEntryPoint()->jsMethod;
          *size = this->GetEntryPoint()->GetCodeSize();
     }
 

+ 226 - 0
lib/Backend/EhFrame.cpp

@@ -0,0 +1,226 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+#include "Backend.h"
+#include "EhFrame.h"
+
+// AMD64 ABI -- DWARF register number mapping
+static const ubyte DWARF_RegNum[] =
+{
+    // Exactly same order as RegList.h!
+    -1, // NOREG,
+    0,  // RAX,
+    2,  // RCX,
+    1,  // RDX,
+    3,  // RBX,
+    7,  // RSP,
+    6,  // RBP,
+    4,  // RSI,
+    5,  // RDI,
+    8,  // R8,
+    9,  // R9,
+    10, // R10,
+    11, // R11,
+    12, // R12,
+    13, // R13,
+    14, // R14,
+    15, // R15,
+    17,  // XMM0,
+    18,  // XMM1,
+    19,  // XMM2,
+    20,  // XMM3,
+    21,  // XMM4,
+    22,  // XMM5,
+    23,  // XMM6,
+    24,  // XMM7,
+    25,  // XMM8,
+    26,  // XMM9,
+    27,  // XMM10,
+    28,  // XMM11,
+    29,  // XMM12,
+    30,  // XMM13,
+    31,  // XMM14,
+    32,  // XMM15,
+};
+
+static const ubyte DWARF_RegRA = 16;
+
+ubyte GetDwarfRegNum(ubyte regNum)
+{
+    return DWARF_RegNum[regNum];
+}
+
+// Encode into ULEB128 (Unsigned Little Endian Base 128)
+BYTE* EmitLEB128(BYTE* pc, unsigned value)
+{
+    do
+    {
+        BYTE b = value & 0x7F; // low order 7 bits
+        value >>= 7;
+
+        if (value)  // more bytes to come
+        {
+            b |= 0x80;
+        }
+
+        *pc++ = b;
+    }
+    while (value != 0);
+
+    return pc;
+}
+
+// Encode into signed LEB128 (Signed Little Endian Base 128)
+BYTE* EmitLEB128(BYTE* pc, int value)
+{
+    static const int size = sizeof(value) * 8;
+    static const bool isLogicShift = (-1 >> 1) != -1;
+
+    const bool signExtend = isLogicShift && value < 0;
+
+    bool more = true;
+    while (more)
+    {
+        BYTE b = value & 0x7F; // low order 7 bits
+        value >>= 7;
+
+        if (signExtend)
+        {
+            value |= - (1 << (size - 7)); // sign extend
+        }
+
+        const bool signBit = (b & 0x40) != 0;
+        if ((value == 0 && !signBit) || (value == -1 && signBit))
+        {
+            more = false;
+        }
+        else
+        {
+            b |= 0x80;
+        }
+
+        *pc++ = b;
+    }
+
+    return pc;
+}
+
+
+void EhFrame::Entry::Begin()
+{
+    Assert(beginOffset == -1);
+    beginOffset = writer->Count();
+
+    // Write Length place holder
+    const uword length = 0;
+    writer->Write(length);
+}
+
+void EhFrame::Entry::End()
+{
+    // padding
+    size_t padding = (MachPtr - writer->Count() % MachPtr) % MachPtr;
+    for (size_t i = 0; i < padding; i++)
+    {
+        cfi_nop();
+    }
+
+    // update length record
+    uword length = writer->Count() - beginOffset
+                    - sizeof(length);  // exclude length itself
+    writer->Write(beginOffset, length);
+}
+
+void EhFrame::Entry::cfi_advance(uword advance)
+{
+    if (advance <= 0x3F)        // 6-bits
+    {
+        cfi_advance_loc(static_cast<ubyte>(advance));
+    }
+    else if (advance <= 0xFF)   // 1-byte
+    {
+        cfi_advance_loc1(static_cast<ubyte>(advance));
+    }
+    else if (advance <= 0xFFFF) // 2-byte
+    {
+        cfi_advance_loc2(static_cast<uword>(advance));
+    }
+    else                        // 4-byte
+    {
+        cfi_advance_loc4(advance);
+    }
+}
+
+void EhFrame::CIE::Begin()
+{
+    Assert(writer->Count() == 0);
+    Entry::Begin();
+
+    const uword cie_id = 0;
+    Emit(cie_id);
+
+    const ubyte version = 1;
+    Emit(version);
+
+    const ubyte augmentationString = 0; // none
+    Emit(augmentationString);
+
+    const ULEB128 codeAlignmentFactor = 1;
+    Emit(codeAlignmentFactor);
+
+    const LEB128 dataAlignmentFactor = - MachPtr;
+    Emit(dataAlignmentFactor);
+
+    const ubyte returnAddressRegister = DWARF_RegRA;
+    Emit(returnAddressRegister);
+}
+
+
+void EhFrame::FDE::Begin()
+{
+    Entry::Begin();
+
+    const uword cie_id = writer->Count();
+    Emit(cie_id);
+
+    // Write pc <begin, range> placeholder
+    pcBeginOffset = writer->Count();
+    const void* pc = nullptr;
+    Emit(pc);
+    Emit(pc);
+}
+
+void EhFrame::FDE::UpdateAddressRange(const void* pcBegin, size_t pcRange)
+{
+    writer->Write(pcBeginOffset, pcBegin);
+    writer->Write(pcBeginOffset + sizeof(pcBegin),
+        reinterpret_cast<const void*>(pcRange));
+}
+
+
+EhFrame::EhFrame(BYTE* buffer, size_t size)
+        : writer(buffer, size), fde(&writer)
+{
+    CIE cie(&writer);
+    cie.Begin();
+
+    // CIE initial instructions
+    // DW_CFA_def_cfa: r7 (rsp) ofs 8
+    cie.cfi_def_cfa(DWARF_RegNum[LowererMDArch::GetRegStackPointer()], MachPtr);
+    // DW_CFA_offset: r16 (rip) at cfa-8 (data alignment -8)
+    cie.cfi_offset(DWARF_RegRA, 1);
+
+    cie.End();
+
+    fde.Begin();
+}
+
+void EhFrame::End()
+{
+    fde.End();
+
+    // Write length 0 to mark terminate entry
+    const uword terminate_entry_length = 0;
+    writer.Write(terminate_entry_length);
+}

+ 206 - 0
lib/Backend/EhFrame.h

@@ -0,0 +1,206 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+#pragma once
+
+typedef BYTE ubyte;
+typedef uint16 uhalf;
+typedef uint32 uword;
+CompileAssert(sizeof(ubyte) == 1);
+CompileAssert(sizeof(uhalf) == 2);
+CompileAssert(sizeof(uword) == 4);
+
+BYTE* EmitLEB128(BYTE* pc, unsigned value);
+BYTE* EmitLEB128(BYTE* pc, int value);
+ubyte GetDwarfRegNum(ubyte regNum);
+
+template <class T>
+class LEB128Wrapper
+{
+private:
+    T value;
+
+public:
+    LEB128Wrapper(T value): value(value)
+    {}
+
+    BYTE* Write(BYTE* pc) const
+    {
+        return EmitLEB128(pc, value);
+    }
+};
+
+typedef LEB128Wrapper<unsigned> ULEB128;
+typedef LEB128Wrapper<int> LEB128;
+
+//
+// EhFrame emits .eh_frame unwind data for our JIT code. We emit only one CIE
+// followed by one FDE for each JIT function.
+//
+class EhFrame
+{
+    // Simple buffer writer. Must operate on a buffer of sufficient size.
+    class Writer
+    {
+    private:
+        BYTE* buffer;   // original buffer head
+        BYTE* cur;      // current output position
+        const size_t size;  // original size of buffer, for debug only
+
+    public:
+        Writer(BYTE* buffer, size_t size) : buffer(buffer), cur(buffer), size(size)
+        {}
+
+        // Write a value, and advance cur position
+        template <class T>
+        void Write(T value)
+        {
+            *reinterpret_cast<T*>(cur) = value;
+            cur += sizeof(value);
+            Assert(Count() <= size);
+        }
+
+        // Write a ULEB128 or LEB128 value, and advance cur position
+        template <class T>
+        void Write(const LEB128Wrapper<T>& leb128)
+        {
+            cur = leb128.Write(cur);
+            Assert(Count() <= size);
+        }
+
+        // Write a value at an absolute position
+        template <class T>
+        void Write(size_t offset, T value)
+        {
+            Assert(offset + sizeof(value) <= size);
+            *reinterpret_cast<T*>(buffer + offset) = value;
+        }
+
+        // Get original buffer head
+        BYTE* Buffer() const
+        {
+            return buffer;
+        }
+
+        // Get count of written bytes (== offset of cur position)
+        size_t Count() const
+        {
+            return cur - buffer;
+        }
+    };
+
+    // Base class for CIE and FDE
+    class Entry
+    {
+    protected:
+        Writer* writer;
+        size_t  beginOffset;    // where we'll update "length" record
+
+        // To limit supported value types
+        void Emit(ubyte value) { writer->Write(value); }
+        void Emit(uhalf value) { writer->Write(value); }
+        void Emit(uword value) { writer->Write(value); }
+        void Emit(const void* absptr) { writer->Write(absptr); }
+        void Emit(LEB128 value) { writer->Write(value); }
+        void Emit(ULEB128 value) { writer->Write(value); }
+
+        template <class T1>
+        void Emit(ubyte op, T1 arg1)
+        {
+            Emit(op);
+            Emit(arg1);
+        }
+
+        template <class T1, class T2>
+        void Emit(ubyte op, T1 arg1, T2 arg2)
+        {
+            Emit(op, arg1);
+            Emit(arg2);
+        }
+
+    public:
+        Entry(Writer* writer) : writer(writer), beginOffset(-1)
+        {}
+
+        void Begin();
+        void End();
+
+#define ENTRY(name, op) \
+    void cfi_##name() \
+    { Emit(static_cast<ubyte>(op)); }
+
+#define ENTRY1(name, op, arg1_type) \
+    void cfi_##name(arg1_type arg1) \
+    { Emit(op, arg1); }
+
+#define ENTRY2(name, op, arg1_type, arg2_type) \
+    void cfi_##name(arg1_type arg1, arg2_type arg2) \
+    { Emit(op, arg1, arg2); }
+
+#define ENTRY_SM1(name, op, arg1_type) \
+    void cfi_##name(arg1_type arg1) \
+    { Assert((arg1) <= 0x3F); Emit(static_cast<ubyte>((op) | arg1)); }
+
+#define ENTRY_SM2(name, op, arg1_type, arg2_type) \
+    void cfi_##name(arg1_type arg1, arg2_type arg2) \
+    { Assert((arg1) <= 0x3F); Emit((op) | arg1, arg2); }
+
+#include "EhFrameCFI.inc"
+
+        void cfi_advance(uword advance);
+    };
+
+    // Common Information Entry
+    class CIE : public Entry
+    {
+    public:
+        CIE(Writer* writer) : Entry(writer)
+        {}
+
+        void Begin();
+    };
+
+    // Frame Description Entry
+    class FDE: public Entry
+    {
+    private:
+        size_t pcBeginOffset;
+
+    public:
+        FDE(Writer* writer) : Entry(writer)
+        {}
+
+        void Begin();
+        void UpdateAddressRange(const void* pcBegin, size_t pcRange);
+    };
+
+private:
+    Writer writer;
+    FDE fde;
+
+public:
+    EhFrame(BYTE* buffer, size_t size);
+
+    Writer* GetWriter()
+    {
+        return &writer;
+    }
+
+    FDE* GetFDE()
+    {
+        return &fde;
+    }
+
+    void End();
+
+    BYTE* Buffer() const
+    {
+        return writer.Buffer();
+    }
+
+    size_t Count() const
+    {
+        return writer.Count();
+    }
+};

+ 29 - 0
lib/Backend/EhFrameCFI.inc

@@ -0,0 +1,29 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+//
+// A subset of DWARF cfi
+//
+
+//          instruction     _2_6_ bits      arg1        arg2
+//
+ENTRY_SM1(  advance_loc,    0x1 << 6,       ubyte               )
+ENTRY_SM2(  offset,         0x2 << 6,       ubyte,      ULEB128 )
+ENTRY_SM1(  restore,        0x3 << 6,       ubyte               )
+
+ENTRY1   (  advance_loc1,   0x02,           ubyte               )
+ENTRY1   (  advance_loc2,   0x03,           uhalf               )
+ENTRY1   (  advance_loc4,   0x04,           uword               )
+
+ENTRY2   (  def_cfa,        0x0c,           ULEB128,    ULEB128 )
+ENTRY1   (  def_cfa_offset, 0x0e,           ULEB128             )
+
+ENTRY    (  nop,            0                                   )
+
+#undef ENTRY_SM1
+#undef ENTRY_SM2
+#undef ENTRY
+#undef ENTRY1
+#undef ENTRY2

+ 7 - 4
lib/Backend/Encoder.cpp

@@ -81,10 +81,12 @@ Encoder::Encode()
                 {
 #ifdef _M_X64
                 case Js::OpCode::PrologStart:
+                    m_func->m_prologEncoder.Begin(m_pc - m_encodeBuffer);
                     inProlog = true;
                     continue;
 
                 case Js::OpCode::PrologEnd:
+                    m_func->m_prologEncoder.End();
                     inProlog = false;
                     continue;
 #endif
@@ -313,8 +315,9 @@ Encoder::Encode()
     m_func->GetJITOutput()->RecordNativeCode(m_func, m_encodeBuffer, alloc);
 
 #ifdef _M_X64
-    m_func->m_prologEncoder.FinalizeUnwindInfo();
-    
+    m_func->m_prologEncoder.FinalizeUnwindInfo(
+        (BYTE*)m_func->GetJITOutput()->GetCodeAddress(), (DWORD)codeSize);
+
     m_func->GetJITOutput()->RecordUnwindInfo(
         0,
         m_func->m_prologEncoder.GetUnwindInfo(),
@@ -351,7 +354,7 @@ Encoder::Encode()
         {
             NativeOffsetInlineeFrameRecordOffset* pairs = NativeCodeDataNewArrayZNoFixup(m_func->GetNativeCodeDataAllocator(), NativeOffsetInlineeFrameRecordOffset, this->m_inlineeFrameMap->Count());
 
-            this->m_inlineeFrameMap->Map([&pairs](int i, NativeOffsetInlineeFramePair& p) 
+            this->m_inlineeFrameMap->Map([&pairs](int i, NativeOffsetInlineeFramePair& p)
             {
                 pairs[i].offset = p.offset;
                 if (p.record)
@@ -560,7 +563,7 @@ Encoder::Encode()
                 (*entry)->propId = propertyId;
                 (*entry)->guardsCount = count;
                 (*entry)->next = nullptr;
-                
+
                 auto& guardOffsets = (*entry)->guardOffsets;
                 int guardIndex = 0;
                 srcSet->Map([&guardOffsets, &guardIndex](Js::JitIndexedPropertyGuard* guard) -> void

+ 41 - 2
lib/Backend/InterpreterThunkEmitter.cpp

@@ -6,6 +6,7 @@
 
 #ifdef ENABLE_NATIVE_CODEGEN
 #ifdef _M_X64
+#ifdef _WIN32
 const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 23;
 const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 27;
 const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 37;
@@ -52,6 +53,44 @@ const BYTE InterpreterThunkEmitter::Epilog[] = {
     0x48, 0x83, 0xC4, StackAllocSize,                              // add         rsp,28h
     0xC3                                                           // ret
 };
+#else  // Sys V AMD64
+const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 7;
+const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 11;
+const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 21;
+const BYTE InterpreterThunkEmitter::ThunkSizeOffset = 35;
+const BYTE InterpreterThunkEmitter::ErrorOffset = 44;
+const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 57;
+
+const BYTE InterpreterThunkEmitter::PrologSize = 56;
+const BYTE InterpreterThunkEmitter::StackAllocSize = 0x0;
+
+const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
+    0x55,                                                       // push   rbp                   // Prolog - setup the stack frame
+    0x48, 0x89, 0xe5,                                           // mov    rbp, rsp
+    0x48, 0x8b, 0x47, 0x00,                                     // mov    rax, qword ptr [rdi + FunctionBodyOffset]
+    0x48, 0x8b, 0x50, 0x00,                                     // mov    rdx, qword ptr [rax + DynamicThunkAddressOffset]
+                                                                                                // Range Check for Valid call target
+    0x48, 0x83, 0xE2, 0xF8,                                     // and    rdx, 0xfffffffffffffff8   // Force 8 byte alignment
+    0x48, 0x89, 0xd1,                                           // mov    rcx, rdx
+    0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov    rax, CallBlockStartAddress
+    0x48, 0x29, 0xc1,                                           // sub    rcx, rax
+    0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00,                   // cmp    rcx, ThunkSize
+    0x76, 0x09,                                                 // jbe    safe
+    0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00,                   // mov    rcx, errorcode
+    0xcd, 0x29,                                                 // int    29h       <-- xplat TODO: just to exit
+
+    // safe:
+    0x48, 0x8d, 0x7c, 0x24, 0x10,                               // lea    rdi, [rsp+0x10]
+    0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov    rax, <thunk>          // stack already 16-byte aligned
+    0xff, 0xe2,                                                 // jmp    rdx
+    0xcc, 0xcc, 0xcc, 0xcc, 0xcc                                // int    3                     // for alignment to size of 8
+};
+
+const BYTE InterpreterThunkEmitter::Epilog[] = {
+    0x5d,                                                       // pop    rbp
+    0xc3                                                        // ret
+};
+#endif
 #elif defined(_M_ARM)
 const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 8;
 const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 18;
@@ -260,12 +299,12 @@ void InterpreterThunkEmitter::NewThunkBlock()
 #ifdef ASMJS_PLAT
     if (isAsmInterpreterThunk)
     {
-        interpreterThunk = Js::InterpreterStackFrame::InterpreterAsmThunk;
+        interpreterThunk = (void*)Js::InterpreterStackFrame::InterpreterAsmThunk;
     }
     else
 #endif
     {
-        interpreterThunk = Js::InterpreterStackFrame::InterpreterThunk;
+        interpreterThunk = (void*)Js::InterpreterStackFrame::InterpreterThunk;
     }
 
     allocation = emitBufferManager.AllocateBuffer(bufferSize, &buffer);

+ 29 - 29
lib/Backend/LowerMDShared.cpp

@@ -190,7 +190,7 @@ LowererMD::LowerCallHelper(IR::Instr *instrCall)
         Assert(regArg->m_sym->m_isSingleDef);
         IR::Instr *instrArg = regArg->m_sym->m_instrDef;
 
-        Assert(instrArg->m_opcode == Js::OpCode::ArgOut_A || 
+        Assert(instrArg->m_opcode == Js::OpCode::ArgOut_A ||
             (helperMethod == IR::JnHelperMethod::HelperOP_InitCachedScope && instrArg->m_opcode == Js::OpCode::ExtendArg_A));
         prevInstr = LoadHelperArgument(prevInstr, instrArg->GetSrc1());
 
@@ -447,22 +447,22 @@ LowererMD::LowerLeaveNull(IR::Instr *finallyEndInstr)
 #if _M_X64
     {
         // amd64_ReturnFromCallWithFakeFrame expects to find the spill size and args size
-        // in r8 and r9.
+        // in REG_EH_SPILL_SIZE and REG_EH_ARGS_SIZE.
 
-        // MOV r8, spillSize
+        // MOV REG_EH_SPILL_SIZE, spillSize
         IR::Instr *movR8 = IR::Instr::New(Js::OpCode::LdSpillSize,
-                                          IR::RegOpnd::New(nullptr, RegR8, TyMachReg, m_func),
+                                          IR::RegOpnd::New(nullptr, REG_EH_SPILL_SIZE, TyMachReg, m_func),
                                           m_func);
         finallyEndInstr->InsertBefore(movR8);
 
 
-        // MOV r9, argsSize
+        // MOV REG_EH_ARGS_SIZE, argsSize
         IR::Instr *movR9 = IR::Instr::New(Js::OpCode::LdArgSize,
-                                          IR::RegOpnd::New(nullptr, RegR9, TyMachReg, m_func),
+                                          IR::RegOpnd::New(nullptr, REG_EH_ARGS_SIZE, TyMachReg, m_func),
                                           m_func);
         finallyEndInstr->InsertBefore(movR9);
 
-        IR::Opnd *targetOpnd = IR::RegOpnd::New(nullptr, RegRCX, TyMachReg, m_func);
+        IR::Opnd *targetOpnd = IR::RegOpnd::New(nullptr, REG_EH_TARGET, TyMachReg, m_func);
         IR::Instr *movTarget = IR::Instr::New(Js::OpCode::MOV,
             targetOpnd,
             IR::HelperCallOpnd::New(IR::HelperOp_ReturnFromCallWithFakeFrame, m_func),
@@ -1114,12 +1114,12 @@ void LowererMD::ChangeToAdd(IR::Instr *const instr, const bool needFlags)
     MakeDstEquSrc1(instr);
 
     // Prefer INC for add by one
-    if(instr->GetDst()->IsEqual(instr->GetSrc1()) &&
+    if((instr->GetDst()->IsEqual(instr->GetSrc1()) &&
             instr->GetSrc2()->IsIntConstOpnd() &&
-            instr->GetSrc2()->AsIntConstOpnd()->GetValue() == 1 ||
-        instr->GetDst()->IsEqual(instr->GetSrc2()) &&
+            instr->GetSrc2()->AsIntConstOpnd()->GetValue() == 1) ||
+        (instr->GetDst()->IsEqual(instr->GetSrc2()) &&
             instr->GetSrc1()->IsIntConstOpnd() &&
-            instr->GetSrc1()->AsIntConstOpnd()->GetValue() == 1)
+            instr->GetSrc1()->AsIntConstOpnd()->GetValue() == 1))
     {
         if(instr->GetSrc1()->IsIntConstOpnd())
         {
@@ -1500,8 +1500,8 @@ LowererMD::Legalize(IR::Instr *const instr, bool fPostRegAlloc)
             break;
 
         case Js::OpCode::TEST:
-            if(instr->GetSrc1()->IsImmediateOpnd() && !instr->GetSrc2()->IsImmediateOpnd() ||
-                instr->GetSrc2()->IsMemoryOpnd() && !instr->GetSrc1()->IsMemoryOpnd())
+            if((instr->GetSrc1()->IsImmediateOpnd() && !instr->GetSrc2()->IsImmediateOpnd()) ||
+                (instr->GetSrc2()->IsMemoryOpnd() && !instr->GetSrc1()->IsMemoryOpnd()))
             {
                 if (verify)
                 {
@@ -2410,33 +2410,33 @@ LowererMD::GenerateFastStringCheck(IR::Instr *instr, IR::RegOpnd *srcReg1, IR::R
     // if src1 is not string
     // generate object test, if not equal jump to $helper
     // compare type check to string, if not jump to $helper
-    // 
+    //
     // if strict mode generate string test as above for src1 and jump to $failure if failed any time
     // else if not strict generate string test as above for src1 and jump to $helper if failed any time
-    // 
+    //
     // Compare length of src1 and src2 if not equal goto $failure
-    // 
+    //
     // if src1 is not flat string jump to $helper
-    // 
+    //
     // if src1 and src2 m_pszValue pointer match goto $success
-    // 
+    //
     // if src2 is not flat string jump to $helper
-    // 
+    //
     // if first character of src1 and src2 doesn't match goto $failure
-    // 
+    //
     // shift left by 1 length of src1 (length*2)
-    // 
+    //
     // memcmp src1 and src2 flat strings till length * 2
-    // 
+    //
     // test eax (result of memcmp)
     // if equal jump to $success else to $failure
-    // 
+    //
     // $success
     //     jmp to $fallthrough
     // $failure
     //     jmp to $fallthrough
     // $helper
-    // 
+    //
     // $fallthrough
 
     // Generates:
@@ -5914,8 +5914,8 @@ LowererMD::GenerateFastRecyclerAlloc(size_t allocSize, IR::RegOpnd* newObjDst, I
     size_t alignedSize = HeapInfo::GetAlignedSizeNoCheck(allocSize);
 
     bool allowNativeCodeBumpAllocation = scriptContext->GetRecyclerAllowNativeCodeBumpAllocation();
-    Recycler::GetNormalHeapBlockAllocatorInfoForNativeAllocation((void*)scriptContext->GetRecyclerAddr(), alignedSize, 
-        allocatorAddress, endAddressOffset, freeListOffset, 
+    Recycler::GetNormalHeapBlockAllocatorInfoForNativeAllocation((void*)scriptContext->GetRecyclerAddr(), alignedSize,
+        allocatorAddress, endAddressOffset, freeListOffset,
         allowNativeCodeBumpAllocation, this->m_func->IsOOPJIT());
 
     endAddressOpnd = IR::MemRefOpnd::New((char*)allocatorAddress + endAddressOffset, TyMachPtr, this->m_func, IR::AddrOpndKindDynamicRecyclerAllocatorEndAddressRef);
@@ -6071,7 +6071,7 @@ LowererMD::SaveDoubleToVar(IR::RegOpnd * dstOpnd, IR::RegOpnd *opndFloat, IR::In
 
     // s1 = XOR s1, FloatTag_Value
     // dst = s1
-    
+
     IR::Instr *setTag = IR::Instr::New(Js::OpCode::XOR,
                                        s1,
                                        s1,
@@ -7960,11 +7960,11 @@ LowererMD::LowerCommitScope(IR::Instr *instrCommit)
     opnd = IR::IndirOpnd::New(baseOpnd, Js::ActivationObjectEx::GetOffsetOfCommitFlag(), TyInt8, this->m_func);
     instrCommit->SetDst(opnd);
     instrCommit->SetSrc1(IR::IntConstOpnd::New(1, TyInt8, this->m_func));
-    
+
     LowererMD::ChangeToAssign(instrCommit);
 
     const Js::PropertyIdArray *propIds = instrCommit->m_func->GetJITFunctionBody()->GetFormalsPropIdArray();
-    
+
     uint firstVarSlot = (uint)Js::ActivationObjectEx::GetFirstVarSlot(propIds);
     if (firstVarSlot < propIds->count)
     {

+ 13 - 24
lib/Backend/NativeCodeData.cpp

@@ -4,17 +4,6 @@
 //-------------------------------------------------------------------------------------------------------
 #include "Backend.h"
 
-char DataDesc_None[] = "";
-char DataDesc_InlineeFrameRecord_ArgOffsets[] = "";
-char DataDesc_InlineeFrameRecord_Constants[] = "";
-char DataDesc_BailoutInfo_CotalOutParamCount[] = "";
-char DataDesc_ArgOutOffsetInfo_StartCallOutParamCounts[] = "";
-char DataDesc_ArgOutOffsetInfo_StartCallArgRestoreAdjustCounts[] = "";
-char DataDesc_LowererMD_LoadFloatValue_Float[] = "";
-char DataDesc_LowererMD_LoadFloatValue_Double[] = "";
-char DataDesc_LowererMD_EmitLoadFloatCommon_Double[] = "";
-char DataDesc_LowererMD_Simd128LoadConst[] = "";
-
 NativeCodeData::NativeCodeData(DataChunk * chunkList) : chunkList(chunkList)
 {
 #ifdef PERF_COUNTERS
@@ -51,20 +40,20 @@ NativeCodeData::AddFixupEntry(void* targetAddr, void* targetStartAddr, void* add
     }
 
     Assert(targetStartAddr);
-    
+
     unsigned int inDataOffset = (unsigned int)((char*)targetAddr - (char*)targetStartAddr);
     DataChunk* targetChunk = NativeCodeData::GetDataChunk(targetStartAddr);
     Assert(targetChunk->len >= inDataOffset);
 
 #if DBG
     bool foundTargetChunk = false;
-    while (chunkList) 
+    while (chunkList)
     {
         foundTargetChunk |= (chunkList == targetChunk);
         chunkList = chunkList->next;
     }
     AssertMsg(foundTargetChunk, "current pointer is not allocated with NativeCodeData allocator?"); // change to valid check instead of assertion?
-#endif    
+#endif
 
     DataChunk* chunk = NativeCodeData::GetDataChunk(startAddress);
 
@@ -75,7 +64,7 @@ NativeCodeData::AddFixupEntry(void* targetAddr, void* targetStartAddr, void* add
     }
     __analysis_assume(entry);
     entry->addrOffset = (unsigned int)((__int64)addrToFixup - (__int64)startAddress);
-    Assert(entry->addrOffset <= chunk->len - sizeof(void*));    
+    Assert(entry->addrOffset <= chunk->len - sizeof(void*));
 
     entry->targetTotalOffset = targetChunk->offset + inDataOffset;
     entry->next = chunk->fixupList;
@@ -84,7 +73,7 @@ NativeCodeData::AddFixupEntry(void* targetAddr, void* targetStartAddr, void* add
 #if DBG
     if (PHASE_TRACE1(Js::NativeCodeDataPhase))
     {
-        Output::Print(L"NativeCodeData Add Fixup: %p(%p+%d, chunk:%p)  -->  %p(chunk:%p)  %S\n", 
+        Output::Print(_u("NativeCodeData Add Fixup: %p(%p+%d, chunk:%p)  -->  %p(chunk:%p)  %S\n"),
             addrToFixup, startAddress, entry->addrOffset, (void*)chunk, targetAddr, (void*)targetChunk, chunk->dataType);
     }
 #endif
@@ -132,25 +121,25 @@ NativeCodeData::AddFixupEntryForPointerArray(void* startAddress, DataChunk * chu
 #if DBG
         if (PHASE_TRACE1(Js::NativeCodeDataPhase))
         {
-            Output::Print(L"NativeCodeData Add Fixup: %p[%d](+%d, chunk:%p)  -->  %p(chunk:%p)  %S\n",
+            Output::Print(_u("NativeCodeData Add Fixup: %p[%d](+%d, chunk:%p)  -->  %p(chunk:%p)  %S\n"),
                 startAddress, i, entry->addrOffset, (void*)chunk, targetAddr, (void*)targetChunk, chunk->dataType);
         }
 #endif
     }
 }
 
-wchar_t* 
+char16*
 NativeCodeData::GetDataDescription(void* data, JitArenaAllocator * alloc)
 {
     auto chunk = GetDataChunk(data);
-    wchar_t buf[1024] = { 0 };
+    char16 buf[1024] = { 0 };
 #if DBG
-    swprintf_s(buf, L"%hs, NativeCodeData: index: %x, len: %x, offset: +%x", chunk->dataType, chunk->allocIndex, chunk->len, chunk->offset);
+    swprintf_s(buf, _u("%hs, NativeCodeData: index: %x, len: %x, offset: +%x"), chunk->dataType, chunk->allocIndex, chunk->len, chunk->offset);
 #else
-    swprintf_s(buf, L"NativeCodeData: index: %x, len: %x, offset: +%x", chunk->allocIndex, chunk->len, chunk->offset);
+    swprintf_s(buf, _u("NativeCodeData: index: %x, len: %x, offset: +%x"), chunk->allocIndex, chunk->len, chunk->offset);
 #endif
     auto len = wcslen(buf) + 1;
-    auto desc = JitAnewArray(alloc, wchar_t, len);
+    auto desc = JitAnewArray(alloc, char16, len);
     wcscpy_s(desc, len, buf);
     return desc;
 }
@@ -160,7 +149,7 @@ NativeCodeData::VerifyExistFixupEntry(void* targetAddr, void* addrToFixup, void*
 {
     DataChunk* chunk = NativeCodeData::GetDataChunk(startAddress);
     DataChunk* targetChunk = NativeCodeData::GetDataChunk(targetAddr);
-    if (chunk->len == 0) 
+    if (chunk->len == 0)
     {
         return;
     }
@@ -216,7 +205,7 @@ char *
 NativeCodeData::Allocator::Alloc(size_t requestSize)
 {
     char * data = nullptr;
-    Assert(!finalized);    
+    Assert(!finalized);
     requestSize = Math::Align(requestSize, sizeof(void*));
     DataChunk * newChunk = HeapNewStructPlus(requestSize, DataChunk);
 

+ 34 - 29
lib/Backend/NativeCodeData.h

@@ -47,7 +47,7 @@ public:
         return (NativeCodeData::DataChunk*)((char*)data - offsetof(NativeCodeData::DataChunk, data));
     }
 
-    static wchar_t* GetDataDescription(void* data, JitArenaAllocator * alloc);
+    static char16* GetDataDescription(void* data, JitArenaAllocator * alloc);
 
     static unsigned int GetDataTotalOffset(void* data)
     {
@@ -78,7 +78,7 @@ public:
 
         char * Alloc(DECLSPEC_GUARD_OVERFLOW size_t requestedBytes);
         char * AllocZero(DECLSPEC_GUARD_OVERFLOW size_t requestedBytes);
-        char * AllocLeaf(__declspec(guard(overflow)) size_t requestedBytes);
+        char * AllocLeaf(DECLSPEC_GUARD_OVERFLOW size_t requestedBytes);
 
         NativeCodeData * Finalize();
         void Free(void * buffer, size_t byteSize);
@@ -110,7 +110,7 @@ public:
         void Fixup(NativeCodeData::DataChunk* chunkList)
         {
             int count = NativeCodeData::GetDataChunk(this)->len / sizeof(T);
-            while (count-- > 0) 
+            while (count-- > 0)
             {
                 (((T*)this) + count)->Fixup(chunkList);
             }
@@ -128,8 +128,8 @@ public:
             DataChunk* chunk = NativeCodeData::GetDataChunk(dataBlock);
             chunk->dataType = typeid(T).name();
             if (PHASE_TRACE1(Js::NativeCodeDataPhase))
-            {                
-                Output::Print(L"NativeCodeData AllocNoFix: chunk: %p, data: %p, index: %d, len: %x, totalOffset: %x, type: %S\n",
+            {
+                Output::Print(_u("NativeCodeData AllocNoFix: chunk: %p, data: %p, index: %d, len: %x, totalOffset: %x, type: %S\n"),
                     chunk, (void*)dataBlock, chunk->allocIndex, chunk->len, chunk->offset, chunk->dataType);
             }
 #endif
@@ -145,7 +145,7 @@ public:
             chunk->dataType = typeid(T).name();
             if (PHASE_TRACE1(Js::NativeCodeDataPhase))
             {
-                Output::Print(L"NativeCodeData AllocNoFix: chunk: %p, data: %p, index: %d, len: %x, totalOffset: %x, type: %S\n",
+                Output::Print(_u("NativeCodeData AllocNoFix: chunk: %p, data: %p, index: %d, len: %x, totalOffset: %x, type: %S\n"),
                     chunk, (void*)dataBlock, chunk->allocIndex, chunk->len, chunk->offset, chunk->dataType);
             }
 #endif
@@ -172,7 +172,7 @@ public:
             chunk->dataType = typeid(T).name();
             if (PHASE_TRACE1(Js::NativeCodeDataPhase))
             {
-                Output::Print(L"NativeCodeData Alloc: chunk: %p, data: %p, index: %d, len: %x, totalOffset: %x, type: %S\n",
+                Output::Print(_u("NativeCodeData Alloc: chunk: %p, data: %p, index: %d, len: %x, totalOffset: %x, type: %S\n"),
                     chunk, (void*)dataBlock, chunk->allocIndex, chunk->len, chunk->offset, chunk->dataType);
             }
 #endif
@@ -199,37 +199,40 @@ public:
     ~NativeCodeData();
 };
 
-char DataDesc_None[];
-char DataDesc_InlineeFrameRecord_ArgOffsets[];
-char DataDesc_InlineeFrameRecord_Constants[];
-char DataDesc_BailoutInfo_CotalOutParamCount[];
-char DataDesc_ArgOutOffsetInfo_StartCallOutParamCounts[];
-char DataDesc_ArgOutOffsetInfo_StartCallArgRestoreAdjustCounts[];
-char DataDesc_LowererMD_LoadFloatValue_Float[];
-char DataDesc_LowererMD_LoadFloatValue_Double[];
-char DataDesc_LowererMD_EmitLoadFloatCommon_Double[];
-char DataDesc_LowererMD_Simd128LoadConst[];
-
-template<char const *desc = DataDesc_None>
-struct IntType 
-{ 
-    int data; 
+enum DataDesc
+{
+    DataDesc_None,
+    DataDesc_InlineeFrameRecord_ArgOffsets,
+    DataDesc_InlineeFrameRecord_Constants,
+    DataDesc_BailoutInfo_CotalOutParamCount,
+    DataDesc_ArgOutOffsetInfo_StartCallOutParamCounts,
+    DataDesc_ArgOutOffsetInfo_StartCallArgRestoreAdjustCounts,
+    DataDesc_LowererMD_LoadFloatValue_Float,
+    DataDesc_LowererMD_LoadFloatValue_Double,
+    DataDesc_LowererMD_EmitLoadFloatCommon_Double,
+    DataDesc_LowererMD_Simd128LoadConst,
 };
 
-template<char const *desc = DataDesc_None>
+template<DataDesc desc = DataDesc_None>
+struct IntType
+{
+    int data;
+};
+
+template<DataDesc desc = DataDesc_None>
 struct UIntType
 {
     uint data;
 };
 
-template<char const *desc = DataDesc_None>
+template<DataDesc desc = DataDesc_None>
 struct FloatType
 {
     FloatType(float val) :data(val) {}
     float data;
 };
 
-template<char const *desc = DataDesc_None>
+template<DataDesc desc = DataDesc_None>
 struct DoubleType
 {
     DoubleType() {}
@@ -237,7 +240,7 @@ struct DoubleType
     double data;
 };
 
-template<char const *desc = DataDesc_None>
+template<DataDesc desc = DataDesc_None>
 struct SIMDType
 {
     SIMDType() {}
@@ -245,7 +248,7 @@ struct SIMDType
     AsmJsSIMDValue data;
 };
 
-template<char const *desc = DataDesc_None>
+template<DataDesc desc = DataDesc_None>
 struct VarType
 {
     Js::Var data;
@@ -254,14 +257,16 @@ struct VarType
         AssertMsg(false, "Please specialize Fixup method for this Var type or use no-fixup allocator");
     }
 };
+
 template<>
-void VarType<DataDesc_InlineeFrameRecord_Constants>::Fixup(NativeCodeData::DataChunk* chunkList) 
+inline void VarType<DataDesc_InlineeFrameRecord_Constants>::Fixup(NativeCodeData::DataChunk* chunkList)
 {
     AssertMsg(false, "InlineeFrameRecord::constants contains Var from main process, should not fixup");
 }
 
 struct GlobalBailOutRecordDataTable;
-template<> void NativeCodeData::Array<GlobalBailOutRecordDataTable *>::Fixup(NativeCodeData::DataChunk* chunkList)
+template<>
+inline void NativeCodeData::Array<GlobalBailOutRecordDataTable *>::Fixup(NativeCodeData::DataChunk* chunkList)
 {
     NativeCodeData::AddFixupEntryForPointerArray(this, chunkList);
 }

+ 24 - 1
lib/Backend/PDataManager.cpp

@@ -7,6 +7,11 @@
 // Conditionally-compiled on x64 and arm
 #if PDATA_ENABLED
 
+#ifdef _WIN32
+// ----------------------------------------------------------------------------
+//  _WIN32 x64 unwind uses PDATA
+// ----------------------------------------------------------------------------
+
 void PDataManager::RegisterPdata(RUNTIME_FUNCTION* pdataStart, _In_ const ULONG_PTR functionStart, _In_ const ULONG_PTR functionEnd, _Out_ PVOID* pdataTable, ULONG entryCount, ULONG maxEntryCount)
 {
     BOOLEAN success = FALSE;
@@ -48,4 +53,22 @@ void PDataManager::UnregisterPdata(RUNTIME_FUNCTION* pdata)
         Assert(success);
     }
 }
-#endif
+
+#else  // !_WIN32
+// ----------------------------------------------------------------------------
+//  !_WIN32 x64 unwind uses .eh_frame
+// ----------------------------------------------------------------------------
+
+void PDataManager::RegisterPdata(RUNTIME_FUNCTION* pdataStart, _In_ const ULONG_PTR functionStart, _In_ const ULONG_PTR functionEnd, _Out_ PVOID* pdataTable, ULONG entryCount, ULONG maxEntryCount)
+{
+    __register_frame(pdataStart);
+    *pdataTable = pdataStart;
+}
+
+void PDataManager::UnregisterPdata(RUNTIME_FUNCTION* pdata)
+{
+    __deregister_frame(pdata);
+}
+
+#endif  // !_WIN32
+#endif  // PDATA_ENABLED

+ 131 - 2
lib/Backend/PrologEncoder.cpp

@@ -5,6 +5,11 @@
 #include "Backend.h"
 #include "PrologEncoderMD.h"
 
+#ifdef _WIN32
+// ----------------------------------------------------------------------------
+//  _WIN32 x64 unwind uses PDATA
+// ----------------------------------------------------------------------------
+
 void PrologEncoder::RecordNonVolRegSave()
 {
     requiredUnwindCodeNodeCount++;
@@ -175,12 +180,12 @@ BYTE *PrologEncoder::Finalize(BYTE *functionStart,
     pdata->runtimeFunction.EndAddress   = codeSize;
     pdata->runtimeFunction.UnwindData   = (DWORD)((pdataBuffer + sizeof(RUNTIME_FUNCTION)) - functionStart);
 
-    FinalizeUnwindInfo();
+    FinalizeUnwindInfo(functionStart, codeSize);
 
     return (BYTE *)&pdata->runtimeFunction;
 }
 
-void PrologEncoder::FinalizeUnwindInfo()
+void PrologEncoder::FinalizeUnwindInfo(BYTE *functionStart, DWORD codeSize)
 {
     pdata->unwindInfo.Version           = 1;
     pdata->unwindInfo.Flags             = 0;
@@ -213,3 +218,127 @@ BYTE *PrologEncoder::GetUnwindInfo()
 {
     return (BYTE *)&pdata->unwindInfo;
 }
+
+#else  // !_WIN32
+// ----------------------------------------------------------------------------
+//  !_WIN32 x64 unwind uses .eh_frame
+// ----------------------------------------------------------------------------
+
+static const int SMALL_EHFRAME_SIZE = 0x40;
+
+void PrologEncoder::EncodeSmallProlog(uint8 prologSize, size_t size)
+{
+    Assert(ehFrame == nullptr);
+
+    BYTE* buffer = AnewArray(alloc, BYTE, SMALL_EHFRAME_SIZE);
+    ehFrame = Anew(alloc, EhFrame, buffer, SMALL_EHFRAME_SIZE);
+
+    auto fde = ehFrame->GetFDE();
+
+    // prolog: push rbp
+    fde->cfi_advance_loc(1);                    // DW_CFA_advance_loc: 1
+    fde->cfi_def_cfa_offset(MachPtr * 2);       // DW_CFA_def_cfa_offset: 16
+    fde->cfi_offset(GetDwarfRegNum(LowererMDArch::GetRegFramePointer()), 2); // DW_CFA_offset: r6 (rbp) at cfa-16
+
+    ehFrame->End();
+}
+
+DWORD PrologEncoder::SizeOfPData()
+{
+    return ehFrame->Count();
+}
+
+BYTE* PrologEncoder::Finalize(BYTE *functionStart, DWORD codeSize, BYTE *pdataBuffer)
+{
+    auto fde = ehFrame->GetFDE();
+    fde->UpdateAddressRange(functionStart, codeSize);
+    return ehFrame->Buffer();
+}
+
+// TODO: We can also pre-calculate size needed based on #push/xmm/saves/stack allocs
+static const int JIT_EHFRAME_SIZE = 0x80;
+
+void PrologEncoder::Begin(size_t prologStartOffset)
+{
+    Assert(ehFrame == nullptr);
+    Assert(currentInstrOffset == 0);
+
+    BYTE* buffer = AnewArray(alloc, BYTE, JIT_EHFRAME_SIZE);
+    ehFrame = Anew(alloc, EhFrame, buffer, JIT_EHFRAME_SIZE);
+
+    currentInstrOffset = prologStartOffset;
+}
+
+void PrologEncoder::End()
+{
+    ehFrame->End();
+}
+
+void PrologEncoder::FinalizeUnwindInfo(BYTE *functionStart, DWORD codeSize)
+{
+    auto fde = ehFrame->GetFDE();
+    fde->UpdateAddressRange(functionStart, codeSize);
+}
+
+void PrologEncoder::EncodeInstr(IR::Instr *instr, unsigned __int8 size)
+{
+    auto fde = ehFrame->GetFDE();
+
+    uint8 unwindCodeOp = PrologEncoderMD::GetOp(instr);
+
+    Assert((currentInstrOffset + size) > currentInstrOffset);
+    currentInstrOffset += size;
+
+    switch (unwindCodeOp)
+    {
+    case UWOP_PUSH_NONVOL:
+    {
+        const uword advance = currentInstrOffset - cfiInstrOffset;
+        cfiInstrOffset = currentInstrOffset;
+        cfaWordOffset++;
+
+        fde->cfi_advance(advance);                              // DW_CFA_advance_loc: ?
+        fde->cfi_def_cfa_offset(cfaWordOffset * MachPtr);       // DW_CFA_def_cfa_offset: ??
+
+        const ubyte reg = PrologEncoderMD::GetNonVolRegToSave(instr) + 1;
+        fde->cfi_offset(GetDwarfRegNum(reg), cfaWordOffset);    // DW_CFA_offset: r? at cfa-??
+        break;
+    }
+
+    case UWOP_SAVE_XMM128:
+    {
+        // TODO
+        break;
+    }
+
+    case UWOP_ALLOC_SMALL:
+    case UWOP_ALLOC_LARGE:
+    {
+        size_t allocaSize = PrologEncoderMD::GetAllocaSize(instr);
+        Assert(allocaSize % MachPtr == 0);
+
+        size_t slots = allocaSize / MachPtr;
+        Assert(cfaWordOffset + slots > cfaWordOffset);
+
+        const uword advance = currentInstrOffset - cfiInstrOffset;
+        cfiInstrOffset = currentInstrOffset;
+        cfaWordOffset += slots;
+
+        fde->cfi_advance(advance);                          // DW_CFA_advance_loc: ?
+        fde->cfi_def_cfa_offset(cfaWordOffset * MachPtr);   // DW_CFA_def_cfa_offset: ??
+        break;
+    }
+
+    case UWOP_IGNORE:
+    {
+        return;
+    }
+
+    default:
+    {
+        AssertMsg(false, "PrologEncoderMD returned unsupported UnwindCodeOp.");
+    }
+    }
+}
+
+#endif  // !_WIN32

+ 49 - 2
lib/Backend/PrologEncoder.h

@@ -19,6 +19,11 @@ enum UnwindOp : unsigned __int8 {
     UWOP_SAVE_XMM128 =  8
 };
 
+#ifdef _WIN32
+// ----------------------------------------------------------------------------
+//  _WIN32 x64 unwind uses PDATA
+// ----------------------------------------------------------------------------
+
 class PrologEncoder
 {
 private:
@@ -147,11 +152,53 @@ public:
     //
     // Win8 PDATA registration.
     //
+    void Begin(size_t prologStartOffset) {}  // No op on _WIN32
+    void End() {}  // No op on _WIN32
     DWORD SizeOfUnwindInfo();
     BYTE *GetUnwindInfo();
-    void FinalizeUnwindInfo();
-
+    void FinalizeUnwindInfo(BYTE *functionStart, DWORD codeSize);
 private:
     UnwindCode *GetUnwindCode(unsigned __int8 nodeCount);
 
 };
+
+#else  // !_WIN32
+// ----------------------------------------------------------------------------
+//  !_WIN32 x64 unwind uses .eh_frame
+// ----------------------------------------------------------------------------
+#include "EhFrame.h"
+
+class PrologEncoder
+{
+private:
+    ArenaAllocator* alloc;
+    EhFrame* ehFrame;
+
+    size_t cfiInstrOffset;      // last cfi emit instr offset
+    size_t currentInstrOffset;  // current instr offset
+                                // currentInstrOffset - cfiInstrOffset == advance
+    unsigned cfaWordOffset;
+
+public:
+    PrologEncoder(ArenaAllocator *alloc)
+        : alloc(alloc), ehFrame(nullptr),
+          cfiInstrOffset(0), currentInstrOffset(0), cfaWordOffset(1)
+    {}
+
+    void RecordNonVolRegSave() {}
+    void RecordXmmRegSave() {}
+    void RecordAlloca(size_t size) {}
+    void EncodeInstr(IR::Instr *instr, uint8 size);
+
+    void EncodeSmallProlog(uint8 prologSize, size_t size);
+    DWORD SizeOfPData();
+    BYTE *Finalize(BYTE *functionStart, DWORD codeSize, BYTE *pdataBuffer);
+
+    void Begin(size_t prologStartOffset);
+    void End();
+    DWORD SizeOfUnwindInfo() { return SizeOfPData(); }
+    BYTE *GetUnwindInfo() { return ehFrame->Buffer(); }
+    void FinalizeUnwindInfo(BYTE *functionStart, DWORD codeSize);
+};
+
+#endif  // !_WIN32

+ 27 - 29
lib/Backend/amd64/LinearScanMD.cpp

@@ -238,13 +238,13 @@ LinearScanMD::GenerateBailOut(IR::Instr * instr, __in_ecount(registerSaveSymsCou
     Assert(static_cast<int>(registerSaveSymsCount) == static_cast<int>(RegNumCount-1));
 
     // Save registers used for parameters, and rax, if necessary, into the shadow space allocated for register parameters:
-    //     mov  [rsp + 16], rdx
-    //     mov  [rsp + 8], rcx
+    //     mov  [rsp + 16], RegArg1     (if branchConditionOpnd)
+    //     mov  [rsp + 8], RegArg0
     //     mov  [rsp], rax
-    for(RegNum reg = bailOutInfo->branchConditionOpnd ? RegRDX : RegRCX;
-        reg != RegNOREG;
-        reg = static_cast<RegNum>(reg - 1))
+    const RegNum regs[3] = { RegRAX, RegArg0, RegArg1 };
+    for (int i = (bailOutInfo->branchConditionOpnd ? 2 : 1); i >= 0; i--)
     {
+        RegNum reg = regs[i];
         StackSym *const stackSym = registerSaveSyms[reg - 1];
         if(!stackSym)
         {
@@ -253,7 +253,7 @@ LinearScanMD::GenerateBailOut(IR::Instr * instr, __in_ecount(registerSaveSymsCou
 
         const IRType regType = RegTypes[reg];
         Lowerer::InsertMove(
-            IR::SymOpnd::New(func->m_symTable->GetArgSlotSym(static_cast<Js::ArgSlot>(reg)), regType, func),
+            IR::SymOpnd::New(func->m_symTable->GetArgSlotSym(static_cast<Js::ArgSlot>(i + 1)), regType, func),
             IR::RegOpnd::New(stackSym, reg, regType, func),
             instr);
     }
@@ -261,44 +261,42 @@ LinearScanMD::GenerateBailOut(IR::Instr * instr, __in_ecount(registerSaveSymsCou
     if(bailOutInfo->branchConditionOpnd)
     {
         // Pass in the branch condition
-        //     mov  rdx, condition
+        //     mov  RegArg1, condition
         IR::Instr *const newInstr =
             Lowerer::InsertMove(
-                IR::RegOpnd::New(nullptr, RegRDX, bailOutInfo->branchConditionOpnd->GetType(), func),
+                IR::RegOpnd::New(nullptr, RegArg1, bailOutInfo->branchConditionOpnd->GetType(), func),
                 bailOutInfo->branchConditionOpnd,
                 instr);
         linearScan->SetSrcRegs(newInstr);
     }
 
-
     if (!func->IsOOPJIT())
     {
         // Pass in the bailout record
-        //     mov  rcx, bailOutRecord
+        //     mov  RegArg0, bailOutRecord
         Lowerer::InsertMove(
-            IR::RegOpnd::New(nullptr, RegRCX, TyMachPtr, func),
+            IR::RegOpnd::New(nullptr, RegArg0, TyMachPtr, func),
             IR::AddrOpnd::New(bailOutInfo->bailOutRecord, IR::AddrOpndKindDynamicBailOutRecord, func, true),
             instr);
-
     }
     else
     {
-        // move rcx, dataAddr
+        // move RegArg0, dataAddr
         Lowerer::InsertMove(
-            IR::RegOpnd::New(nullptr, RegRCX, TyMachPtr, func),
+            IR::RegOpnd::New(nullptr, RegArg0, TyMachPtr, func),
             IR::AddrOpnd::New(func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, IR::AddrOpndKindDynamicNativeCodeDataRef, func),
             instr);
 
-        // mov rcx, [rcx]
+        // mov RegArg0, [RegArg0]
         Lowerer::InsertMove(
-            IR::RegOpnd::New(nullptr, RegRCX, TyMachPtr, func),
-            IR::IndirOpnd::New(IR::RegOpnd::New(nullptr, RegRCX, TyVar, this->func), 0, TyMachPtr, func),
+            IR::RegOpnd::New(nullptr, RegArg0, TyMachPtr, func),
+            IR::IndirOpnd::New(IR::RegOpnd::New(nullptr, RegArg0, TyVar, this->func), 0, TyMachPtr, func),
             instr);
 
-        // lea rcx, [rcx + bailoutRecord_offset]
+        // lea RegArg0, [RegArg0 + bailoutRecord_offset]
         int bailoutRecordOffset = NativeCodeData::GetDataTotalOffset(bailOutInfo->bailOutRecord);
-        Lowerer::InsertLea(IR::RegOpnd::New(nullptr, RegRCX, TyVar, this->func), 
-            IR::IndirOpnd::New(IR::RegOpnd::New(nullptr, RegRCX, TyVar, this->func), bailoutRecordOffset, TyMachPtr,
+        Lowerer::InsertLea(IR::RegOpnd::New(nullptr, RegArg0, TyVar, this->func),
+            IR::IndirOpnd::New(IR::RegOpnd::New(nullptr, RegArg0, TyVar, this->func), bailoutRecordOffset, TyMachPtr,
 #if DBG
             NativeCodeData::GetDataDescription(bailOutInfo->bailOutRecord, func->m_alloc),
 #endif
@@ -511,7 +509,7 @@ RegNum LinearScanMD::GetParamReg(IR::SymOpnd *symOpnd, Func *func)
                 switch (paramSym->GetParamSlotNum())
                 {
                 case 1:
-                    reg = RegRCX;
+                    reg = RegArg0;
                     break;
                 default:
                     Assert(UNREACHED);
@@ -522,13 +520,13 @@ RegNum LinearScanMD::GetParamReg(IR::SymOpnd *symOpnd, Func *func)
                 switch (paramSym->GetParamSlotNum())
                 {
                 case 1:
-                    reg = RegRDX;
+                    reg = RegArg1;
                     break;
                 case 2:
-                    reg = RegR8;
+                    reg = RegArg2;
                     break;
                 case 3:
-                    reg = RegR9;
+                    reg = RegArg3;
                     break;
                 }
             }
@@ -543,10 +541,10 @@ RegNum LinearScanMD::GetParamReg(IR::SymOpnd *symOpnd, Func *func)
             switch (paramSym->GetParamSlotNum())
             {
             case 1:
-                reg = RegRDX;
+                reg = RegArg0;
                 break;
             case 2:
-                reg = RegRCX;
+                reg = RegArg1;
                 break;
             }
         }
@@ -555,14 +553,14 @@ RegNum LinearScanMD::GetParamReg(IR::SymOpnd *symOpnd, Func *func)
             switch (paramSym->GetParamSlotNum())
             {
             case 1:
-                reg = RegR8;
+                reg = RegArg2;
                 break;
             case 2:
-                reg = RegR9;
+                reg = RegArg3;
                 break;
             }
         }
     }
 
     return reg;
-}
+}

+ 139 - 0
lib/Backend/amd64/LinearScanMdA.S

@@ -0,0 +1,139 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+.intel_syntax noprefix
+#include "unixasmmacros.inc"
+
+
+#ifndef __APPLE__
+// BailOutRecord::BailOut(BailOutRecord const * bailOutRecord)
+// .extern _ZN13BailOutRecord7BailOutEPKS_
+
+// BranchBailOutRecord::BailOut(BranchBailOutRecord const * bailOutRecord, BOOL cond)
+// .extern _ZN19BranchBailOutRecord7BailOutEPKS_i
+#endif
+
+
+//------------------------------------------------------------------------------
+// LinearScanMD::SaveAllRegisters(BailOutRecord *const bailOutRecord)
+
+.balign 16
+LEAF_ENTRY _ZN12LinearScanMD26SaveAllRegistersEP13BailOutRecord, _TEXT
+
+    // [rsp + 7 * 8] == saved rax
+    // [rsp + 8 * 8] == saved rdi
+    // [rsp + 9 * 8] == saved rsi
+    // rdi == bailOutRecord
+    // rsi == condition
+
+    mov rax, [rdi] // bailOutRecord->globalBailOutRecordDataTable
+    mov rax, [rax] // bailOutRecord->globalBailOutRecordDataTable->registerSaveSpace
+
+    // Save r8 first to free up a register
+    mov [rax + 8 * 8], r8
+
+    // Save the original values of rax, rdi, and rsi into the actual register save space
+    mov r8, [rsp + 7 * 8] // saved rax
+    mov [rax + 0 * 8], r8
+    mov r8, [rsp + 8 * 8] // saved rdi
+    mov [rax + 7 * 8], r8
+    mov r8, [rsp + 9 * 8] // saved rsi
+    mov [rax + 6 * 8], r8
+
+    // Save remaining registers
+    mov [rax + 1 * 8], rcx
+    mov [rax + 2 * 8], rdx
+    mov [rax + 3 * 8], rbx
+    // [rax + 4 * 8] == save space for rsp, which doesn't need to be saved since bailout uses rbp for stack access
+    mov [rax + 5 * 8], rbp
+    // mov [rax + 6 * 8], rsi   // rsi saved above
+    // mov [rax + 7 * 8], rdi   // rdi saved above
+    // mov [rax + 8 * 8], r8    // r8 was saved earlier
+    mov [rax + 9 * 8], r9
+    mov [rax + 10 * 8], r10
+    mov [rax + 11 * 8], r11
+    mov [rax + 12 * 8], r12
+    mov [rax + 13 * 8], r13
+    mov [rax + 14 * 8], r14
+    mov [rax + 15 * 8], r15
+
+    // Save all XMM regs (full width)
+    movups xmmword ptr [rax + 80h], xmm0         // [rax + 16 * 8 + 0 * 16] = xmm0
+    movups xmmword ptr [rax + 90h], xmm1         // [rax + 16 * 8 + 1 * 16] = xmm1
+    movups xmmword ptr [rax + 0a0h], xmm2        //  ...
+    // movups xmmword ptr [rax + 0b0h], xmm3    // xplat: fails to assemble -- GNU as likely parses "0b" as a binary-literal prefix in Intel syntax; use 11 * 16 below instead
+    movups xmmword ptr [rax + 11 * 16], xmm3
+    movups xmmword ptr [rax + 0c0h], xmm4
+    movups xmmword ptr [rax + 0d0h], xmm5
+    movups xmmword ptr [rax + 0e0h], xmm6
+    movups xmmword ptr [rax + 0f0h], xmm7
+    movups xmmword ptr [rax + 100h], xmm8
+    movups xmmword ptr [rax + 110h], xmm9
+    movups xmmword ptr [rax + 120h], xmm10
+    movups xmmword ptr [rax + 130h], xmm11
+    movups xmmword ptr [rax + 140h], xmm12
+    movups xmmword ptr [rax + 150h], xmm13
+    movups xmmword ptr [rax + 160h], xmm14
+    movups xmmword ptr [rax + 170h], xmm15       // [rax + 16 * 8 + 15 * 16] = xmm15
+
+    ret
+
+LEAF_END _ZN12LinearScanMD26SaveAllRegistersEP13BailOutRecord, _TEXT
+
+
+//------------------------------------------------------------------------------
+// LinearScanMD::SaveAllRegistersAndBailOut(BailOutRecord *const bailOutRecord)
+
+.balign 16
+NESTED_ENTRY _ZN12LinearScanMD26SaveAllRegistersAndBailOutEP13BailOutRecord, _TEXT, NoHandler
+
+    // We follow custom calling convention
+    // [rsp + 1 * 8] == saved rax
+    // [rsp + 2 * 8] == saved rdi
+    // rdi == bailOutRecord
+
+    // Relative to this function, SaveAllRegisters expects:
+    //     [rsp + 3 * 8] == saved rsi
+    // Since rsi is not a parameter to this function, it won't be saved on the stack by jitted code, so copy it there now
+
+    mov [rsp + 3 * 8], rsi
+
+    sub rsp, 28h        // use the same as Windows x64 so register locations are the same
+    .cfi_adjust_cfa_offset 0x28
+
+    call _ZN12LinearScanMD26SaveAllRegistersEP13BailOutRecord
+
+    add rsp, 28h        // deallocate stack space
+    .cfi_adjust_cfa_offset -0x28
+
+    jmp _ZN13BailOutRecord7BailOutEPKS_
+
+NESTED_END _ZN12LinearScanMD26SaveAllRegistersAndBailOutEP13BailOutRecord, _TEXT
+
+
+//------------------------------------------------------------------------------
+// LinearScanMD::SaveAllRegistersAndBranchBailOut(BranchBailOutRecord *const bailOutRecord, const BOOL condition)
+
+.balign 16
+NESTED_ENTRY _ZN12LinearScanMD32SaveAllRegistersAndBranchBailOutEP19BranchBailOutRecordi, _TEXT, NoHandler
+
+    // We follow custom calling convention
+    // [rsp + 1 * 8] == saved rax
+    // [rsp + 2 * 8] == saved rdi
+    // [rsp + 3 * 8] == saved rsi
+    // rdi == bailOutRecord
+    // rsi == condition
+
+    sub rsp, 28h        // use the same as Windows x64 so register locations are the same
+    .cfi_adjust_cfa_offset 0x28
+
+    call _ZN12LinearScanMD26SaveAllRegistersEP13BailOutRecord
+
+    add rsp, 28h        // deallocate stack space
+    .cfi_adjust_cfa_offset -0x28
+
+    jmp _ZN19BranchBailOutRecord7BailOutEPKS_i
+
+NESTED_END _ZN12LinearScanMD32SaveAllRegistersAndBranchBailOutEP19BranchBailOutRecordi, _TEXT

+ 158 - 75
lib/Backend/amd64/LowererMDArch.cpp

@@ -186,7 +186,7 @@ LowererMDArch::LoadHeapArgsCached(IR::Instr *instrArgs)
     {
         instrArgs->m_opcode = Js::OpCode::MOV;
         instrArgs->ReplaceSrc1(IR::AddrOpnd::NewNull(func));
-        
+
         if (PHASE_TRACE1(Js::StackArgFormalsOptPhase) && func->GetJITFunctionBody()->GetInParamsCount() > 1)
         {
             Output::Print(_u("StackArgFormals : %s (%d) :Removing Heap Arguments object creation in Lowerer. \n"), instrArgs->m_func->GetJITFunctionBody()->GetDisplayName(), instrArgs->m_func->GetFunctionNumber());
@@ -203,7 +203,7 @@ LowererMDArch::LoadHeapArgsCached(IR::Instr *instrArgs)
         // s2 = actual argument count
         // s1 = current function
         // dst = JavascriptOperators::LoadArguments(s1, s2, s3, s4, s5, s6, s7)
-        
+
         // s7 = formals are let decls
         IR::Opnd * formalsAreLetDecls = IR::IntConstOpnd::New((IntConstType)(instrArgs->m_opcode == Js::OpCode::LdLetHeapArgsCached), TyUint8, func);
         this->LoadHelperArgument(instrArgs, formalsAreLetDecls);
@@ -317,7 +317,7 @@ LowererMDArch::LoadHeapArguments(IR::Instr *instrArgs, bool force /* = false */,
         // s2 = actual argument count
         // s1 = current function
         // dst = JavascriptOperators::LoadHeapArguments(s1, s2, s3, s4, s5, s6, s7)
-        
+
         // s7 = formals are let decls
         this->LoadHelperArgument(instrArgs, IR::IntConstOpnd::New(instrArgs->m_opcode == Js::OpCode::LdLetHeapArguments ? TRUE : FALSE, TyUint8, func));
 
@@ -612,7 +612,14 @@ LowererMDArch::LowerCallIDynamic(IR::Instr *callInstr, IR::Instr*saveThisArgOutI
 
     IR::Opnd    *funcObjOpnd = callInstr->UnlinkSrc1();
     GeneratePreCall(callInstr, funcObjOpnd, insertBeforeInstrForCFG);
-    LowerCall(callInstr, 0);
+
+    // Normally for dynamic calls we move 4 args to registers and push remaining
+    // args onto stack (Windows convention, and unchanged on xplat). We need to
+    // manually home 4 args. Inlinees lower differently and follow platform ABI.
+    // So we need to manually home actualArgsCount + 2 args (function, callInfo).
+    const uint32 homeArgs = callInstr->m_func->IsInlinee() ?
+                                callInstr->m_func->actualCount + 2 : 4;
+    LowerCall(callInstr, homeArgs);
 
     return callInstr;
 }
@@ -752,7 +759,7 @@ LowererMDArch::LowerCallI(IR::Instr * callInstr, ushort callFlags, bool isHelper
     else if (insertBeforeInstrForCFG != nullptr)
     {
         RegNum dstReg = insertBeforeInstrForCFG->GetDst()->AsRegOpnd()->GetReg();
-        AssertMsg(dstReg == RegR8 || dstReg == RegR9, "NewScObject should insert the first Argument in R8/R9 only based on Spread call or not.");
+        AssertMsg(dstReg == RegArg2 || dstReg == RegArg3, "NewScObject should insert the first Argument in RegArg2/RegArg3 only based on Spread call or not.");
         insertBeforeInstrForCFGCheck = insertBeforeInstrForCFG;
     }
 
@@ -810,6 +817,23 @@ LowererMDArch::LowerCallPut(IR::Instr *callInstr)
     return nullptr;
 }
 
+static inline IRType ExtendHelperArg(IRType type)
+{
+#ifdef __clang__
+    // clang expects caller to extend arg size to int
+    switch (type)
+    {
+        case TyInt8:
+        case TyInt16:
+            return TyInt32;
+        case TyUint8:
+        case TyUint16:
+            return TyUint32;
+    }
+#endif
+    return type;
+}
+
 IR::Instr *
 LowererMDArch::LowerCall(IR::Instr * callInstr, uint32 argCount)
 {
@@ -857,22 +881,85 @@ LowererMDArch::LowerCall(IR::Instr * callInstr, uint32 argCount)
     //
 
     AssertMsg(this->helperCallArgsCount >= 0, "Fatal. helper call arguments ought to be positive");
-    AssertMsg(this->helperCallArgsCount < 255, "Too many helper call arguments");
+    AssertMsg(this->helperCallArgsCount < MaxArgumentsToHelper && MaxArgumentsToHelper < 255, "Too many helper call arguments");
 
     uint16 argsLeft = static_cast<uint16>(this->helperCallArgsCount);
 
+    // Sys V x64 ABI assigns int and xmm arg registers separately.
+    // e.g. args:   int, double, int, double, int, double
+    //  Windows:    int0, xmm1, int2, xmm3, stack, stack
+    //  Sys V:      int0, xmm0, int1, xmm1, int2, xmm2
+#ifdef _WIN32
+#define _V_ARG_INDEX(index) index
+#else
+    uint16 _vindex[MaxArgumentsToHelper];
+    {
+        uint16 intIndex = 1, doubleIndex = 1, stackIndex = IntArgRegsCount + 1;
+        for (int i = 0; i < this->helperCallArgsCount; i++)
+        {
+            IR::Opnd * helperSrc = this->helperCallArgs[this->helperCallArgsCount - 1 - i];
+            IRType type = helperSrc->GetType();
+            if (IRType_IsFloat(type) || IRType_IsSimd128(type))
+            {
+                if (doubleIndex <= XmmArgRegsCount)
+                {
+                    _vindex[i] = doubleIndex++;
+                }
+                else
+                {
+                    _vindex[i] = stackIndex++;
+                }
+            }
+            else
+            {
+                if (intIndex <= IntArgRegsCount)
+                {
+                    _vindex[i] = intIndex++;
+                }
+                else
+                {
+                    _vindex[i] = stackIndex++;
+                }
+            }
+        }
+    }
+#define _V_ARG_INDEX(index) _vindex[(index) - 1]
+#endif
+
     while (argsLeft > 0)
     {
         IR::Opnd * helperSrc = this->helperCallArgs[this->helperCallArgsCount - argsLeft];
-        StackSym * helperSym = m_func->m_symTable->GetArgSlotSym(static_cast<uint16>(argsLeft));
-        helperSym->m_type = helperSrc->GetType();
+        uint16 index = _V_ARG_INDEX(argsLeft);
+        StackSym * helperSym = m_func->m_symTable->GetArgSlotSym(index);
+        helperSym->m_type = ExtendHelperArg(helperSrc->GetType());
         Lowerer::InsertMove(
-            this->GetArgSlotOpnd(argsLeft, helperSym),
+            this->GetArgSlotOpnd(index, helperSym, /*isHelper*/true),
             helperSrc,
             callInstr);
         --argsLeft;
     }
 
+#ifndef _WIN32
+    // Manually home args
+    if (argCount > 0)
+    {
+        static const RegNum s_argRegs[IntArgRegsCount] = {
+    #define REG_INT_ARG(Index, Name)  Reg ## Name,
+    #include "RegList.h"
+        };
+
+        const int callArgCount = static_cast<int>(argCount) + this->helperCallArgsCount;
+        const int argRegs = min(callArgCount, static_cast<int>(IntArgRegsCount));
+        for (int i = argRegs - 1; i >= 0; i--)
+        {
+            StackSym * sym = this->m_func->m_symTable->GetArgSlotSym(static_cast<uint16>(i + 1));
+            Lowerer::InsertMove(
+                IR::SymOpnd::New(sym, TyMachReg, this->m_func),
+                IR::RegOpnd::New(nullptr, s_argRegs[i], TyMachReg, this->m_func),
+                callInstr);
+        }
+    }
+#endif
 
     //
     // load the address into a register because we cannot directly access 64 bit constants
@@ -909,7 +996,7 @@ LowererMDArch::LowerCall(IR::Instr * callInstr, uint32 argCount)
 // the first 4 arguments go in registers and the rest are on stack.
 //
 IR::Opnd *
-LowererMDArch::GetArgSlotOpnd(uint16 index, StackSym * argSym)
+LowererMDArch::GetArgSlotOpnd(uint16 index, StackSym * argSym, bool isHelper /*= false*/)
 {
     Assert(index != 0);
 
@@ -934,51 +1021,38 @@ LowererMDArch::GetArgSlotOpnd(uint16 index, StackSym * argSym)
     }
 
     IRType type = argSym ? argSym->GetType() : TyMachReg;
-    if (argPosition <= 4)
-    {
-        RegNum reg = RegNOREG;
+    const bool isFloatArg = IRType_IsFloat(type) || IRType_IsSimd128(type);
+    RegNum reg = RegNOREG;
 
-        if (IRType_IsFloat(type) || IRType_IsSimd128(type))
+    if (!isFloatArg && argPosition <= IntArgRegsCount)
+    {
+        switch (argPosition)
         {
-            switch (argPosition)
-            {
-            case 4:
-                reg = RegXMM3;
-                break;
-            case 3:
-                reg = RegXMM2;
-                break;
-            case 2:
-                reg = RegXMM1;
-                break;
-            case 1:
-                reg = RegXMM0;
-                break;
-            default:
-                Assume(UNREACHED);
-            }
+#define REG_INT_ARG(Index, Name)    \
+        case ((Index) + 1):         \
+            reg = Reg ## Name;      \
+            break;
+#include "RegList.h"
+        default:
+            Assume(UNREACHED);
         }
-        else
+    }
+    else if (isFloatArg && argPosition <= XmmArgRegsCount)
+    {
+        switch (argPosition)
         {
-            switch (argPosition)
-            {
-            case 4:
-                reg = RegR9;
-                break;
-            case 3:
-                reg = RegR8;
-                break;
-            case 2:
-                reg = RegRDX;
-                break;
-            case 1:
-                reg = RegRCX;
-                break;
-            default:
-                Assume(UNREACHED);
-            }
+#define REG_XMM_ARG(Index, Name)    \
+        case ((Index) + 1):         \
+            reg = Reg ## Name;      \
+            break;
+#include "RegList.h"
+        default:
+            Assume(UNREACHED);
         }
+    }
 
+    if (reg != RegNOREG)
+    {
         IR::RegOpnd *regOpnd = IR::RegOpnd::New(argSym, reg, type, m_func);
         regOpnd->m_isCallArg = true;
 
@@ -988,12 +1062,17 @@ LowererMDArch::GetArgSlotOpnd(uint16 index, StackSym * argSym)
     {
         if (argSym == nullptr)
         {
-            argSym = this->m_func->m_symTable->GetArgSlotSym(static_cast<uint16>(index));
+            argSym = this->m_func->m_symTable->GetArgSlotSym(index);
         }
 
-        //
-        // More than 4 arguments. Assign them to appropriate slots
-        //
+#ifndef _WIN32
+        // helper does not home args, adjust stack offset
+        if (isHelper)
+        {
+            const uint16 argIndex = index - IntArgRegsCount;
+            argSym->m_offset = (argIndex - 1) * MachPtr;
+        }
+#endif
 
         argSlotOpnd = IR::SymOpnd::New(argSym, type, this->m_func);
     }
@@ -1403,8 +1482,8 @@ LowererMDArch::LowerEntryInstr(IR::EntryInstr * entryInstr)
 
     if (Lowerer::IsArgSaveRequired(this->m_func))
     {
-        if (argSlotsForFunctionsCalled < 4)
-            argSlotsForFunctionsCalled = 4;
+        if (argSlotsForFunctionsCalled < IntArgRegsCount)
+            argSlotsForFunctionsCalled = IntArgRegsCount;
     }
     else
     {
@@ -1489,10 +1568,11 @@ LowererMDArch::LowerEntryInstr(IR::EntryInstr * entryInstr)
     IR::Instr *movRax0 = nullptr;
     IR::Opnd *raxOpnd = nullptr;
 
-    if (this->m_func->HasArgumentSlot() && (this->m_func->IsStackArgsEnabled() ||
-        this->m_func->IsJitInDebugMode() ||
-        // disabling apply inlining leads to explicit load from the zero-inited slot
-        this->m_func->GetJITFunctionBody()->IsInlineApplyDisabled())
+    if ((this->m_func->HasArgumentSlot() &&
+            (this->m_func->IsStackArgsEnabled() ||
+            this->m_func->IsJitInDebugMode() ||
+            // disabling apply inlining leads to explicit load from the zero-inited slot
+            this->m_func->GetJITFunctionBody()->IsInlineApplyDisabled()))
 #ifdef BAILOUT_INJECTION
         || Js::Configuration::Global.flags.IsEnabled(Js::BailOutFlag)
         || Js::Configuration::Global.flags.IsEnabled(Js::BailOutAtEveryLineFlag)
@@ -1551,6 +1631,7 @@ LowererMDArch::LowerEntryInstr(IR::EntryInstr * entryInstr)
     firstPrologInstr->InsertBefore(IR::PragmaInstr::New(Js::OpCode::PrologStart, 0, m_func));
     lastPrologInstr->InsertAfter(IR::PragmaInstr::New(Js::OpCode::PrologEnd, 0, m_func));
 
+#ifdef _WIN32 // home registers
     //
     // Now store all the arguments in the register in the stack slots
     //
@@ -1631,6 +1712,7 @@ LowererMDArch::LowerEntryInstr(IR::EntryInstr * entryInstr)
         this->MovArgFromReg2Stack(entryInstr, RegR8, 3);
         this->MovArgFromReg2Stack(entryInstr, RegR9, 4);
     }
+#endif  // _WIN32
 
     IntConstType frameSize = Js::Constants::MinStackJIT + stackArgsSize + stackLocalsSize + savedRegSize;
     this->GeneratePrologueStackProbe(entryInstr, frameSize);
@@ -1761,14 +1843,14 @@ LowererMDArch::GeneratePrologueStackProbe(IR::Instr *entryInstr, IntConstType fr
 
     IR::RegOpnd *target;
     {
-        // MOV rdx, scriptContext
+        // MOV RegArg1, scriptContext
         this->lowererMD->CreateAssign(
-            IR::RegOpnd::New(nullptr, RegRDX, TyMachReg, m_func),
+            IR::RegOpnd::New(nullptr, RegArg1, TyMachReg, m_func),
             this->lowererMD->m_lowerer->LoadScriptContextOpnd(insertInstr), insertInstr);
 
-        // MOV rcx, frameSize
+        // MOV RegArg0, frameSize
         this->lowererMD->CreateAssign(
-            IR::RegOpnd::New(nullptr, RegRCX, TyMachReg, this->m_func),
+            IR::RegOpnd::New(nullptr, RegArg0, TyMachReg, this->m_func),
             IR::AddrOpnd::New((void*)frameSize, IR::AddrOpndKindConstant, this->m_func), insertInstr);
 
         // MOV rax, ThreadContext::ProbeCurrentStack
@@ -2543,7 +2625,7 @@ LowererMDArch::EmitLoadInt32(IR::Instr *instrLoad, bool conversionFromObjectAllo
             // Need to bail out instead of calling a helper
             return true;
         }
-        
+
         if (conversionFromObjectAllowed)
         {
             lowererMD->m_lowerer->LowerUnaryHelperMem(instrLoad, IR::HelperConv_ToInt32);
@@ -2907,23 +2989,24 @@ LowererMDArch::LowerEHRegionReturn(IR::Instr * insertBeforeInstr, IR::Opnd * tar
     // Load the continuation address into the return register.
     insertBeforeInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, retReg, targetOpnd, this->m_func));
 
-    // MOV r8, spillSize
-    IR::Instr *movR8 = IR::Instr::New(Js::OpCode::LdSpillSize,
-        IR::RegOpnd::New(nullptr, RegR8, TyMachReg, m_func),
+    // MOV REG_EH_SPILL_SIZE, spillSize
+    IR::Instr *movSpillSize = IR::Instr::New(Js::OpCode::LdSpillSize,
+        IR::RegOpnd::New(nullptr, REG_EH_SPILL_SIZE, TyMachReg, m_func),
         m_func);
-    insertBeforeInstr->InsertBefore(movR8);
+    insertBeforeInstr->InsertBefore(movSpillSize);
 
 
-    // MOV r9, argsSize
-    IR::Instr *movR9 = IR::Instr::New(Js::OpCode::LdArgSize,
-        IR::RegOpnd::New(nullptr, RegR9, TyMachReg, m_func),
+    // MOV REG_EH_ARGS_SIZE, argsSize
+    IR::Instr *movArgsSize = IR::Instr::New(Js::OpCode::LdArgSize,
+        IR::RegOpnd::New(nullptr, REG_EH_ARGS_SIZE, TyMachReg, m_func),
         m_func);
-    insertBeforeInstr->InsertBefore(movR9);
+    insertBeforeInstr->InsertBefore(movArgsSize);
 
-    // MOV rcx, amd64_ReturnFromCallWithFakeFrame
-    // PUSH rcx
+    // MOV REG_EH_TARGET, amd64_ReturnFromCallWithFakeFrame
+    // PUSH REG_EH_TARGET
     // RET
-    IR::Opnd *endCallWithFakeFrame = endCallWithFakeFrame = IR::RegOpnd::New(nullptr, RegRCX, TyMachReg, m_func);
+    IR::Opnd *endCallWithFakeFrame = endCallWithFakeFrame =
+        IR::RegOpnd::New(nullptr, REG_EH_TARGET, TyMachReg, m_func);
     IR::Instr *movTarget = IR::Instr::New(Js::OpCode::MOV,
         endCallWithFakeFrame,
         IR::HelperCallOpnd::New(IR::HelperOp_ReturnFromCallWithFakeFrame, m_func),

+ 4 - 1
lib/Backend/amd64/LowererMDArch.h

@@ -58,7 +58,7 @@ public:
         return Math::FitsInDWord((size_t)opnd->GetMemLoc());
     }
 
-    IR::Opnd *          GetArgSlotOpnd(Js::ArgSlot slotIndex, StackSym * argSym = nullptr);
+    IR::Opnd *          GetArgSlotOpnd(Js::ArgSlot slotIndex, StackSym * argSym = nullptr, bool isHelper = false);
     IR::Instr *         LoadNewScObjFirstArg(IR::Instr * instr, IR::Opnd * dst, ushort extraArgs = 0);
     IR::Instr *         LoadInputParamPtr(IR::Instr *instrInsert, IR::RegOpnd *optionalDstOpnd = nullptr);
     int32               LowerCallArgs(IR::Instr *callInstr, ushort callFlags, Js::ArgSlot extraParams = 1 /* for function object */, IR::IntConstOpnd **callInfoOpndRef = nullptr);
@@ -134,3 +134,6 @@ private:
     void                SetMaxArgSlots(Js::ArgSlot actualCount /*including this*/);
 };
 
+#define REG_EH_TARGET      RegArg0
+#define REG_EH_SPILL_SIZE  RegArg2
+#define REG_EH_ARGS_SIZE   RegArg3

+ 6 - 13
lib/Backend/amd64/PeepsMD.cpp

@@ -18,19 +18,12 @@ PeepsMD::ProcessImplicitRegs(IR::Instr *instr)
 {
     if (LowererMD::IsCall(instr))
     {
-        this->peeps->ClearReg(RegRAX);
-        this->peeps->ClearReg(RegRCX);
-        this->peeps->ClearReg(RegRDX);
-        this->peeps->ClearReg(RegR8);
-        this->peeps->ClearReg(RegR9);
-        this->peeps->ClearReg(RegR10);
-        this->peeps->ClearReg(RegR11);
-        this->peeps->ClearReg(RegXMM0);
-        this->peeps->ClearReg(RegXMM1);
-        this->peeps->ClearReg(RegXMM2);
-        this->peeps->ClearReg(RegXMM3);
-        this->peeps->ClearReg(RegXMM4);
-        this->peeps->ClearReg(RegXMM5);
+#define REGDAT(Name, Listing, Encode, Type, BitVec) \
+        if (!((BitVec) & (RA_CALLEESAVE | RA_DONTALLOCATE))) \
+        { \
+            this->peeps->ClearReg(Reg ## Name); \
+        }
+#include "RegList.h"
     }
     else if (instr->m_opcode == Js::OpCode::IMUL)
     {

+ 22 - 2
lib/Backend/amd64/Reg.h

@@ -14,9 +14,29 @@
 enum RegNum {
 #define REGDAT(Name, Listing,    Encode,    Type,    BitVec)  Reg ## Name,
 #include "RegList.h"
-#undef REGDAT
+    RegNumCount,
 
-    RegNumCount,  // Number of operations
+// alias RegArg0, RegArg1, ...
+#define REG_INT_ARG(Index, Name)  RegArg ## Index = Reg ## Name,
+#include "RegList.h"
+
+// alias RegXmmArg0, RegXmmArg1, ...
+#define REG_XMM_ARG(Index, Name)  RegXmmArg ## Index = Reg ## Name,
+#include "RegList.h"
+};
+
+// IntArgRegsCount
+enum _IntArgRegs {
+#define REG_INT_ARG(Index, Name)  _RegArg ## Index,
+#include "RegList.h"
+    IntArgRegsCount
+};
+
+// XmmArgRegsCount
+enum _XmmArgRegs {
+#define REG_XMM_ARG(Index, Name)  _RegXmmArg ## Index,
+#include "RegList.h"
+    XmmArgRegsCount
 };
 
 #define REGNUM_ISXMMXREG(r) ((r) >= RegXMM0 && (r) <= RegXMM15)

+ 83 - 0
lib/Backend/amd64/RegList.h

@@ -6,6 +6,10 @@
 // Name      Name     Encode    Type        BitVec
 //------------------------------------------------------------------------
 
+#ifndef REGDAT
+#define REGDAT(Name, Listing,    Encode,    Type,    BitVec)
+#endif
+
 // Illegal registers - must be first and have a value of 0
 
 //            Internal Name
@@ -15,6 +19,7 @@
 //        /      /        /          Type
 //       /      /        /          /           BitVec
 //      /      /        /          /           /
+#ifdef _WIN32
 REGDAT(NOREG, noreg,    0xf,    TyIllegal,    RA_DONTALLOCATE)
 REGDAT(RAX,   rax,      0,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
 REGDAT(RCX,   rcx,      1,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
@@ -49,3 +54,81 @@ REGDAT(XMM12, xmm12,    4,      TyFloat64,    RA_CALLEESAVE)
 REGDAT(XMM13, xmm13,    5,      TyFloat64,    RA_CALLEESAVE)
 REGDAT(XMM14, xmm14,    6,      TyFloat64,    RA_CALLEESAVE)
 REGDAT(XMM15, xmm15,    7,      TyFloat64,    RA_CALLEESAVE)
+
+#else  // System V x64
+REGDAT(NOREG, noreg,    0xf,    TyIllegal,    RA_DONTALLOCATE)
+REGDAT(RAX,   rax,      0,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(RCX,   rcx,      1,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(RDX,   rdx,      2,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(RBX,   rbx,      3,      TyInt64,      RA_CALLEESAVE | RA_BYTEABLE)
+REGDAT(RSP,   rsp,      4,      TyInt64,      RA_DONTALLOCATE)
+REGDAT(RBP,   rbp,      5,      TyInt64,      RA_DONTALLOCATE)
+REGDAT(RSI,   rsi,      6,      TyInt64,      RA_CALLERSAVE)
+REGDAT(RDI,   rdi,      7,      TyInt64,      RA_CALLERSAVE)
+REGDAT(R8,    r8,       0,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(R9,    r9,       1,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(R10,   r10,      2,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(R11,   r11,      3,      TyInt64,      RA_CALLERSAVE | RA_BYTEABLE)
+REGDAT(R12,   r12,      4,      TyInt64,      RA_CALLEESAVE | RA_BYTEABLE)
+REGDAT(R13,   r13,      5,      TyInt64,      RA_CALLEESAVE | RA_BYTEABLE)
+REGDAT(R14,   r14,      6,      TyInt64,      RA_CALLEESAVE | RA_BYTEABLE)
+REGDAT(R15,   r15,      7,      TyInt64,      RA_CALLEESAVE | RA_BYTEABLE)
+
+REGDAT(XMM0,  xmm0,     0,      TyFloat64,    0)
+REGDAT(XMM1,  xmm1,     1,      TyFloat64,    0)
+REGDAT(XMM2,  xmm2,     2,      TyFloat64,    0)
+REGDAT(XMM3,  xmm3,     3,      TyFloat64,    0)
+REGDAT(XMM4,  xmm4,     4,      TyFloat64,    0)
+REGDAT(XMM5,  xmm5,     5,      TyFloat64,    0)
+REGDAT(XMM6,  xmm6,     6,      TyFloat64,    0)
+REGDAT(XMM7,  xmm7,     7,      TyFloat64,    0)
+REGDAT(XMM8,  xmm8,     0,      TyFloat64,    0)
+REGDAT(XMM9,  xmm9,     1,      TyFloat64,    0)
+REGDAT(XMM10, xmm10,    2,      TyFloat64,    0)
+REGDAT(XMM11, xmm11,    3,      TyFloat64,    0)
+REGDAT(XMM12, xmm12,    4,      TyFloat64,    0)
+REGDAT(XMM13, xmm13,    5,      TyFloat64,    0)
+REGDAT(XMM14, xmm14,    6,      TyFloat64,    0)
+REGDAT(XMM15, xmm15,    7,      TyFloat64,    0)
+#endif  // !_WIN32
+
+#ifndef REG_INT_ARG
+#define REG_INT_ARG(Index, Name)
+#endif
+
+#ifndef REG_XMM_ARG
+#define REG_XMM_ARG(Index, Name)
+#endif
+
+#ifdef _WIN32
+REG_INT_ARG(0, RCX)
+REG_INT_ARG(1, RDX)
+REG_INT_ARG(2, R8)
+REG_INT_ARG(3, R9)
+
+REG_XMM_ARG(0, XMM0)
+REG_XMM_ARG(1, XMM1)
+REG_XMM_ARG(2, XMM2)
+REG_XMM_ARG(3, XMM3)
+
+#else  // System V x64
+REG_INT_ARG(0, RDI)
+REG_INT_ARG(1, RSI)
+REG_INT_ARG(2, RDX)
+REG_INT_ARG(3, RCX)
+REG_INT_ARG(4, R8)
+REG_INT_ARG(5, R9)
+
+REG_XMM_ARG(0, XMM0)
+REG_XMM_ARG(1, XMM1)
+REG_XMM_ARG(2, XMM2)
+REG_XMM_ARG(3, XMM3)
+REG_XMM_ARG(4, XMM4)
+REG_XMM_ARG(5, XMM5)
+REG_XMM_ARG(6, XMM6)
+REG_XMM_ARG(7, XMM7)
+#endif  // !_WIN32
+
+#undef REGDAT
+#undef REG_INT_ARG
+#undef REG_XMM_ARG

+ 94 - 0
lib/Backend/amd64/Thunks.S

@@ -0,0 +1,94 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+
+.intel_syntax noprefix
+#include "unixasmmacros.inc"
+
+
+//============================================================================================================
+// Fake __chkstk
+//============================================================================================================
+.balign 16
+LEAF_ENTRY __chkstk, _TEXT
+        ret
+LEAF_END __chkstk, _TEXT
+
+//============================================================================================================
+// NativeCodeGenerator::CheckCodeGenThunk
+//============================================================================================================
+
+//.extern _ZN19NativeCodeGenerator12CheckCodeGenEPN2Js14ScriptFunctionE
+.balign 16
+NESTED_ENTRY _ZN19NativeCodeGenerator17CheckCodeGenThunkEPN2Js16RecyclableObjectENS0_8CallInfoEz, _TEXT, NoHandler
+        push_nonvol_reg rbp
+        lea  rbp, [rsp]
+
+        // save argument registers used by custom calling convention
+        push_register rdi
+        push_register rsi
+
+        //  Js::JavascriptMethod NativeCodeGenerator::CheckCodeGen(
+        //              Js::ScriptFunction * function)
+        //
+        //      RDI == function, setup by custom calling convention
+        call C_FUNC(_ZN19NativeCodeGenerator12CheckCodeGenEPN2Js14ScriptFunctionE)
+
+        pop_register rsi
+        pop_register rdi
+        pop_nonvol_reg rbp
+
+        jmp rax
+NESTED_END _ZN19NativeCodeGenerator17CheckCodeGenThunkEPN2Js16RecyclableObjectENS0_8CallInfoEz, _TEXT
+
+
+//============================================================================================================
+// NativeCodeGenerator::CheckAsmJsCodeGenThunk
+//============================================================================================================
+
+//.extern _ZN19NativeCodeGenerator17CheckAsmJsCodeGenEPN2Js14ScriptFunctionE
+.balign 16
+NESTED_ENTRY _ZN19NativeCodeGenerator22CheckAsmJsCodeGenThunkEPN2Js16RecyclableObjectENS0_8CallInfoEz, _TEXT, NoHandler
+        push_nonvol_reg rbp             // push rbp and adjust CFA offset
+        lea  rbp, [rsp]
+
+        set_cfa_register rbp, (2*8)     // Set to compute CFA as: rbp + 16 (sizeof: [rbp] [ReturnAddress])
+
+        // save argument registers used by custom calling convention
+        push rdi
+        push rsi
+        push rdx
+        push rcx
+        push r8
+        push r9
+
+        sub rsp, 40h
+
+        // ----- TODO: potentially xmm0-xmm7 args
+        // spill potential floating point arguments to stack
+        movaps xmmword ptr [rsp + 00h], xmm0
+        movaps xmmword ptr [rsp + 10h], xmm1
+        movaps xmmword ptr [rsp + 20h], xmm2
+        movaps xmmword ptr [rsp + 30h], xmm3
+
+        call _ZN19NativeCodeGenerator17CheckAsmJsCodeGenEPN2Js14ScriptFunctionE
+
+        // restore potential floating point arguments from stack
+        movaps xmm0, xmmword ptr [rsp + 00h]
+        movaps xmm1, xmmword ptr [rsp + 10h]
+        movaps xmm2, xmmword ptr [rsp + 20h]
+        movaps xmm3, xmmword ptr [rsp + 30h]
+
+        add rsp, 40h
+
+        pop r9
+        pop r8
+        pop rcx
+        pop rdx
+        pop rsi
+        pop rdi
+
+        pop_nonvol_reg rbp
+        jmp rax
+NESTED_END _ZN19NativeCodeGenerator22CheckAsmJsCodeGenThunkEPN2Js16RecyclableObjectENS0_8CallInfoEz, _TEXT

+ 14 - 0
lib/Common/Common/Int32Math.cpp

@@ -176,17 +176,31 @@ Int32Math::Not(int32 val, int32 *pResult)
 bool
 Int32Math::Inc(int32 val, int32 *pResult)
 {
+#ifdef _MSC_VER
     *pResult = val + 1;
     // Overflow if result ends up less than input
     return *pResult <= val;
+#elif defined(__APPLE__)
+    *pResult = val + 1;
+    return val == INT32_MAX; // Overflow if val was int max
+#else
+    return __builtin_add_overflow(val, 1, pResult);
+#endif
 }
 
 bool
 Int32Math::Dec(int32 val, int32 *pResult)
 {
+#ifdef _MSC_VER
     *pResult = val - 1;
     // Overflow if result ends up greater than input
     return *pResult >= val;
+#elif defined(__APPLE__)
+    *pResult = val - 1;
+    return val == INT32_MIN; // Overflow if val was int min
+#else
+    return __builtin_sub_overflow(val, 1, pResult);
+#endif
 }
 
 int32

+ 6 - 2
lib/Common/CommonDefines.h

@@ -183,6 +183,12 @@
 #endif
 #endif
 
+#if ENABLE_NATIVE_CODEGEN
+#ifdef _WIN32
+#define ENABLE_OOP_NATIVE_CODEGEN 1     // Out of process JIT
+#endif
+#endif
+
 // Other features
 // #define CHAKRA_CORE_DOWN_COMPAT 1
 
@@ -576,7 +582,6 @@
 #endif
 #endif
 
-#if _WIN32 || _WIN64
 #if _M_IX86
 #define I386_ASM 1
 #endif //_M_IX86
@@ -590,7 +595,6 @@
 #define ALLOC_XDATA (false)
 #endif
 #endif
-#endif // _WIN32 || _WIN64
 
 #ifndef _WIN32
 #define DISABLE_SEH 1

+ 1 - 3
lib/Common/Memory/CustomHeap.h

@@ -115,9 +115,7 @@ struct Allocation
         }
         return allocator;
     }
-
 #endif
-
 };
 
 // Wrapper for the two HeapPageAllocator with and without the prereserved segment.
@@ -400,7 +398,7 @@ public:
     void DecommitAll();
     void FreeAll();
     bool IsInHeap(__in void* address);
-   
+
     // A page should be in full list if:
     // 1. It does not have any space
     // 2. Parent segment cannot allocate any more XDATA

+ 8 - 6
lib/Common/Memory/PageAllocator.cpp

@@ -30,6 +30,14 @@ SegmentBase<T>::~SegmentBase()
 {
     Assert(this->allocator != nullptr);
 
+    // Cleanup secondaryAllocator before releasing pages so the destructor
+    // still has access to segment memory.
+    if(this->secondaryAllocator)
+    {
+        this->secondaryAllocator->Delete();
+        this->secondaryAllocator = nullptr;
+    }
+
     if (this->address)
     {
         char* originalAddress = this->address - (leadingGuardPageCount * AutoSystemInfo::PageSize);
@@ -39,12 +47,6 @@ SegmentBase<T>::~SegmentBase()
         RecyclerWriteBarrierManager::OnSegmentFree(this->address, this->segmentPageCount);
 #endif
     }
-
-    if(this->secondaryAllocator)
-    {
-        this->secondaryAllocator->Delete();
-        this->secondaryAllocator = nullptr;
-    }
 }
 
 template<typename T>

+ 19 - 0
lib/Common/Memory/amd64/XDataAllocator.cpp

@@ -66,9 +66,17 @@ bool XDataAllocator::Alloc(ULONG_PTR functionStart, DWORD functionSize, ushort p
     }
     else
     {
+        xdata->address = nullptr;
         OUTPUT_TRACE(Js::XDataAllocatorPhase, _u("No space for XDATA.\n"));
     }
 
+#ifndef _WIN32
+    if (xdata->address)
+    {
+        ClearHead(xdata->address);  // mark empty .eh_frame
+    }
+#endif
+
     return xdata->address != nullptr;
 }
 
@@ -111,6 +119,7 @@ void XDataAllocator::ClearFreeList()
 /* static */
 void XDataAllocator::Register(XDataAllocation * xdataInfo, ULONG_PTR functionStart, DWORD functionSize)
 {
+#ifdef _WIN32
     ULONG_PTR baseAddress = functionStart;
     xdataInfo->pdata.BeginAddress = (DWORD)(functionStart - baseAddress);
     xdataInfo->pdata.EndAddress = (DWORD)(xdataInfo->pdata.BeginAddress + functionSize);
@@ -143,11 +152,17 @@ void XDataAllocator::Register(XDataAllocation * xdataInfo, ULONG_PTR functionSta
     RUNTIME_FUNCTION  *runtimeFunction = RtlLookupFunctionEntry((DWORD64)functionStart, &imageBase, nullptr);
     Assert(runtimeFunction != NULL);
 #endif
+
+#else  // !_WIN32
+    Assert(ReadHead(xdataInfo->address));  // should be non-empty .eh_frame
+    __register_frame(xdataInfo->address);
+#endif
 }
 
 /* static */
 void XDataAllocator::Unregister(XDataAllocation * xdataInfo)
 {
+#ifdef _WIN32
     // Delete the table
     if (AutoSystemInfo::Data.IsWin8OrLater())
     {
@@ -159,4 +174,8 @@ void XDataAllocator::Unregister(XDataAllocation * xdataInfo)
         Assert(success);
     }
 
+#else  // !_WIN32
+    Assert(ReadHead(xdataInfo->address));  // should be non-empty .eh_frame
+    __deregister_frame(xdataInfo->address);
+#endif
 }

+ 28 - 3
lib/Common/Memory/amd64/XDataAllocator.h

@@ -7,9 +7,18 @@ CompileAssert(false)
 #endif
 #pragma once
 
+#ifndef _WIN32
+extern "C" void __register_frame(const void* ehframe);
+extern "C" void __deregister_frame(const void* ehframe);
+#endif
+
 namespace Memory
 {
+#ifdef _WIN32
 #define XDATA_SIZE (72)
+#else
+#define XDATA_SIZE (0x80)
+#endif
 
 struct XDataAllocation : public SecondaryAllocation
 {
@@ -24,8 +33,11 @@ struct XDataAllocation : public SecondaryAllocation
     {
         address = nullptr;
     }
+
+#ifdef _WIN32
     RUNTIME_FUNCTION pdata;
     FunctionTableHandle functionTable;
+#endif
 };
 
 //
@@ -69,15 +81,28 @@ public:
     void Release(const SecondaryAllocation& address);
     bool CanAllocate();
 
-    static void XDataAllocator::Register(XDataAllocation * xdataInfo, ULONG_PTR functionStart, DWORD functionSize);
+    static void Register(XDataAllocation * xdataInfo, ULONG_PTR functionStart, DWORD functionSize);
     static void Unregister(XDataAllocation * xdataInfo);
 
 // -------- Private helpers ---------/
 private:
     BYTE* End() { return start + size; }
 
-    void ClearFreeList();
-    void PreparePdata(XDataAllocation* const xdata, ULONG_PTR functionStart, DWORD functionSize);
+#ifndef _WIN32
+    // Read .eh_frame data head (length record). 0 means empty.
+    static uint32 ReadHead(const void* p)
+    {
+        return *reinterpret_cast<const uint32*>(p);
+    }
 
+    // Clear .eh_frame data head (length record). Set to 0 to mark empty.
+    static void ClearHead(void* p)
+    {
+        *reinterpret_cast<uint32*>(p) = 0;
+    }
+#endif
+
+    void ClearFreeList();
 };
+
 }

+ 100 - 0
lib/JITClient/JITManager.h

@@ -5,6 +5,7 @@
 
 #pragma once
 
+#if ENABLE_OOP_NATIVE_CODEGEN
 class JITManager
 {
 public:
@@ -96,3 +97,102 @@ private:
 
     static JITManager s_jitManager;
 };
+
+#else  // !ENABLE_OOP_NATIVE_CODEGEN
+class JITManager
+{
+public:
+    HRESULT ConnectRpcServer(__in HANDLE jitProcessHandle, __in_opt void* serverSecurityDescriptor, __in UUID connectionUuid)
+        { Assert(false); return E_FAIL; }
+
+    bool IsConnected() const { return false; }
+    bool IsJITServer() const { return false; }
+    void SetIsJITServer() { Assert(false); }
+    bool IsOOPJITEnabled() const { return false; }
+    void EnableOOPJIT() { Assert(false); }
+
+    HANDLE GetJITTargetHandle() const
+        { Assert(false); return HANDLE(); }
+
+    HRESULT InitializeThreadContext(
+        __in ThreadContextDataIDL * data,
+        __out intptr_t *threadContextInfoAddress,
+        __out intptr_t *prereservedRegionAddr)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT CleanupThreadContext(
+        __in intptr_t threadContextInfoAddress)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT AddPropertyRecordArray(
+        __in intptr_t threadContextInfoAddress,
+        __in unsigned int count,
+        __in PropertyRecordIDL ** propertyRecordArray)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT AddDOMFastPathHelper(
+        __in intptr_t scriptContextInfoAddress,
+        __in intptr_t funcInfoAddr,
+        __in int helper)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT AddModuleRecordInfo(
+            /* [in] */ intptr_t scriptContextInfoAddress,
+            /* [in] */ unsigned int moduleId,
+            /* [in] */ intptr_t localExportSlotsAddr)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT SetWellKnownHostTypeId(
+        __in  intptr_t threadContextRoot,
+        __in  int typeId)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT InitializeScriptContext(
+        __in ScriptContextDataIDL * data,
+        __out intptr_t *scriptContextInfoAddress)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT CleanupProcess()
+        { Assert(false); return E_FAIL; }
+
+    HRESULT CleanupScriptContext(
+        __in intptr_t scriptContextInfoAddress)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT CloseScriptContext(
+        __in intptr_t scriptContextInfoAddress)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT FreeAllocation(
+        __in intptr_t threadContextInfoAddress,
+        __in intptr_t address)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT SetIsPRNGSeeded(
+        __in intptr_t scriptContextInfoAddress,
+        __in boolean value)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT IsNativeAddr(
+        __in intptr_t threadContextInfoAddress,
+        __in intptr_t address,
+        __out boolean * result)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT RemoteCodeGenCall(
+        __in CodeGenWorkItemIDL *workItemData,
+        __in intptr_t threadContextInfoAddress,
+        __in intptr_t scriptContextInfoAddress,
+        __out JITOutputIDL *jitData)
+        { Assert(false); return E_FAIL; }
+
+    HRESULT Shutdown()
+        { Assert(false); return E_FAIL; }
+
+    static JITManager * GetJITManager()
+        { return &s_jitManager; }
+
+private:
+    static JITManager s_jitManager;
+};
+#endif  // !ENABLE_OOP_NATIVE_CODEGEN

+ 3 - 3
lib/Runtime/Base/CallInfo.h

@@ -32,7 +32,7 @@ namespace Js
         explicit CallInfo(ushort count)
             : Flags(CallFlags_None)
             , Count(count)
-#ifdef _WIN64
+#ifdef TARGET_64
             , unused(0)
 #endif
         {
@@ -41,7 +41,7 @@ namespace Js
         CallInfo(CallFlags flags, ushort count)
             : Flags(flags)
             , Count(count)
-#ifdef _WIN64
+#ifdef TARGET_64
             , unused(0)
 #endif
         {
@@ -58,7 +58,7 @@ namespace Js
         //
         unsigned  Count : 24;
         CallFlags Flags : 8;
-#ifdef _WIN64
+#ifdef TARGET_64
         unsigned unused : 32;
 #endif
 

+ 37 - 17
lib/Runtime/Base/FunctionBody.cpp

@@ -596,7 +596,7 @@ namespace Js
         return static_cast<PropertyIdArray*>(this->GetAuxPtrWithLock(AuxPointerType::FormalsPropIdArray));
     }
 
-    void 
+    void
     FunctionBody::SetFormalsPropIdArray(PropertyIdArray * propIdArray)
     {
         AssertMsg(propIdArray == nullptr || this->GetAuxPtrWithLock(AuxPointerType::FormalsPropIdArray) == nullptr, "Already set?");
@@ -978,7 +978,7 @@ namespace Js
         this->GetBoundPropertyRecords()->Item(pid, propRecord);
 
         return pid;
-    }  
+    }
 
     SmallSpanSequence::SmallSpanSequence()
         : pStatementBuffer(nullptr),
@@ -2893,7 +2893,7 @@ namespace Js
     BOOL FunctionBody::IsNativeOriginalEntryPoint() const
     {
 #if ENABLE_NATIVE_CODEGEN
-        return this->GetScriptContext()->IsNativeAddress(this->originalEntryPoint);
+        return this->GetScriptContext()->IsNativeAddress((void*)this->originalEntryPoint);
 #else
         return false;
 #endif
@@ -2962,12 +2962,11 @@ namespace Js
         return IsIntermediateCodeGenThunk(directEntryPoint) || originalEntryPoint == directEntryPoint
 #if ENABLE_PROFILE_INFO
             || (directEntryPoint == DynamicProfileInfo::EnsureDynamicProfileInfoThunk &&
-            this->IsFunctionBody() && this->GetFunctionBody()->IsNativeOriginalEntryPoint()
+            this->IsFunctionBody() && this->GetFunctionBody()->IsNativeOriginalEntryPoint())
 #ifdef ASMJS_PLAT
             || (GetFunctionBody()->GetIsAsmJsFunction() && directEntryPoint == AsmJsDefaultEntryThunk)
-            || (IsAsmJsCodeGenThunk(directEntryPoint))
+            || IsAsmJsCodeGenThunk(directEntryPoint)
 #endif
-            );
 #endif
         ;
     }
@@ -3151,7 +3150,7 @@ namespace Js
 #endif
 
 #if ENABLE_NATIVE_CODEGEN
-    void FunctionBody::SetNativeEntryPoint(FunctionEntryPointInfo* entryPointInfo, JavascriptMethod originalEntryPoint, Var directEntryPoint)
+    void FunctionBody::SetNativeEntryPoint(FunctionEntryPointInfo* entryPointInfo, JavascriptMethod originalEntryPoint, JavascriptMethod directEntryPoint)
     {
         if(entryPointInfo->nativeEntryPointProcessed)
         {
@@ -3173,7 +3172,7 @@ namespace Js
         }
         else
         {
-            entryPointInfo->jsMethod = reinterpret_cast<Js::JavascriptMethod>(directEntryPoint);
+            entryPointInfo->jsMethod = directEntryPoint;
         }
         if (isAsmJs)
         {
@@ -3255,7 +3254,7 @@ namespace Js
         Assert(reinterpret_cast<void*>(entryPointInfo->jsMethod) == nullptr);
         entryPointInfo->jsMethod = entryPoint;
 
-        ((Js::LoopEntryPointInfo*)entryPointInfo)->totalJittedLoopIterations = 
+        ((Js::LoopEntryPointInfo*)entryPointInfo)->totalJittedLoopIterations =
             static_cast<uint8>(
                 min(
                     static_cast<uint>(static_cast<uint8>(CONFIG_FLAG(MinBailOutsBeforeRejitForLoops))) *
@@ -5892,7 +5891,7 @@ namespace Js
                 // move back to the interpreter, the original entry point is going to be the dynamic interpreter thunk
                 originalEntryPoint =
                     m_dynamicInterpreterThunk
-                        ? static_cast<JavascriptMethod>(InterpreterThunkEmitter::ConvertToEntryPoint(m_dynamicInterpreterThunk))
+                        ? reinterpret_cast<JavascriptMethod>(InterpreterThunkEmitter::ConvertToEntryPoint(m_dynamicInterpreterThunk))
                         : DefaultEntryThunk;
 #else
                 originalEntryPoint = DefaultEntryThunk;
@@ -7787,7 +7786,7 @@ namespace Js
     void EntryPointInfo::EnsureIsReadyToCall()
     {
         ProcessJitTransferData();
-        
+
 #if !FLOATVAR
         if (this->numberPageSegments)
         {
@@ -8338,7 +8337,7 @@ namespace Js
             int index = this->inlineeFrameMap->BinarySearch([=](const NativeOffsetInlineeFramePair& pair, int index) {
                 if (pair.offset >= offset)
                 {
-                    if (index == 0 || index > 0 && this->inlineeFrameMap->Item(index - 1).offset < offset)
+                    if (index == 0 || (index > 0 && this->inlineeFrameMap->Item(index - 1).offset < offset))
                     {
                         return 0;
                     }
@@ -8375,7 +8374,7 @@ namespace Js
 
                 if (item.offset >= offset)
                 {
-                    if (midIndex == 0 || midIndex > 0 && offsets[midIndex - 1].offset < offset)
+                    if (midIndex == 0 || (midIndex > 0 && offsets[midIndex - 1].offset < offset))
                     {
                         if (offsets[midIndex].recordOffset == NativeOffsetInlineeFrameRecordOffset::InvalidRecordOffset)
                         {
@@ -8410,7 +8409,7 @@ namespace Js
             // find the closest entry which is greater than the current offset.
             if (record.offset >= offset)
             {
-                if (index == 0 || index > 0 && this->bailoutRecordMap->Item(index - 1).offset < offset)
+                if (index == 0 || (index > 0 && this->bailoutRecordMap->Item(index - 1).offset < offset))
                 {
                     return 0;
                 }
@@ -8466,7 +8465,7 @@ namespace Js
                 }
                 else
                 {
-                    HeapDeletePlus(offsetof(PinnedTypeRefsIDL, typeRefs) + sizeof(void*)*jitTransferData->runtimeTypeRefs->count - sizeof(PinnedTypeRefsIDL), 
+                    HeapDeletePlus(offsetof(PinnedTypeRefsIDL, typeRefs) + sizeof(void*)*jitTransferData->runtimeTypeRefs->count - sizeof(PinnedTypeRefsIDL),
                         jitTransferData->runtimeTypeRefs);
                 }
                 jitTransferData->runtimeTypeRefs = nullptr;
@@ -8680,6 +8679,28 @@ namespace Js
     {
         if (this->GetState() != CleanedUp)
         {
+            // Unregister xdataInfo before OnCleanup() which may release xdataInfo->address
+#if ENABLE_NATIVE_CODEGEN
+#if defined(_M_X64)
+            if (this->xdataInfo != nullptr)
+            {
+                XDataAllocator::Unregister(this->xdataInfo);
+                HeapDelete(this->xdataInfo);
+                this->xdataInfo = nullptr;
+            }
+#elif defined(_M_ARM32_OR_ARM64)
+            if (this->xdataInfo != nullptr)
+            {
+                XDataAllocator::Unregister(this->xdataInfo);
+                if (JITManager::GetJITManager()->IsOOPJITEnabled())
+                {
+                    HeapDelete(this->xdataInfo);
+                }
+                this->xdataInfo = nullptr;
+            }
+#endif
+#endif
+
             this->OnCleanup(isShutdown);
 
 #if ENABLE_NATIVE_CODEGEN
@@ -8710,7 +8731,6 @@ namespace Js
             {
                 this->constructorCaches->Clear();
             }
-
 #endif
 
             // This is how we set the CleanedUp state
@@ -9174,7 +9194,7 @@ namespace Js
                 // that are using the simple JIT code, and update the original entry point as necessary as well.
                 const JavascriptMethod newOriginalEntryPoint =
                     functionBody->GetDynamicInterpreterEntryPoint()
-                        ?   static_cast<JavascriptMethod>(
+                        ?   reinterpret_cast<JavascriptMethod>(
                                 InterpreterThunkEmitter::ConvertToEntryPoint(functionBody->GetDynamicInterpreterEntryPoint()))
                         :   DefaultEntryThunk;
                 const JavascriptMethod currentThunk = functionBody->GetScriptContext()->CurrentThunk;

+ 3 - 3
lib/Runtime/Base/ThreadContextInfo.cpp

@@ -367,7 +367,7 @@ ThreadContextInfo::IsJITActive()
 
 intptr_t SHIFT_ADDR(const ThreadContextInfo*const context, intptr_t address)
 {
-#if ENABLE_NATIVE_CODEGEN
+#if ENABLE_OOP_NATIVE_CODEGEN
     Assert(AutoSystemInfo::Data.IsJscriptModulePointer((void*)address));
     ptrdiff_t diff = 0;
     if (JITManager::GetJITManager()->IsJITServer())
@@ -387,10 +387,10 @@ intptr_t SHIFT_ADDR(const ThreadContextInfo*const context, intptr_t address)
 
 intptr_t SHIFT_CRT_ADDR(const ThreadContextInfo*const context, intptr_t address)
 {
-#if ENABLE_NATIVE_CODEGEN
+#if ENABLE_OOP_NATIVE_CODEGEN
     if (AutoSystemInfo::Data.IsJscriptModulePointer((void*)address))
     {
-        // the function is compiled to chakra.dll, or statically linked to crt 
+        // the function is compiled to chakra.dll, or statically linked to crt
         return SHIFT_ADDR(context, address);
     }
     ptrdiff_t diff = 0;

+ 58 - 15
lib/Runtime/Language/Arguments.h

@@ -4,17 +4,58 @@
 //-------------------------------------------------------------------------------------------------------
 #pragma once
 
+// To extract variadic args array after known args list:
+//      argx, callInfo, ...
+// NOTE: The last known arg name is hard-coded to "callInfo".
 #ifdef _WIN32
-#define VA_LIST_TO_VARARRAY(vl, va, callInfo) Js::Var* va = (Js::Var*) vl;
+#define DECLARE_ARGS_VARARRAY(va, ...)                              \
+    va_list _vl;                                                    \
+    va_start(_vl, callInfo);                                        \
+    Js::Var* va = (Js::Var*)_vl
 #else
 #if defined(_M_X64) || defined(_M_IX86)
 // We use a custom calling convention to invoke JavascriptMethod based on
 // System V AMD64 ABI. At entry of JavascriptMethod the stack layout is:
 //      [Return Address] [function] [callInfo] [arg0] [arg1] ...
 //
-#define VA_LIST_TO_VARARRAY(vl, va, callInfo)                              \
-    Js::Var* va = reinterpret_cast<Js::Var*>(_AddressOfReturnAddress()) + 3; \
-    Assert(*reinterpret_cast<Js::CallInfo*>(va - 1) == callInfo);
+#define DECLARE_ARGS_VARARRAY_N(va, n)                              \
+    Js::Var* va = _get_va(_AddressOfReturnAddress(), n);            \
+    Assert(*reinterpret_cast<Js::CallInfo*>(va - 1) == callInfo)
+
+#define DECLARE_ARGS_VARARRAY(va, ...)                              \
+    DECLARE_ARGS_VARARRAY_N(va, _count_args(__VA_ARGS__))
+
+inline Js::Var* _get_va(void* addrOfReturnAddress, int n)
+{
+    Js::Var* pArgs = reinterpret_cast<Js::Var*>(addrOfReturnAddress) + 1;
+    return pArgs + n;
+}
+
+inline int _count_args(Js::CallInfo callInfo)
+{
+    return 2;  // for typical JsMethod with 2 known args "function, callInfo"
+}
+template <class T1>
+inline int _count_args(const T1&, Js::CallInfo callInfo)
+{
+    return 2;
+}
+template <class T1, class T2>
+inline int _count_args(const T1&, const T2&, Js::CallInfo callInfo)
+{
+    return 3;
+}
+template <class T1, class T2, class T3>
+inline int _count_args(const T1&, const T2&, const T3&, Js::CallInfo callInfo)
+{
+    return 4;
+}
+template <class T1, class T2, class T3, class T4>
+inline int _count_args(const T1&, const T2&, const T3&, const T4&, Js::CallInfo callInfo)
+{
+    return 5;
+}
+
 #else
 #error Not yet implemented
 #endif
@@ -45,18 +86,20 @@
  * used by JavaScript functions. It is a low level macro that does not try to
  * differentiate between script usable Vars and runtime data structures.
  * To be able to access only script usable args use the ARGUMENTS macro instead.
+ *
+ * The ... list must be
+ *  * "callInfo", typically for JsMethod that has only 2 known args
+ *    "function, callInfo";
+ *  * or the full known args list ending with "callInfo" (for some runtime
+ *    helpers).
  */
-#define RUNTIME_ARGUMENTS(n, s)                                           \
-    va_list argptr;                                                       \
-    va_start(argptr, s);                                                  \
-    VA_LIST_TO_VARARRAY(argptr, _argsVarArray, s)                         \
-    Js::Arguments n(s, _argsVarArray);
-
-#define ARGUMENTS(n, s)                                                   \
-    va_list argptr;                                                       \
-    va_start(argptr, s);                                                  \
-    VA_LIST_TO_VARARRAY(argptr, _argsVarArray, s)                         \
-    Js::ArgumentReader n(&s, _argsVarArray);
+#define RUNTIME_ARGUMENTS(n, ...)                       \
+    DECLARE_ARGS_VARARRAY(_argsVarArray, __VA_ARGS__);  \
+    Js::Arguments n(callInfo, _argsVarArray);
+
+#define ARGUMENTS(n, ...)                               \
+    DECLARE_ARGS_VARARRAY(_argsVarArray, __VA_ARGS__);  \
+    Js::ArgumentReader n(&callInfo, _argsVarArray);
 
 namespace Js
 {

+ 61 - 49
lib/Runtime/Language/InterpreterStackFrame.cpp

@@ -1663,7 +1663,7 @@ namespace Js
     }
 #endif
 
-    bool InterpreterStackFrame::IsDelayDynamicInterpreterThunk(void * entryPoint)
+    bool InterpreterStackFrame::IsDelayDynamicInterpreterThunk(JavascriptMethod entryPoint)
     {
         return
 #if DYNAMIC_INTERPRETER_THUNK
@@ -1779,7 +1779,7 @@ namespace Js
 #if ENABLE_PROFILE_INFO
         DynamicProfileInfo * dynamicProfileInfo = nullptr;
         const bool doProfile = executeFunction->GetInterpreterExecutionMode(false) == ExecutionMode::ProfilingInterpreter ||
-                               executeFunction->IsInDebugMode() && DynamicProfileInfo::IsEnabled(executeFunction);
+                               (executeFunction->IsInDebugMode() && DynamicProfileInfo::IsEnabled(executeFunction));
         if (doProfile)
         {
 #if !DYNAMIC_INTERPRETER_THUNK
@@ -2057,18 +2057,18 @@ namespace Js
         {
         case Js::AsmJsRetType::Double:
         {
-            entryPoint = (AsmJsInterpreterDoubleEP)Js::InterpreterStackFrame::AsmJsInterpreter < double > ;
+            entryPoint = (void*)(AsmJsInterpreterDoubleEP)Js::InterpreterStackFrame::AsmJsInterpreter < double > ;
             break;
         }
         case Js::AsmJsRetType::Float:
         {
-            entryPoint = (AsmJsInterpreterFloatEP)Js::InterpreterStackFrame::AsmJsInterpreter < float > ;
+            entryPoint = (void*)(AsmJsInterpreterFloatEP)Js::InterpreterStackFrame::AsmJsInterpreter < float > ;
             break;
         }
         case Js::AsmJsRetType::Signed:
         case Js::AsmJsRetType::Void:
         {
-            entryPoint = (AsmJsInterpreterIntEP)Js::InterpreterStackFrame::AsmJsInterpreter < int > ;
+            entryPoint = (void*)(AsmJsInterpreterIntEP)Js::InterpreterStackFrame::AsmJsInterpreter < int > ;
             break;
         }
         case Js::AsmJsRetType::Int32x4:
@@ -2083,7 +2083,7 @@ namespace Js
         case Js::AsmJsRetType::Uint16x8:
         case Js::AsmJsRetType::Uint8x16:
         {
-            entryPoint = Js::InterpreterStackFrame::AsmJsInterpreterSimdJs;
+            entryPoint = (void*)Js::InterpreterStackFrame::AsmJsInterpreterSimdJs;
             break;
         }
         default:
@@ -2630,44 +2630,47 @@ namespace Js
         }
         threadContext->SetDisableImplicitFlags(prevDisableImplicitFlags);
         threadContext->SetImplicitCallFlags(saveImplicitcallFlags);
-        FrameDisplay* pDisplay = RecyclerNewPlus(scriptContext->GetRecycler(), sizeof(void*), FrameDisplay, 1);
-        pDisplay->SetItem( 0, moduleMemoryPtr );
-        for (int i = 0; i < info->GetFunctionCount(); i++)
+        // scope
         {
-            const auto& modFunc = info->GetFunction(i);
+            FrameDisplay* pDisplay = RecyclerNewPlus(scriptContext->GetRecycler(), sizeof(void*), FrameDisplay, 1);
+            pDisplay->SetItem( 0, moduleMemoryPtr );
+            for (int i = 0; i < info->GetFunctionCount(); i++)
+            {
+                const auto& modFunc = info->GetFunction(i);
 
-            // TODO: add more runtime checks here
-            auto proxy = m_functionBody->GetNestedFuncReference(i);
+                // TODO: add more runtime checks here
+                auto proxy = m_functionBody->GetNestedFuncReference(i);
 
-            AsmJsScriptFunction* scriptFuncObj = (AsmJsScriptFunction*)ScriptFunction::OP_NewScFunc(pDisplay, (FunctionProxy**)proxy);
-            localModuleFunctions[modFunc.location] = scriptFuncObj;
-            if (i == 0 && info->GetUsesChangeHeap())
-            {
-                scriptFuncObj->GetDynamicType()->SetEntryPoint(AsmJsChangeHeapBuffer);
-            }
-            else
-            {
-                if (scriptFuncObj->GetDynamicType()->GetEntryPoint() == DefaultDeferredDeserializeThunk)
+                AsmJsScriptFunction* scriptFuncObj = (AsmJsScriptFunction*)ScriptFunction::OP_NewScFunc(pDisplay, (FunctionProxy**)proxy);
+                localModuleFunctions[modFunc.location] = scriptFuncObj;
+                if (i == 0 && info->GetUsesChangeHeap())
                 {
-                    JavascriptFunction::DeferredDeserialize(scriptFuncObj);
+                    scriptFuncObj->GetDynamicType()->SetEntryPoint(AsmJsChangeHeapBuffer);
                 }
-                scriptFuncObj->GetDynamicType()->SetEntryPoint(AsmJsExternalEntryPoint);
-                scriptFuncObj->GetFunctionBody()->GetAsmJsFunctionInfo()->SetModuleFunctionBody(asmJsModuleFunctionBody);
-            }
-            scriptFuncObj->SetModuleMemory(moduleMemoryPtr);
-            if (!info->IsRuntimeProcessed())
-            {
-                // don't reset entrypoint upon relinking
-                FunctionEntryPointInfo* entrypointInfo = (FunctionEntryPointInfo*)scriptFuncObj->GetEntryPointInfo();
-                entrypointInfo->SetIsAsmJSFunction(true);
-                entrypointInfo->SetModuleAddress((uintptr_t)moduleMemoryPtr);
-
-#if DYNAMIC_INTERPRETER_THUNK
-                if (!PHASE_ON1(AsmJsJITTemplatePhase))
+                else
                 {
-                    entrypointInfo->jsMethod = AsmJsDefaultEntryThunk;
+                    if (scriptFuncObj->GetDynamicType()->GetEntryPoint() == DefaultDeferredDeserializeThunk)
+                    {
+                        JavascriptFunction::DeferredDeserialize(scriptFuncObj);
+                    }
+                    scriptFuncObj->GetDynamicType()->SetEntryPoint(AsmJsExternalEntryPoint);
+                    scriptFuncObj->GetFunctionBody()->GetAsmJsFunctionInfo()->SetModuleFunctionBody(asmJsModuleFunctionBody);
+                }
+                scriptFuncObj->SetModuleMemory(moduleMemoryPtr);
+                if (!info->IsRuntimeProcessed())
+                {
+                    // don't reset entrypoint upon relinking
+                    FunctionEntryPointInfo* entrypointInfo = (FunctionEntryPointInfo*)scriptFuncObj->GetEntryPointInfo();
+                    entrypointInfo->SetIsAsmJSFunction(true);
+                    entrypointInfo->SetModuleAddress((uintptr_t)moduleMemoryPtr);
+
+    #if DYNAMIC_INTERPRETER_THUNK
+                    if (!PHASE_ON1(AsmJsJITTemplatePhase))
+                    {
+                        entrypointInfo->jsMethod = AsmJsDefaultEntryThunk;
+                    }
+    #endif
                 }
-#endif
             }
         }
 
@@ -2727,9 +2730,11 @@ namespace Js
 
 
         // export only 1 function
-        Var exportFunc = localModuleFunctions[info->GetExportFunctionIndex()];
-        SetReg((RegSlot)0, exportFunc);
-        return exportFunc;
+        {
+            Var exportFunc = localModuleFunctions[info->GetExportFunctionIndex()];
+            SetReg((RegSlot)0, exportFunc);
+            return exportFunc;
+        }
 
     linkFailure:
         threadContext->SetDisableImplicitFlags(prevDisableImplicitFlags);
@@ -2754,7 +2759,7 @@ namespace Js
         ScriptFunction::ReparseAsmJsModule(&funcObj);
         const bool doProfile =
             funcObj->GetFunctionBody()->GetInterpreterExecutionMode(false) == ExecutionMode::ProfilingInterpreter ||
-            funcObj->GetFunctionBody()->IsInDebugMode() && DynamicProfileInfo::IsEnabled(funcObj->GetFunctionBody());
+            (funcObj->GetFunctionBody()->IsInDebugMode() && DynamicProfileInfo::IsEnabled(funcObj->GetFunctionBody()));
 
         DynamicProfileInfo * dynamicProfileInfo = nullptr;
         if (doProfile)
@@ -2788,7 +2793,7 @@ namespace Js
         }
 
 #if DBG
-        Js::RecyclableObject * invalidStackVar = (Js::RecyclableObject*)_alloca(sizeof(Js::RecyclableObject));
+        Var invalidStackVar = (Js::RecyclableObject*)_alloca(sizeof(Js::RecyclableObject));
         memset(invalidStackVar, 0xFE, sizeof(Js::RecyclableObject));
         InterpreterStackFrame * newInstance = newInstance = setup.InitializeAllocation(allocation, funcObj->GetFunctionBody()->GetHasImplicitArgIns(), doProfile, nullptr, stackAddr, invalidStackVar);
 #else
@@ -3017,8 +3022,14 @@ namespace Js
                 // IAT xmm2 spill
                 // IAT xmm1 spill <- floatSpillAddress for arg1
 
+#ifdef _WIN32
+#define FLOAT_SPILL_ADDRESS_OFFSET_WORDS 15
+#else
+// On Sys V x64 we have 4 words less (4 reg shadow)
+#define FLOAT_SPILL_ADDRESS_OFFSET_WORDS 11
+#endif
                 // floats are spilled as xmmwords
-                uintptr_t floatSpillAddress = (uintptr_t)m_inParams - MachPtr * (15 - 2*i);
+                uintptr_t floatSpillAddress = (uintptr_t)m_inParams - MachPtr * (FLOAT_SPILL_ADDRESS_OFFSET_WORDS - 2*i);
 
                 if (info->GetArgType(i).isInt())
                 {
@@ -6104,7 +6115,8 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         //       and do ISB only for 1st time this entry point is called (potential working set regression though).
         _InstructionSynchronizationBarrier();
 #endif
-        uint newOffset = ::Math::PointerCastToIntegral<uint>(address(function, CallInfo(CallFlags_InternalFrame, 1), this));
+        uint newOffset = ::Math::PointerCastToIntegral<uint>(
+            CALL_ENTRYPOINT(address, function, CallInfo(CallFlags_InternalFrame, 1), this));
 
 #ifdef _M_IX86
         _asm
@@ -7171,7 +7183,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         if (m_functionBody->HasCachedScopePropIds())
         {
             const Js::PropertyIdArray *propIds = this->m_functionBody->GetFormalsPropIdArray();
-                
+
             Var funcExpr = this->GetFunctionExpression();
             PropertyId objectId = ActivationObjectEx::GetLiteralObjectRef(propIds);
             scopeObject = JavascriptOperators::OP_InitCachedScope(funcExpr, propIds,
@@ -7743,7 +7755,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
         // value is out of bound
         if (throws)
         {
-            JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, L"SIMD.Int32x4.FromFloat32x4");
+            JavascriptError::ThrowRangeError(scriptContext, JSERR_ArgumentOutOfRange, _u("SIMD.Int32x4.FromFloat32x4"));
         }
         SetRegRawSimd(playout->U4_0, result);
     }
@@ -8530,7 +8542,7 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
                 }
                 Assert(propIds != nullptr);
                 SetLocalClosure(frameObject);
-                
+
                 if (PHASE_VERBOSE_TRACE1(Js::StackArgFormalsOptPhase) && m_functionBody->GetInParamsCount() > 1)
                 {
                     Output::Print(_u("StackArgFormals : %s (%d) :Creating scope object in the bail out path. \n"), m_functionBody->GetDisplayName(), m_functionBody->GetFunctionNumber());
@@ -8550,12 +8562,12 @@ const byte * InterpreterStackFrame::OP_ProfiledLoopBodyStart(const byte * ip)
                 Output::Flush();
             }
         }
-        
+
         if (heapArgObj)
         {
             heapArgObj->SetFormalCount(formalsCount);
             heapArgObj->SetFrameObject(frameObject);
-            
+
             if (PHASE_TRACE1(Js::StackArgFormalsOptPhase) && formalsCount > 0)
             {
                 Output::Print(_u("StackArgFormals : %s (%d) :Attaching the scope object with the heap arguments object in the bail out path. \n"), m_functionBody->GetDisplayName(), m_functionBody->GetFunctionNumber());

+ 13 - 58
lib/Runtime/Language/JavascriptOperators.cpp

@@ -1396,7 +1396,7 @@ CommonNumber:
 
         ThreadContext *threadContext = scriptContext->GetThreadContext();
 
-        Var iteratorVar = 
+        Var iteratorVar =
             threadContext->ExecuteImplicitCall(function, ImplicitCall_Accessor, [=]() -> Var
                 {
                     return CALL_FUNCTION(function, CallInfo(Js::CallFlags_Value, 1), aRight);
@@ -2087,37 +2087,6 @@ CommonNumber:
         return GetterSetter_Impl<JavascriptString*, false>(instance, propertyName, setterValue, info, scriptContext);
     }
 
-    // Checks to see if any object in the prototype chain has a property descriptor for the given property
-    // that specifies either an accessor or a non-writable attribute.
-    // If TRUE, check flags for details.
-    template<typename PropertyKeyType, bool doFastProtoChainCheck, bool isRoot>
-    BOOL JavascriptOperators::CheckPrototypesForAccessorOrNonWritablePropertyCore(RecyclableObject* instance,
-        PropertyKeyType propertyKey, Var* setterValue, DescriptorFlags* flags, PropertyValueInfo* info, ScriptContext* scriptContext)
-    {
-        Assert(setterValue);
-        Assert(flags);
-
-        // Do a quick check to see if all objects in the prototype chain are known to have only
-        // writable data properties (i.e. no accessors or non-writable properties).
-        if (doFastProtoChainCheck && CheckIfObjectAndPrototypeChainHasOnlyWritableDataProperties(instance))
-        {
-            return FALSE;
-        }
-
-        if (isRoot)
-        {
-            *flags = JavascriptOperators::GetRootSetter(instance, propertyKey, setterValue, info, scriptContext);
-        }
-        if (*flags == None)
-        {
-            *flags = JavascriptOperators::GetterSetter(instance, propertyKey, setterValue, info, scriptContext);
-        }
-
-
-
-        return ((*flags & Accessor) == Accessor) || ((*flags & Proxy) == Proxy)|| ((*flags & Data) == Data && (*flags & Writable) == None);
-    }
-
     void JavascriptOperators::OP_InvalidateProtoCaches(PropertyId propertyId, ScriptContext *scriptContext)
     {
         scriptContext->InvalidateProtoCaches(propertyId);
@@ -2931,7 +2900,7 @@ CommonNumber:
         // If we have console scope and no one in the scope had the property add it to console scope
         if ((length > 0) && ConsoleScopeActivationObject::Is(pDisplay->GetItem(length - 1)))
         {
-            // CheckPrototypesForAccessorOrNonWritableProperty does not check for const in global object. We should check it here. 
+            // CheckPrototypesForAccessorOrNonWritableProperty does not check for const in global object. We should check it here.
             if ((length > 1) && GlobalObject::Is(pDisplay->GetItem(length - 2)))
             {
                 GlobalObject* globalObject = GlobalObject::FromVar(pDisplay->GetItem(length - 2));
@@ -4925,7 +4894,7 @@ CommonNumber:
         return JavascriptOperators::OP_GetProperty(instance, PropertyIds::length, scriptContext);
     }
 
-    inline Var JavascriptOperators::GetThisFromModuleRoot(Var thisVar)
+    Var JavascriptOperators::GetThisFromModuleRoot(Var thisVar)
     {
         RootObjectBase * rootObject = static_cast<RootObjectBase*>(thisVar);
         RecyclableObject* hostObject = rootObject->GetHostObject();
@@ -5130,7 +5099,7 @@ CommonNumber:
         {
             return false;
         }
-        if (DynamicType::Is(typeId) && 
+        if (DynamicType::Is(typeId) &&
             static_cast<DynamicObject*>(instance)->GetTypeHandler()->IsStringTypeHandler())
         {
             return false;
@@ -5140,7 +5109,7 @@ CommonNumber:
             return false;
         }
         return !(instance->HasDeferredTypeHandler() &&
-                 JavascriptFunction::Is(instance) && 
+                 JavascriptFunction::Is(instance) &&
                  JavascriptFunction::FromVar(instance)->IsExternalFunction());
     }
 
@@ -5154,7 +5123,7 @@ CommonNumber:
             {
                 return false;
             }
-        }       
+        }
         return true;
     }
 
@@ -6814,8 +6783,8 @@ CommonNumber:
             formalsCount = propIds->count;
             Assert(formalsCount != 0 && propIds != nullptr);
         }
-        
-        HeapArgumentsObject *argsObj = JavascriptOperators::CreateHeapArguments(funcCallee, actualsCount, formalsCount, frameObj, scriptContext);        
+
+        HeapArgumentsObject *argsObj = JavascriptOperators::CreateHeapArguments(funcCallee, actualsCount, formalsCount, frameObj, scriptContext);
         return FillScopeObject(funcCallee, actualsCount, formalsCount, frameObj, paramAddr, propIds, argsObj, scriptContext, nonSimpleParamList, false);
     }
 
@@ -6826,11 +6795,11 @@ CommonNumber:
                   "Loading the arguments object in the global function?");
 
         HeapArgumentsObject *argsObj = JavascriptOperators::CreateHeapArguments(funcCallee, actualsCount, formalsCount, frameObj, scriptContext);
-        
+
         return FillScopeObject(funcCallee, actualsCount, formalsCount, frameObj, paramAddr, nullptr, argsObj, scriptContext, nonSimpleParamList, true);
     }
 
-    Var JavascriptOperators::FillScopeObject(JavascriptFunction *funcCallee, uint32 actualsCount, uint32 formalsCount, Var frameObj, Var * paramAddr, 
+    Var JavascriptOperators::FillScopeObject(JavascriptFunction *funcCallee, uint32 actualsCount, uint32 formalsCount, Var frameObj, Var * paramAddr,
         Js::PropertyIdArray *propIds, HeapArgumentsObject * argsObj, ScriptContext * scriptContext, bool nonSimpleParamList, bool useCachedScope)
     {
         Assert(frameObj);
@@ -7007,7 +6976,7 @@ CommonNumber:
         if (scriptContext->GetConfig()->IsES6HasInstanceEnabled())
         {
             Var instOfHandler = JavascriptOperators::GetProperty(constructor, PropertyIds::_symbolHasInstance, scriptContext);
-            if (JavascriptOperators::IsUndefinedObject(instOfHandler) 
+            if (JavascriptOperators::IsUndefinedObject(instOfHandler)
                 || instOfHandler == scriptContext->GetBuiltInLibraryFunction(JavascriptFunction::EntryInfo::SymbolHasInstance.GetOriginalEntryPoint()))
             {
                 return JavascriptBoolean::ToVar(constructor->HasInstance(instance, scriptContext, inlineCache), scriptContext);
@@ -8400,7 +8369,7 @@ CommonNumber:
         {
             // CONSIDER (EquivObjTypeSpec): Invent some form of least recently used eviction scheme.
             uintptr_t index = (reinterpret_cast<uintptr_t>(type) >> 4) & (EQUIVALENT_TYPE_CACHE_SIZE - 1);
-            
+
             if (cache->nextEvictionVictim == EQUIVALENT_TYPE_CACHE_SIZE)
             {
                 __analysis_assume(index < EQUIVALENT_TYPE_CACHE_SIZE);
@@ -8425,7 +8394,7 @@ CommonNumber:
             __analysis_assume(index < EQUIVALENT_TYPE_CACHE_SIZE);
             equivTypes[index] = type;
         }
-        
+
         // Fixed field checks allow us to assume a specific type ID, but the assumption is only
         // valid if we lock the type. Otherwise, the type ID may change out from under us without
         // evolving the type.
@@ -10763,20 +10732,6 @@ CommonNumber:
         }
     }
 
-    template<typename PropertyKeyType>
-    BOOL JavascriptOperators::CheckPrototypesForAccessorOrNonWritablePropertySlow(RecyclableObject* instance, PropertyKeyType propertyKey, Var* setterValue, DescriptorFlags* flags, bool isRoot, ScriptContext* scriptContext)
-    {
-        // This is used in debug verification, do not doFastProtoChainCheck to avoid side effect (doFastProtoChainCheck may update HasWritableDataOnly flags).
-        if (isRoot)
-        {
-            return CheckPrototypesForAccessorOrNonWritablePropertyCore<PropertyKeyType, /*doFastProtoChainCheck*/false, true>(instance, propertyKey, setterValue, flags, nullptr, scriptContext);
-        }
-        else
-        {
-            return CheckPrototypesForAccessorOrNonWritablePropertyCore<PropertyKeyType, /*doFastProtoChainCheck*/false, false>(instance, propertyKey, setterValue, flags, nullptr, scriptContext);
-        }
-    }
-
     BOOL JavascriptOperators::SetProperty(Var instance, RecyclableObject* object, PropertyId propertyId, Var newValue, ScriptContext* requestContext, PropertyOperationFlags propertyOperationFlags)
     {
         PropertyValueInfo info;

+ 42 - 0
lib/Runtime/Language/JavascriptOperators.inl

@@ -106,4 +106,46 @@ namespace Js
         return false;
     }
 
+    // Checks to see if any object in the prototype chain has a property descriptor for the given property
+    // that specifies either an accessor or a non-writable attribute.
+    // If TRUE, check flags for details.
+    template<typename PropertyKeyType, bool doFastProtoChainCheck, bool isRoot>
+    BOOL JavascriptOperators::CheckPrototypesForAccessorOrNonWritablePropertyCore(RecyclableObject* instance,
+        PropertyKeyType propertyKey, Var* setterValue, DescriptorFlags* flags, PropertyValueInfo* info, ScriptContext* scriptContext)
+    {
+        Assert(setterValue);
+        Assert(flags);
+
+        // Do a quick check to see if all objects in the prototype chain are known to have only
+        // writable data properties (i.e. no accessors or non-writable properties).
+        if (doFastProtoChainCheck && CheckIfObjectAndPrototypeChainHasOnlyWritableDataProperties(instance))
+        {
+            return FALSE;
+        }
+
+        if (isRoot)
+        {
+            *flags = JavascriptOperators::GetRootSetter(instance, propertyKey, setterValue, info, scriptContext);
+        }
+        if (*flags == None)
+        {
+            *flags = JavascriptOperators::GetterSetter(instance, propertyKey, setterValue, info, scriptContext);
+        }
+
+        return ((*flags & Accessor) == Accessor) || ((*flags & Proxy) == Proxy)|| ((*flags & Data) == Data && (*flags & Writable) == None);
+    }
+
+    template<typename PropertyKeyType>
+    BOOL JavascriptOperators::CheckPrototypesForAccessorOrNonWritablePropertySlow(RecyclableObject* instance, PropertyKeyType propertyKey, Var* setterValue, DescriptorFlags* flags, bool isRoot, ScriptContext* scriptContext)
+    {
+        // This is used in debug verification, do not doFastProtoChainCheck to avoid side effect (doFastProtoChainCheck may update HasWritableDataOnly flags).
+        if (isRoot)
+        {
+            return CheckPrototypesForAccessorOrNonWritablePropertyCore<PropertyKeyType, /*doFastProtoChainCheck*/false, true>(instance, propertyKey, setterValue, flags, nullptr, scriptContext);
+        }
+        else
+        {
+            return CheckPrototypesForAccessorOrNonWritablePropertyCore<PropertyKeyType, /*doFastProtoChainCheck*/false, false>(instance, propertyKey, setterValue, flags, nullptr, scriptContext);
+        }
+    }
 }

+ 2 - 2
lib/Runtime/Language/ProfilingHelpers.cpp

@@ -81,7 +81,7 @@ namespace Js
 
             const int32 index = TaggedInt::ToInt32(varIndex);
             const uint32 offset = index;
-            if(index < 0 || offset >= headSegmentLength || array && array->IsMissingHeadSegmentItem(offset))
+            if(index < 0 || offset >= headSegmentLength || (array && array->IsMissingHeadSegmentItem(offset)))
             {
                 ldElemInfo.neededHelperCall = true;
                 break;
@@ -446,7 +446,7 @@ namespace Js
         CallInfo callInfo,
         ...)
     {
-        ARGUMENTS(args, callInfo);
+        ARGUMENTS(args, callee, framePointer, profileId, arrayProfileId, callInfo);
         return
             ProfiledNewScObjArray(
                 callee,

+ 23 - 23
lib/Runtime/Language/amd64/JavascriptOperatorsA.S

@@ -25,7 +25,7 @@ C_FUNC(amd64_CallWithFakeFrame):
         lea rax, [rip + C_FUNC(amd64_ReturnFromCallWithFakeFrame)]
         mov [rsp+8h], rax
 
-        mov rax, [rsp + 28h]
+        mov rax, r8             // arg0
 
         push rbp
         mov rbp, rsi
@@ -33,17 +33,17 @@ C_FUNC(amd64_CallWithFakeFrame):
         // Frame spill size.
         sub rsp, rdx
 
-        // Save callee-saved xmm registers
-        movapd xmmword ptr [rsp + 90h], xmm15
-        movapd xmmword ptr [rsp + 80h], xmm14
-        movapd xmmword ptr [rsp + 70h], xmm13
-        movapd xmmword ptr [rsp + 60h], xmm12
-        movapd xmmword ptr [rsp + 50h], xmm11
-        movapd xmmword ptr [rsp + 40h], xmm10
-        movapd xmmword ptr [rsp + 30h], xmm9
-        movapd xmmword ptr [rsp + 20h], xmm8
-        movapd xmmword ptr [rsp + 10h], xmm7
-        movapd xmmword ptr [rsp], xmm6
+        // Save callee-saved xmm registers -- none on Sys V x64
+        // movapd xmmword ptr [rsp + 90h], xmm15
+        // movapd xmmword ptr [rsp + 80h], xmm14
+        // movapd xmmword ptr [rsp + 70h], xmm13
+        // movapd xmmword ptr [rsp + 60h], xmm12
+        // movapd xmmword ptr [rsp + 50h], xmm11
+        // movapd xmmword ptr [rsp + 40h], xmm10
+        // movapd xmmword ptr [rsp + 30h], xmm9
+        // movapd xmmword ptr [rsp + 20h], xmm8
+        // movapd xmmword ptr [rsp + 10h], xmm7
+        // movapd xmmword ptr [rsp], xmm6
 
         // Save all callee saved registers.
         push r15
@@ -70,17 +70,17 @@ C_FUNC(amd64_ReturnFromCallWithFakeFrame):
         pop  r14
         pop  r15
 
-        // Restore callee-saved xmm registers
-        movapd xmm6, xmmword ptr [rsp]
-        movapd xmm7, xmmword ptr [rsp + 10h]
-        movapd xmm8, xmmword ptr [rsp + 20h]
-        movapd xmm9, xmmword ptr [rsp + 30h]
-        movapd xmm10, xmmword ptr [rsp + 40h]
-        movapd xmm11, xmmword ptr [rsp + 50h]
-        movapd xmm12, xmmword ptr [rsp + 60h]
-        movapd xmm13, xmmword ptr [rsp + 70h]
-        movapd xmm14, xmmword ptr [rsp + 80h]
-        movapd xmm15, xmmword ptr [rsp + 90h]
+        // Restore callee-saved xmm registers -- none on Sys V x64; must match RegList.h
+        // movapd xmm6, xmmword ptr [rsp]
+        // movapd xmm7, xmmword ptr [rsp + 10h]
+        // movapd xmm8, xmmword ptr [rsp + 20h]
+        // movapd xmm9, xmmword ptr [rsp + 30h]
+        // movapd xmm10, xmmword ptr [rsp + 40h]
+        // movapd xmm11, xmmword ptr [rsp + 50h]
+        // movapd xmm12, xmmword ptr [rsp + 60h]
+        // movapd xmm13, xmmword ptr [rsp + 70h]
+        // movapd xmm14, xmmword ptr [rsp + 80h]
+        // movapd xmm15, xmmword ptr [rsp + 90h]
 
         add  rsp, rdx
 

+ 382 - 0
lib/Runtime/Language/amd64/amd64_Thunks.S

@@ -0,0 +1,382 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+.intel_syntax noprefix
+#include "unixasmmacros.inc"
+
+
+#ifdef _ENABLE_DYNAMIC_THUNKS
+
+//============================================================================================================
+// InterpreterStackFrame::DelayDynamicInterpreterThunk
+//============================================================================================================
+
+// JavascriptMethod InterpreterStackFrame::EnsureDynamicInterpreterThunk(ScriptFunction * function)
+// extrn _ZN2Js21InterpreterStackFrame29EnsureDynamicInterpreterThunkEPNS_14ScriptFunctionE
+
+// Var InterpreterStackFrame::DelayDynamicInterpreterThunk(RecyclableObject* function, CallInfo callInfo, ...)
+.balign 16
+NESTED_ENTRY _ZN2Js21InterpreterStackFrame28DelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler
+        push_nonvol_reg rbp
+        lea  rbp, [rsp]
+
+        // save argument registers used by custom calling convention
+        push_register rdi
+        push_register rsi
+
+        //  JavascriptMethod InterpreterStackFrame::EnsureDynamicInterpreterThunk(
+        //              ScriptFunction * function)
+        //
+        //      RDI == function, setup by custom calling convention
+        call C_FUNC(_ZN2Js21InterpreterStackFrame29EnsureDynamicInterpreterThunkEPNS_14ScriptFunctionE)
+
+        pop_register rsi
+        pop_register rdi
+        pop_nonvol_reg rbp
+
+        jmp rax
+NESTED_END _ZN2Js21InterpreterStackFrame28DelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT
+
+
+//============================================================================================================
+// InterpreterStackFrame::AsmJsDelayDynamicInterpreterThunk
+//============================================================================================================
+
+// JavascriptMethod InterpreterStackFrame::EnsureDynamicInterpreterThunk(ScriptFunction * function)
+// extrn _ZN2Js21InterpreterStackFrame29EnsureDynamicInterpreterThunkEPNS_14ScriptFunctionE
+
+// Var InterpreterStackFrame::AsmJsDelayDynamicInterpreterThunk(RecyclableObject* function, CallInfo callInfo, ...)
+.balign 16
+NESTED_ENTRY _ZN2Js21InterpreterStackFrame33AsmJsDelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler
+        push_nonvol_reg rbp             // push rbp and adjust CFA offset
+        lea  rbp, [rsp]
+
+        set_cfa_register rbp, (2*8)     // Set to compute CFA as: rbp + 16 (sizeof: [rbp] [ReturnAddress])
+
+        // save argument registers used by custom calling convention
+        push rdi
+        push rsi
+        push rdx
+        push rcx
+        push r8
+        push r9
+
+        sub rsp, 40h
+
+        // spill potential floating point arguments to stack
+        movaps xmmword ptr [rsp + 00h], xmm0
+        movaps xmmword ptr [rsp + 10h], xmm1
+        movaps xmmword ptr [rsp + 20h], xmm2
+        movaps xmmword ptr [rsp + 30h], xmm3
+        call C_FUNC(_ZN2Js21InterpreterStackFrame29EnsureDynamicInterpreterThunkEPNS_14ScriptFunctionE)
+        // restore potential floating point arguments from stack
+        movaps xmm0, xmmword ptr [rsp + 00h]
+        movaps xmm1, xmmword ptr [rsp + 10h]
+        movaps xmm2, xmmword ptr [rsp + 20h]
+        movaps xmm3, xmmword ptr [rsp + 30h]
+
+        add rsp, 40h
+
+        pop r9
+        pop r8
+        pop rcx
+        pop rdx
+        pop rsi
+        pop rdi
+
+        pop_nonvol_reg rbp
+        jmp rax
+NESTED_END _ZN2Js21InterpreterStackFrame33AsmJsDelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT
+
+
+//============================================================================================================
+// DynamicProfileInfo::EnsureDynamicProfileInfoThunk
+//============================================================================================================
+// JavascriptMethod DynamicProfileInfo::EnsureDynamicProfileInfo(ScriptFunction * function)
+// extrn _ZN2Js18DynamicProfileInfo24EnsureDynamicProfileInfoEPNS_14ScriptFunctionE
+
+// Var DynamicProfileInfo::EnsureDynamicProfileInfoThunk(RecyclableObject* function, CallInfo callInfo, ...)
+.balign 16
+NESTED_ENTRY _ZN2Js18DynamicProfileInfo29EnsureDynamicProfileInfoThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler
+        push_nonvol_reg rbp
+        lea  rbp, [rsp]
+
+        // save argument registers used by custom calling convention
+        push_register rdi
+        push_register rsi
+
+        call _ZN2Js18DynamicProfileInfo24EnsureDynamicProfileInfoEPNS_14ScriptFunctionE
+
+        pop_register rsi
+        pop_register rdi
+
+        pop_nonvol_reg rbp
+        jmp rax
+NESTED_END _ZN2Js18DynamicProfileInfo29EnsureDynamicProfileInfoThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT
+
+#endif // _ENABLE_DYNAMIC_THUNKS
+
+
+#ifdef ENABLE_SCRIPT_PROFILING
+
+//============================================================================================================
+// ScriptContext::ProfileModeDeferredParsingThunk
+//============================================================================================================
+
+// Js::JavascriptMethod ScriptContext::ProfileModeDeferredParse(ScriptFunction *function)
+extrn ?ProfileModeDeferredParse@ScriptContext@Js@@SAP6APEAXPEAVRecyclableObject@2@UCallInfo@2@ZZPEAPEAVScriptFunction@2@@Z : PROC
+
+// Var ScriptContext::ProfileModeDeferredParsingThunk(RecyclableObject* function, CallInfo callInfo, ...)
+align 16
+?ProfileModeDeferredParsingThunk@ScriptContext@Js@@SAPEAXPEAVRecyclableObject@2@UCallInfo@2@ZZ PROC FRAME
+        // save volatile registers
+        mov qword ptr [rsp + 8h],  rcx
+        mov qword ptr [rsp + 10h], rdx
+        mov qword ptr [rsp + 18h], r8
+        mov qword ptr [rsp + 20h], r9
+
+        push rbp
+        .pushreg rbp
+        lea  rbp, [rsp]
+        .setframe rbp, 0
+        .endprolog
+
+        sub rsp, 20h
+        lea rcx, [rsp + 30h]
+        call ?ProfileModeDeferredParse@ScriptContext@Js@@SAP6APEAXPEAVRecyclableObject@2@UCallInfo@2@ZZPEAPEAVScriptFunction@2@@Z
+
+        add rsp, 20h
+
+        lea rsp, [rbp]
+        pop rbp
+
+        // restore volatile registers
+        mov rcx, qword ptr [rsp + 8h]
+        mov rdx, qword ptr [rsp + 10h]
+        mov r8,  qword ptr [rsp + 18h]
+        mov r9,  qword ptr [rsp + 20h]
+
+        rex_jmp_reg rax
+?ProfileModeDeferredParsingThunk@ScriptContext@Js@@SAPEAXPEAVRecyclableObject@2@UCallInfo@2@ZZ ENDP
+
+//============================================================================================================
+
+
+//============================================================================================================
+// ScriptContext::ProfileModeDeferredDeserializeThunk
+//============================================================================================================
+
+// Js::JavascriptMethod ScriptContext::ProfileModeDeferredDeserialize(ScriptFunction *function)
+extrn ?ProfileModeDeferredDeserialize@ScriptContext@Js@@SAP6APEAXPEAVRecyclableObject@2@UCallInfo@2@ZZPEAVScriptFunction@2@@Z : PROC
+
+// Var ScriptContext::ProfileModeDeferredDeserializeThunk(RecyclableObject* function, CallInfo callInfo, ...)
+align 16
+?ProfileModeDeferredDeserializeThunk@ScriptContext@Js@@SAPEAXPEAVRecyclableObject@2@UCallInfo@2@ZZ PROC FRAME
+        // save volatile registers
+        mov qword ptr [rsp + 8h],  rcx
+        mov qword ptr [rsp + 10h], rdx
+        mov qword ptr [rsp + 18h], r8
+        mov qword ptr [rsp + 20h], r9
+
+        push rbp
+        .pushreg rbp
+        lea  rbp, [rsp]
+        .setframe rbp, 0
+        .endprolog
+
+        sub rsp, 20h
+        call ?ProfileModeDeferredDeserialize@ScriptContext@Js@@SAP6APEAXPEAVRecyclableObject@2@UCallInfo@2@ZZPEAVScriptFunction@2@@Z
+
+        add rsp, 20h
+
+        lea rsp, [rbp]
+        pop rbp
+
+        // restore volatile registers
+        mov rcx, qword ptr [rsp + 8h]
+        mov rdx, qword ptr [rsp + 10h]
+        mov r8,  qword ptr [rsp + 18h]
+        mov r9,  qword ptr [rsp + 20h]
+
+        rex_jmp_reg rax
+?ProfileModeDeferredDeserializeThunk@ScriptContext@Js@@SAPEAXPEAVRecyclableObject@2@UCallInfo@2@ZZ ENDP
+
+#endif // ENABLE_SCRIPT_PROFILING
+
+
+#ifdef _ENABLE_DYNAMIC_THUNKS
+
+//============================================================================================================
+// Js::InterpreterStackFrame::InterpreterAsmThunk (asm.js interpreter thunk)
+//============================================================================================================
+
+// extern _ZN2Js21InterpreterStackFrame29GetAsmJsInterpreterEntryPointEPNS_20AsmJsCallStackLayoutE
+
+// InterpreterStackFrame::InterpreterAsmThunk(AsmJsCallStackLayout *function, ...)
+.balign 16
+NESTED_ENTRY _ZN2Js21InterpreterStackFrame19InterpreterAsmThunkEPNS_20AsmJsCallStackLayoutE, _TEXT, NoHandler
+        push_nonvol_reg rbp             // push rbp and adjust CFA offset
+        lea  rbp, [rsp]
+
+        set_cfa_register rbp, (2*8)     // Set to compute CFA as: rbp + 16 (sizeof: [rbp] [ReturnAddress])
+
+        sub rsp, 40h
+
+        // spill potential floating point arguments to stack
+        movaps xmmword ptr [rsp + 00h], xmm0
+        movaps xmmword ptr [rsp + 10h], xmm1
+        movaps xmmword ptr [rsp + 20h], xmm2
+        movaps xmmword ptr [rsp + 30h], xmm3
+
+        // save argument registers used by custom calling convention
+        push rdi
+        push rsi
+
+        // get correct interpreter entrypoint
+        call C_FUNC(_ZN2Js21InterpreterStackFrame29GetAsmJsInterpreterEntryPointEPNS_20AsmJsCallStackLayoutE)
+
+        pop rsi
+        pop rdi
+
+        call rax // call appropriate template
+
+        add rsp, 40h
+        pop_nonvol_reg rbp
+        ret
+NESTED_END _ZN2Js21InterpreterStackFrame19InterpreterAsmThunkEPNS_20AsmJsCallStackLayoutE, _TEXT
+
+//============================================================================================================
+// Js::AsmJsExternalEntryPoint
+//============================================================================================================
+
+//extrn ?GetStackSizeForAsmJsUnboxing@Js@@YAHPEAVScriptFunction@1@@Z: PROC
+//extrn ?UnboxAsmJsArguments@Js@@YAPEAXPEAVScriptFunction@1@PEAPEAXPEADUCallInfo@1@@Z : PROC
+// extrn ?BoxAsmJsReturnValue@Js@@YAPEAXPEAVScriptFunction@1@HNM@Z : PROC
+//extrn ?BoxAsmJsReturnValue@Js@@YAPEAXPEAVScriptFunction@1@HNMT__m128@@@Z : PROC
+
+//extrn ?GetArgsSizesArray@Js@@YAPEAIPEAVScriptFunction@1@@Z : PROC
+
+// int Js::AsmJsExternalEntryPoint(RecyclableObject* entryObject, CallInfo callInfo, ...)
+.balign 16
+NESTED_ENTRY _ZN2Js23AsmJsExternalEntryPointEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler        
+        push_nonvol_reg rbp             // push rbp and adjust CFA offset
+        lea  rbp, [rsp]
+
+        set_cfa_register rbp, (2*8)     // Set to compute CFA as: rbp + 16 (sizeof: [rbp] [ReturnAddress])
+
+        sub rsp, 40h
+
+        mov [rsp + 28h], r12
+        mov [rsp + 30h], r13
+
+        mov r12, rdi // r12: entryObject
+        mov r13, rsi // r13: callInfo
+
+        // allocate stack space for unboxed values
+        // int GetStackSizeForAsmJsUnboxing(ScriptFunction* func)
+        call _ZN2Js28GetStackSizeForAsmJsUnboxingEPNS_14ScriptFunctionE
+
+        mov rcx, r13 // arg4: callInfo
+        mov rsi, rsp // arg2: orig stack pointer is arg for the unboxing helper
+        mov r13, rsi // r13: save orig stack pointer, so that we can add it back later
+        add rsi, 68h // account for the changes we have already made to rsp
+
+        sub rsp, rax // allocate additional stack space for args
+        // UnboxAsmJsArguments(func, origArgsLoc, argDst, callInfo)
+        mov rdi, r12 // arg1: func
+        mov rdx, rsp // arg3: argDst
+
+        // unboxing function also does stack probe
+        call _ZN2Js19UnboxAsmJsArgumentsEPNS_14ScriptFunctionEPPvPcNS_8CallInfoE
+        // rax = target function address
+
+        // move first 4 arguments into registers.
+        // don't know types other than arg0 (which is ScriptFunction *), so put in both xmm and general purpose registers
+        mov rdi, r12 // arg0: func
+
+        // int GetArgsSizesArray(ScriptFunction* func)
+        // get args sizes of target asmjs function
+        // rdi has ScriptFunction*
+        push r13
+        push rax
+        push rdi
+        sub rsp, 8h
+        call _ZN2Js17GetArgsSizesArrayEPNS_14ScriptFunctionE
+        mov r13, rax                    // r13: arg size
+        add rsp, 8h
+        pop rdi
+        pop rax
+
+        // NOTE: Below xmm usage is non-standard.
+
+        // Move 3 args to regs per convention. rdi already has first arg: ScriptFunction*
+        push r12
+        // r12->unboxed args
+        lea r12, [rsp + 18h] // rsp + size of(r12 + r13 + ScriptFunction*)
+
+        // r13 is arg size
+        cmp dword ptr [r13], 10h
+        je SIMDArg2
+        mov rsi, [r12]                  // arg1
+        movq xmm1, qword ptr [r12]      // arg1
+        add r12, 8h
+        jmp Arg3
+    SIMDArg2:
+        movups xmm1, xmmword ptr[r12]
+        add r12, 10h
+    Arg3:
+        cmp dword ptr [r13 + 4h], 10h
+        je SIMDArg3
+        mov rdx, [r12]                  // arg2
+        movq xmm2, qword ptr [r12]      // arg2
+        add r12, 8h
+        jmp Arg4
+    SIMDArg3:
+        movups xmm2, xmmword ptr[r12]
+        add r12, 10h
+    Arg4:
+        cmp dword ptr [r13 + 8h], 10h
+        je SIMDArg4
+        mov rcx, [r12]                  // arg3
+        movq xmm3, qword ptr [r12]      // arg3
+        jmp ArgsDone
+   SIMDArg4:
+        movups xmm3, xmmword ptr [r12]
+
+   ArgsDone:
+        pop r12         // r12: func
+        pop r13         // r13: orig stack pointer
+
+        // "home" arg0. other args were read from stack and already homed. 
+        mov [rsp + 00h], rdi
+
+        // call entry point
+        call rax
+
+        // Var BoxAsmJsReturnValue(ScriptFunction* func, int intRetVal, double doubleRetVal, float floatRetVal)
+        mov rdi, r12            // arg0: func
+        mov rsi, rax            // arg1: intRetVal
+        // movsd xmm0, xmm0     // arg2: doubleRetVal
+        movss xmm1, xmm0        // arg3: floatRetVal
+
+
+        // store SIMD xmm value and pointer to it as argument to box function
+        sub rsp, 20h
+        movups [rsp + 10h], xmm0
+        lea r12, [rsp + 10h]
+        mov qword ptr [rsp], r12
+        call _ZN2Js19BoxAsmJsReturnValueEPNS_14ScriptFunctionEidfDv4_f
+
+        mov rsp, r13 // restore stack pointer
+    Epilogue:
+        mov r12, [rsp + 28h]
+        mov r13, [rsp + 30h]
+
+        lea  rsp, [rbp]
+        pop_nonvol_reg rbp
+
+        ret
+NESTED_END _ZN2Js23AsmJsExternalEntryPointEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT
+
+#endif // _ENABLE_DYNAMIC_THUNKS

+ 1 - 1
lib/Runtime/Library/JavascriptFunction.cpp

@@ -944,7 +944,7 @@ namespace Js
     {
         PROBE_STACK(function->GetScriptContext(), Js::Constants::MinStackDefault);
 
-        RUNTIME_ARGUMENTS(args, callInfo);
+        RUNTIME_ARGUMENTS(args, spreadIndices, function, callInfo);
 
         return JavascriptFunction::CallSpreadFunction(function, function->GetEntryPoint(), args, spreadIndices);
     }

+ 170 - 2
lib/Runtime/Library/amd64/JavascriptFunctionA.S

@@ -107,6 +107,169 @@ LOCAL_LABEL(function_done):
 NESTED_END amd64_CallFunction, _TEXT
 
 
+//------------------------------------------------------------------------------
+#ifdef _ENABLE_ASM_JS
+
+//extrn ?GetStackSizeForAsmJsUnboxing@Js@@YAHPEAVScriptFunction@1@@Z: PROC
+//extrn ?GetArgsSizesArray@Js@@YAPEAIPEAVScriptFunction@1@@Z : PROC
+
+// float CallAsmJsFunction<float>(RecyclableObject *function, JavascriptMethod entryPoint, uint argc, Var *argv);
+.balign 16
+LEAF_ENTRY _ZN2Js18JavascriptFunction17CallAsmJsFunctionIfEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_, _TEXT
+    jmp _ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_
+LEAF_END _ZN2Js18JavascriptFunction17CallAsmJsFunctionIfEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_, _TEXT
+
+// double CallAsmJsFunction<double>(RecyclableObject *function, JavascriptMethod entryPoint, uint argc, Var *argv);
+.balign 16
+LEAF_ENTRY _ZN2Js18JavascriptFunction17CallAsmJsFunctionIdEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_, _TEXT
+    jmp _ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_
+LEAF_END _ZN2Js18JavascriptFunction17CallAsmJsFunctionIdEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_, _TEXT
+
+// __m128 JavascriptFunction::CallAsmJsFunction(RecyclableObject * function, void* entryPoint, uint argc, Var * argv);
+//.balign 16
+//??$CallAsmJsFunction@T__m128@@@JavascriptFunction@Js@@SA?AT__m128@@PEAVRecyclableObject@1@PEAXIPEAPEAX@Z PROC FRAME
+//    jmp _ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_
+//??$CallAsmJsFunction@T__m128@@@JavascriptFunction@Js@@SA?AT__m128@@PEAVRecyclableObject@1@PEAXIPEAPEAX@Z ENDP
+
+
+// int CallAsmJsFunction<int>(RecyclableObject *function, JavascriptMethod entryPoint, uint argc, Var *argv);
+.balign 16
+NESTED_ENTRY _ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_, _TEXT, NoHandler
+
+        // save these to stack for interpreter
+        mov qword ptr [rsp + 8h],  rcx
+        mov qword ptr [rsp + 10h], rdx
+        mov qword ptr [rsp + 18h], r8
+        mov qword ptr [rsp + 20h], r9
+
+        push rbx
+        push rsi
+        push rdi
+        push r12
+        push r13
+        push rbp
+        lea rbp, [rsp]
+
+        // The first 4 QWORD args are passed in rcx, rdx/xmm1, r8/xmm2 and r9/xmm3,
+        // upon entry rcx contains function *.
+        sub rsp, 8h
+
+
+
+        // rbx = argc
+        mov rbx, r8
+
+        // save entry point into rax.
+        mov rax, rdx
+
+        xor r10d, r10d
+
+        // rsi = argv
+        mov rsi, r9
+        add rsi, 8h
+
+        // get map of args sizes for this function
+        push rax
+        push rcx
+        sub rsp, 20h
+        call _ZN2Js17GetArgsSizesArrayEPNS_14ScriptFunctionE
+        mov r12, rax
+        add rsp, 20h
+        pop rcx
+        pop rax
+
+        // int GetStackSizeForAsmJsUnboxing(ScriptFunction* func)
+        // This will return 0x20 bytes if size is below minimum. Includes space for function*.
+        push rax
+        push rcx
+        sub rsp, 20h
+        call _ZN2Js28GetStackSizeForAsmJsUnboxingEPNS_14ScriptFunctionE
+        mov r13, rax
+        add rsp, 20h
+        pop rcx
+        pop rax
+
+setup_stack_and_reg_args:
+
+        // OP_CallAsmInternal checks stack space
+
+stack_alloc:
+        sub  rsp, r13
+
+        // copy all args to the new stack frame.
+        lea r11, [rsi]
+        lea r10, [rsp + 8] // copy after ScriptFunction*
+copy_stack_args:
+        mov rdi, qword ptr [r11]
+        mov qword ptr [r10], rdi
+        add r11, 8
+        add r10, 8
+        sub r13, 8
+        cmp r13, 0
+        jg copy_stack_args
+
+        // r12 points to arg size map
+setup_reg_args_1:
+        lea r11, [rsi]
+        // argc < 1 ?
+        cmp rbx, 1h
+        jl setup_args_done
+        cmp dword ptr[r12], 10h
+        je SIMDArg1
+        mov rdx, qword ptr [r11]
+        movq xmm1, qword ptr [r11]
+        add r11, 8h
+        jmp setup_reg_args_2
+SIMDArg1:
+        movups xmm1, xmmword ptr [r11]
+        add r11, 10h
+
+setup_reg_args_2:
+        // argc < 2 ?
+        cmp rbx, 2h
+        jl setup_args_done
+
+        add r12, 4
+        cmp dword ptr[r12], 10h
+        je SIMDArg2
+        mov r8, qword ptr [r11]
+        movq xmm2, qword ptr [r11]
+        add r11, 8h
+        jmp setup_reg_args_3
+SIMDArg2:
+        movups xmm2, xmmword ptr [r11]
+        add r11, 10h
+
+setup_reg_args_3:
+        // argc < 3 ?
+        cmp rbx, 3h
+        jl setup_args_done
+        add r12, 4
+        cmp dword ptr[r12], 10h
+        je SIMDArg3
+        mov r9, qword ptr [r11]
+        movq xmm3, qword ptr [r11]
+SIMDArg3:
+        movups xmm3, xmmword ptr [r11]
+
+setup_args_done:
+        call rax
+done:
+        lea rsp, [rbp]
+        pop rbp
+        pop r13
+        pop r12
+        pop rdi
+        pop rsi
+        pop rbx
+        ret
+
+NESTED_END _ZN2Js18JavascriptFunction17CallAsmJsFunctionIiEET_PNS_16RecyclableObjectEPFPvS4_NS_8CallInfoEzEjPS5_, _TEXT
+
+#endif // _ENABLE_ASM_JS
+
+
+//------------------------------------------------------------------------------
 .balign 16
 NESTED_ENTRY _ZN2Js18JavascriptFunction20DeferredParsingThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler
         push_nonvol_reg rbp
@@ -124,13 +287,18 @@ NESTED_ENTRY _ZN2Js18JavascriptFunction20DeferredParsingThunkEPNS_16RecyclableOb
 
         pop_register rsi
         pop_register rdi
-        pop_nonvol_reg rbp
 
+        mov rdi, qword ptr [rbp + 10h]  // re-load function, might have been changed by DeferredParse.
+                                        // e.g. StackScriptFunction is Boxed
+                                        // previous push/pop rdi is for stack alignment
+
+        pop_nonvol_reg rbp
         jmp rax
 
 NESTED_END _ZN2Js18JavascriptFunction20DeferredParsingThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT
 
 
+//------------------------------------------------------------------------------
 // Var JavascriptFunction::DeferredDeserializeThunk(
 //              RecyclableObject* function, CallInfo callInfo, ...)
 .balign 16
@@ -151,8 +319,8 @@ NESTED_ENTRY _ZN2Js18JavascriptFunction24DeferredDeserializeThunkEPNS_16Recyclab
 
         pop_register rsi
         pop_register rdi
-        pop_nonvol_reg rbp
 
+        pop_nonvol_reg rbp
         jmp rax
 
 NESTED_END _ZN2Js18JavascriptFunction24DeferredDeserializeThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT