Prechádzať zdrojové kódy

oop jit perf improvements

Michael Holman pred 9 rokmi
rodič
commit
4a741ad357

+ 18 - 1
lib/Backend/Func.cpp

@@ -99,6 +99,7 @@ Func::Func(JitArenaAllocator *alloc, JITTimeWorkItem * workItem,
     isGetterSetter(isGetterSetter),
     frameInfo(nullptr),
     isTJLoopBody(false),
+    m_nativeCodeDataSym(nullptr),
     isFlowGraphValid(false),
 #if DBG
     m_callSiteCount(0),
@@ -513,7 +514,6 @@ Func::TryCodegen()
 #endif /* IR_VIEWER */
 
     }
-    END_CODEGEN_PHASE(this, Js::BackEndPhase);
 
 #if DBG_DUMP
     if (Js::Configuration::Global.flags.IsEnabled(Js::AsmDumpModeFlag))
@@ -538,6 +538,7 @@ Func::TryCodegen()
 #endif
     if (this->IsOOPJIT())
     {
+        BEGIN_CODEGEN_PHASE(this, Js::NativeCodeDataPhase);
 
         auto dataAllocator = this->GetNativeCodeDataAllocator();
         if (dataAllocator->allocCount > 0)
@@ -604,8 +605,10 @@ Func::TryCodegen()
             }
 #endif
         }
+        END_CODEGEN_PHASE(this, Js::NativeCodeDataPhase);
     }
 
+    END_CODEGEN_PHASE(this, Js::BackEndPhase);
 }
 
 ///----------------------------------------------------------------------------
@@ -1578,6 +1581,20 @@ Func::SetScopeObjSym(StackSym * sym)
     stackArgWithFormalsTracker->SetScopeObjSym(sym);
 }
 
+StackSym *
+Func::GetNativeCodeDataSym() const // Returns the sym stored by SetNativeCodeDataSym; nullptr (constructor default) until one is set.
+{
+    Assert(IsOOPJIT()); // asserts the query only happens on an OOP JIT Func
+    return m_nativeCodeDataSym;
+}
+
+void
+Func::SetNativeCodeDataSym(StackSym * sym) // Stores the sym that GetNativeCodeDataSym() returns; parameter name matches the Func.h declaration.
+{
+    Assert(IsOOPJIT()); // asserts the sym is only recorded on an OOP JIT Func
+    m_nativeCodeDataSym = sym;
+}
+
 StackSym* 
 Func::GetScopeObjSym()
 {

+ 3 - 1
lib/Backend/Func.h

@@ -902,6 +902,8 @@ public:
 
     IR::LabelInstr *    m_bailOutNoSaveLabel;
 
+    StackSym * GetNativeCodeDataSym() const;
+    void SetNativeCodeDataSym(StackSym * sym);
 private:
 
     Js::EntryPointInfo* m_entryPointInfo; // for in-proc JIT only
@@ -940,7 +942,7 @@ private:
     IR::SymOpnd *GetInlineeOpndAtOffset(int32 offset);
     bool HasLocalVarSlotCreated() const { return m_localVarSlotsOffset != Js::Constants::InvalidOffset; }
     void EnsureLocalVarSlots();
-
+    StackSym * m_nativeCodeDataSym;
     SList<IR::RegOpnd *> constantAddressRegOpnd;
     IR::Instr * lastConstantAddressRegLoadInstr;
     bool canHoistConstantAddressLoad;

+ 1 - 1
lib/Backend/FunctionJITTimeInfo.cpp

@@ -112,7 +112,7 @@ FunctionJITTimeInfo::BuildJITTimeData(
                 }
             }
         }
-        if (codeGenData->GetGlobalObjTypeSpecFldInfoCount() > 0)
+        if (!isInlinee && codeGenData->GetGlobalObjTypeSpecFldInfoCount() > 0)
         {
             Js::ObjTypeSpecFldInfo ** globObjTypeSpecInfo = codeGenData->GetGlobalObjTypeSpecFldInfoArray();
             Assert(globObjTypeSpecInfo != nullptr);

+ 14 - 0
lib/Backend/IRBuilder.cpp

@@ -476,6 +476,8 @@ IRBuilder::Build()
 
     m_switchBuilder.Init(m_func, m_tempAlloc, false);
 
+    this->LoadNativeCodeData();
+
     this->BuildConstantLoads();
     this->BuildGeneratorPreamble();
 
@@ -1425,6 +1427,18 @@ IRBuilder::BuildGeneratorPreamble()
     this->AddInstr(labelInstr, Js::Constants::NoByteCodeOffset);
 }
 
+void
+IRBuilder::LoadNativeCodeData() // Emits one LdNativeCodeData instr and records its dst sym on the Func; called from Build() before BuildConstantLoads().
+{
+    if (m_func->IsOOPJIT() && m_func->IsTopFunc()) // only emitted for OOP JIT, and only when building the top func
+    {
+        IR::RegOpnd * nativeDataOpnd = IR::RegOpnd::New(TyVar, m_func); // fresh register operand that becomes the dst of the load
+        IR::Instr * instr = IR::Instr::New(Js::OpCode::LdNativeCodeData, nativeDataOpnd, m_func); // dst-only instr; no source operand is attached here
+        this->AddInstr(instr, Js::Constants::NoByteCodeOffset); // not associated with any bytecode offset
+        m_func->SetNativeCodeDataSym(nativeDataOpnd->GetStackSym()); // lowering code reads it back via GetTopFunc()->GetNativeCodeDataSym()
+    }
+}
+
 void
 IRBuilder::BuildConstantLoads()
 {

+ 2 - 1
lib/Backend/IRBuilder.h

@@ -90,7 +90,7 @@ public:
         }
 
         // Note: use original byte code without debugging probes, so that we don't jit BPs inserted by the user.
-        func->m_workItem->InitializeReader(&m_jnReader, &m_statementReader, func->m_alloc, func->GetThreadContextInfo());
+        func->m_workItem->InitializeReader(&m_jnReader, &m_statementReader, func->m_alloc);
     };
 
     ~IRBuilder() {
@@ -114,6 +114,7 @@ private:
 #endif
     BranchReloc *       CreateRelocRecord(IR::BranchInstr * branchInstr, uint32 offset, uint32 targetOffset);
     void                BuildGeneratorPreamble();
+    void                LoadNativeCodeData();
     void                BuildConstantLoads();
     void                BuildImplicitArgIns();
 

+ 15 - 0
lib/Backend/IRBuilderAsmJs.cpp

@@ -125,6 +125,8 @@ IRBuilderAsmJs::Build()
 #endif
     m_offsetToInstruction = JitAnewArrayZ(m_tempAlloc, IR::Instr *, offsetToInstructionCount);
 
+    LoadNativeCodeData();
+
     BuildConstantLoads();
     if (!this->IsLoopBody() && m_func->GetJITFunctionBody()->HasImplicitArgIns())
     {
@@ -207,6 +209,19 @@ IRBuilderAsmJs::Build()
     }
 }
 
+void
+IRBuilderAsmJs::LoadNativeCodeData() // asm.js counterpart of IRBuilder::LoadNativeCodeData; called from Build() before BuildConstantLoads().
+{
+    Assert(m_func->IsTopFunc()); // unlike IRBuilder, this builder asserts it is always building the top func
+    if (m_func->IsOOPJIT()) // nothing is emitted for in-proc JIT
+    {
+        IR::RegOpnd * nativeDataOpnd = IR::RegOpnd::New(TyVar, m_func); // fresh register operand that becomes the dst of the load
+        IR::Instr * instr = IR::Instr::New(Js::OpCode::LdNativeCodeData, nativeDataOpnd, m_func); // dst-only instr; no source operand is attached here
+        this->AddInstr(instr, Js::Constants::NoByteCodeOffset); // not associated with any bytecode offset
+        m_func->SetNativeCodeDataSym(nativeDataOpnd->GetStackSym()); // recorded on the Func so later phases can find the sym
+    }
+}
+
 void
 IRBuilderAsmJs::AddInstr(IR::Instr * instr, uint32 offset)
 {

+ 2 - 1
lib/Backend/IRBuilderAsmJs.h

@@ -29,7 +29,7 @@ public:
         , m_switchAdapter(this)
         , m_switchBuilder(&m_switchAdapter)
     {
-        func->m_workItem->InitializeReader(&m_jnReader, &m_statementReader, func->m_alloc, func->GetThreadContextInfo());
+        func->m_workItem->InitializeReader(&m_jnReader, &m_statementReader, func->m_alloc);
         m_asmFuncInfo = m_func->GetJITFunctionBody()->GetAsmJsInfo();
 #if 0
         // templatized JIT loop body
@@ -49,6 +49,7 @@ public:
 
 private:
 
+    void                    LoadNativeCodeData();
     void                    AddInstr(IR::Instr * instr, uint32 offset);
     bool                    IsLoopBody()const;
     uint                    GetLoopBodyExitInstrOffset() const;

+ 4 - 4
lib/Backend/JITTimeFunctionBody.cpp

@@ -22,7 +22,7 @@ JITTimeFunctionBody::InitializeJITFunctionData(
 
     // bytecode
     jitBody->byteCodeLength = functionBody->GetByteCode()->GetLength();
-    jitBody->byteCodeBufferAddr = (intptr_t)functionBody->GetByteCode()->GetBuffer();
+    jitBody->byteCodeBuffer = functionBody->GetByteCode()->GetBuffer();
 
     // const table
     jitBody->constCount = functionBody->GetConstantCount();
@@ -707,10 +707,10 @@ JITTimeFunctionBody::NeedScopeObjectForArguments(bool hasNonSimpleParams) const
         && !dontNeedScopeObject;
 }
 
-intptr_t
-JITTimeFunctionBody::GetByteCodeBufferAddr() const
+const byte * // now a directly readable byte pointer rather than an integer address
+JITTimeFunctionBody::GetByteCodeBuffer() const
 {
-    return m_bodyData.byteCodeBufferAddr;
+    return m_bodyData.byteCodeBuffer; // field filled from functionBody->GetByteCode()->GetBuffer() in InitializeJITFunctionData
 }
 
 intptr_t

+ 1 - 1
lib/Backend/JITTimeFunctionBody.h

@@ -93,7 +93,7 @@ public:
     bool CanInlineRecursively(uint depth, bool tryAggressive = true) const;
     bool NeedScopeObjectForArguments(bool hasNonSimpleParams) const;
 
-    intptr_t GetByteCodeBufferAddr() const;
+    const byte * GetByteCodeBuffer() const;
 
     void * ReadFromAuxData(uint offset) const;
     void * ReadFromAuxContextData(uint offset) const;

+ 5 - 23
lib/Backend/JITTimeWorkItem.cpp

@@ -6,7 +6,7 @@
 #include "Backend.h"
 
 JITTimeWorkItem::JITTimeWorkItem(CodeGenWorkItemIDL * workItemData) :
-    m_workItemData(workItemData), m_jitBody(workItemData->jitData->bodyData), m_bytecodeBuffer(nullptr)
+    m_workItemData(workItemData), m_jitBody(workItemData->jitData->bodyData)
 {
 }
 
@@ -76,34 +76,16 @@ JITTimeWorkItem::GetLoopHeaderAddr() const
 void
 JITTimeWorkItem::InitializeReader(
     Js::ByteCodeReader * reader,
-    Js::StatementReader * statementReader, ArenaAllocator* alloc, ThreadContextInfo * context)
+    Js::StatementReader * statementReader, ArenaAllocator* alloc) // the context parameter was only used by the removed ReadProcessMemory path
 {
-    if (m_bytecodeBuffer == nullptr)
-    {
-        if (JITManager::GetJITManager()->IsOOPJITEnabled())
-        {
-            m_bytecodeBuffer = AnewArray(alloc, byte, m_jitBody.GetByteCodeLength());
-            size_t bytesRead;
-            ReadProcessMemory(context->GetProcessHandle(), (byte*)m_jitBody.GetByteCodeBufferAddr(), m_bytecodeBuffer, m_jitBody.GetByteCodeLength(), &bytesRead);
-            if (bytesRead != m_jitBody.GetByteCodeLength())
-            {
-                Js::Throw::FatalInternalError();
-            }
-            m_jitBody.GetByteCodeBufferAddr();
-        }
-        else
-        {
-            m_bytecodeBuffer = (byte*)m_jitBody.GetByteCodeBufferAddr();
-        }
-    }
     uint startOffset = IsLoopBody() ? GetLoopHeader()->startOffset : 0; // loop-body work items start at the loop header's offset, everything else at 0
 #if DBG
-    reader->Create(m_bytecodeBuffer, startOffset, m_jitBody.GetByteCodeLength());
+    reader->Create(m_jitBody.GetByteCodeBuffer(), startOffset, m_jitBody.GetByteCodeLength()); // DBG variant additionally passes the bytecode length
 #else
-    reader->Create(m_bytecodeBuffer, startOffset);
+    reader->Create(m_jitBody.GetByteCodeBuffer(), startOffset); // reads straight from the body data's buffer; no local copy is made
 #endif
     m_jitBody.InitializeStatementMap(&m_statementMap, alloc); // m_statementMap is initialized before being handed to the statement reader below
-    statementReader->Create(m_bytecodeBuffer, startOffset, &m_statementMap);
+    statementReader->Create(m_jitBody.GetByteCodeBuffer(), startOffset, &m_statementMap); // same buffer and start offset as the bytecode reader
 }
 
 JITTimeFunctionBody *

+ 1 - 2
lib/Backend/JITTimeWorkItem.h

@@ -26,7 +26,7 @@ public:
 
     void InitializeReader(
         Js::ByteCodeReader * reader,
-        Js::StatementReader * statementReader, ArenaAllocator* alloc, ThreadContextInfo * context);
+        Js::StatementReader * statementReader, ArenaAllocator* alloc);
 
     JITTimeFunctionBody * GetJITFunctionBody();
 
@@ -43,7 +43,6 @@ public:
 
 private:
     CodeGenWorkItemIDL * m_workItemData;
-    byte * m_bytecodeBuffer;
     JITTimeFunctionBody m_jitBody;
     Js::SmallSpanSequence m_statementMap;
 

+ 33 - 35
lib/Backend/Lower.cpp

@@ -1094,6 +1094,10 @@ Lowerer::LowerRange(IR::Instr *instrStart, IR::Instr *instrEnd, bool defaultDoFa
             break;
         }
 
+        case Js::OpCode::LdNativeCodeData:
+            Assert(m_func->IsOOPJIT());
+            instrPrev = LowerLdNativeCodeData(instr);
+            break;
         case Js::OpCode::StrictLdThis:
             if (noFieldFastPath)
             {
@@ -6785,18 +6789,13 @@ Lowerer::GenerateCachedTypeCheck(IR::Instr *instrChk, IR::PropertySymOpnd *prope
 
         if (this->m_func->IsOOPJIT())
         {
-            auto regNativeCodeData = IR::RegOpnd::New(TyMachPtr, func);
-            Lowerer::InsertMove(
-                regNativeCodeData,
-                IR::MemRefOpnd::New((void*)func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-                instrChk);
-
             int typeCheckGuardOffset = NativeCodeData::GetDataTotalOffset(typeCheckGuard);
-            expectedTypeOpnd = IR::IndirOpnd::New(regNativeCodeData, typeCheckGuardOffset, TyMachPtr,
+            expectedTypeOpnd = IR::IndirOpnd::New(IR::RegOpnd::New(func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), typeCheckGuardOffset, TyMachPtr,
 #if DBG
                 NativeCodeData::GetDataDescription(typeCheckGuard, func->m_alloc),
 #endif
                 func);
+            this->addToLiveOnBackEdgeSyms->Set(func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
         }
         else
         {
@@ -6856,20 +6855,18 @@ Lowerer::GenerateCachedTypeCheck(IR::Instr *instrChk, IR::PropertySymOpnd *prope
         if (this->m_func->IsOOPJIT())
         {
             typeCheckGuardOpnd = IR::RegOpnd::New(TyMachPtr, func);
-            Lowerer::InsertMove(
-                typeCheckGuardOpnd,
-                IR::MemRefOpnd::New((void*)func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-                instrChk);
 
             int typeCheckGuardOffset = NativeCodeData::GetDataTotalOffset(typeCheckGuard);
             Lowerer::InsertLea(
                 typeCheckGuardOpnd->AsRegOpnd(),
-                IR::IndirOpnd::New(typeCheckGuardOpnd->AsRegOpnd(), typeCheckGuardOffset, TyMachPtr,
+                IR::IndirOpnd::New(IR::RegOpnd::New(func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), typeCheckGuardOffset, TyMachPtr,
 #if DBG
                     NativeCodeData::GetDataDescription(typeCheckGuard, func->m_alloc),
 #endif
                     func, true),
                 instrChk);
+
+            this->addToLiveOnBackEdgeSyms->Set(func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
         }
         else
         {
@@ -6964,18 +6961,14 @@ Lowerer::GenerateCachedTypeWithoutPropertyCheck(IR::Instr *instrInsert, IR::Prop
 
         if (this->m_func->IsOOPJIT())
         {
-            auto regNativeCodeData = IR::RegOpnd::New(TyMachPtr, this->m_func);
-            Lowerer::InsertMove(
-                regNativeCodeData,
-                IR::MemRefOpnd::New((void*)this->m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, this->m_func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-                instrInsert);
-
             int typeCheckGuardOffset = NativeCodeData::GetDataTotalOffset(typePropertyGuard);
-            expectedTypeOpnd = IR::IndirOpnd::New(regNativeCodeData, typeCheckGuardOffset, TyMachPtr,
+            expectedTypeOpnd = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), typeCheckGuardOffset, TyMachPtr,
 #if DBG
                 NativeCodeData::GetDataDescription(typePropertyGuard, this->m_func->m_alloc),
 #endif
                 this->m_func);
+
+            this->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
         }
         else
         {
@@ -8827,18 +8820,15 @@ IR::Instr* Lowerer::LowerMultiBr(IR::Instr * instr, IR::JnHelperMethod helperMet
         auto dictionaryOffset = NativeCodeData::GetDataTotalOffset(dictionary);
         auto addressRegOpnd = IR::RegOpnd::New(TyMachPtr, m_func);
 
-        Lowerer::InsertMove(
-            addressRegOpnd,
-            IR::MemRefOpnd::New((void*)m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, m_func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-            instr);
-
         Lowerer::InsertLea(addressRegOpnd,
-            IR::IndirOpnd::New(addressRegOpnd, dictionaryOffset, TyMachPtr,
+            IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), dictionaryOffset, TyMachPtr,
 #if DBG
                 NativeCodeData::GetDataDescription(dictionary, this->m_func->m_alloc),
 #endif
                 this->m_func), instr);
 
+        this->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
+
         m_lowererMD.LoadHelperArgument(instr, addressRegOpnd);
     }
     else
@@ -12302,22 +12292,17 @@ Lowerer::GenerateBailOut(IR::Instr * instr, IR::BranchInstr * branchInstr, IR::L
         IR::Opnd * indexOpndForBailOutKind = nullptr;
 
         int bailOutRecordOffset = 0;
-        IR::RegOpnd* addressRegOpnd = nullptr;
         if (this->m_func->IsOOPJIT())
         {
             bailOutRecordOffset = NativeCodeData::GetDataTotalOffset(bailOutInfo->bailOutRecord);
-            addressRegOpnd = IR::RegOpnd::New(TyMachPtr, m_func);
-
-            Lowerer::InsertMove(
-                addressRegOpnd,
-                IR::MemRefOpnd::New((void*)m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, m_func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-                instr);
 
-            indexOpndForBailOutKind = IR::IndirOpnd::New(addressRegOpnd, (int)(bailOutRecordOffset + BailOutRecord::GetOffsetOfBailOutKind()), TyUint32,
+            indexOpndForBailOutKind = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), (int)(bailOutRecordOffset + BailOutRecord::GetOffsetOfBailOutKind()), TyUint32,
 #if DBG
                 NativeCodeData::GetDataDescription(bailOutInfo->bailOutRecord, this->m_func->m_alloc),
 #endif
                 m_func);
+
+            this->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
         }
         else
         {
@@ -12340,7 +12325,7 @@ Lowerer::GenerateBailOut(IR::Instr * instr, IR::BranchInstr * branchInstr, IR::L
 
             if (this->m_func->IsOOPJIT())
             {
-                indexOpnd = IR::IndirOpnd::New(addressRegOpnd, (int)(bailOutRecordOffset + BailOutRecord::GetOffsetOfPolymorphicCacheIndex()), TyUint32, m_func);
+                indexOpnd = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), (int)(bailOutRecordOffset + BailOutRecord::GetOffsetOfPolymorphicCacheIndex()), TyUint32, m_func);
             }
             else
             {
@@ -12356,7 +12341,7 @@ Lowerer::GenerateBailOut(IR::Instr * instr, IR::BranchInstr * branchInstr, IR::L
             IR::Opnd *functionBodyOpnd;
             if (this->m_func->IsOOPJIT())
             {
-                functionBodyOpnd = IR::IndirOpnd::New(addressRegOpnd, (int)(bailOutRecordOffset + SharedBailOutRecord::GetOffsetOfFunctionBody()), TyMachPtr, m_func);
+                functionBodyOpnd = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), (int)(bailOutRecordOffset + SharedBailOutRecord::GetOffsetOfFunctionBody()), TyMachPtr, m_func);
             }
             else
             {
@@ -22393,6 +22378,19 @@ Lowerer::LowerLdAsmJsEnv(IR::Instr * instr)
     return instrPrev;
 }
 
+IR::Instr *
+Lowerer::LowerLdNativeCodeData(IR::Instr * instr) // Lowers LdNativeCodeData: attaches a MemRefOpnd for the work item's nativeDataAddr as src1 and turns the instr into an assign.
+{
+    Assert(!instr->GetSrc1()); // the IR builders create this instr with a dst only
+    Assert(m_func->IsTopFunc()); // the builders only emit it for the top func
+    IR::Instr * instrPrev = instr->m_prev; // captured before the rewrite; returned so LowerRange continues from here
+    instr->SetSrc1(IR::MemRefOpnd::New((void*)m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, m_func, IR::AddrOpndKindDynamicNativeCodeDataRef)); // memory reference at nativeDataAddr, typed as a machine pointer
+
+    LowererMD::ChangeToAssign(instr); // instr becomes: dst = [nativeDataAddr]
+
+    return instrPrev;
+}
+
 IR::Instr *
 Lowerer::LowerLdEnv(IR::Instr * instr)
 {

+ 1 - 0
lib/Backend/Lower.h

@@ -545,6 +545,7 @@ private:
     IR::Opnd *      LoadSlotArrayWithCachedProtoType(IR::Instr * instrInsert, IR::PropertySymOpnd *propertySymOpnd);
     IR::Instr *     LowerLdAsmJsEnv(IR::Instr *instr);
     IR::Instr *     LowerLdEnv(IR::Instr *instr);
+    IR::Instr *     LowerLdNativeCodeData(IR::Instr *instr);
     IR::Instr *     LowerFrameDisplayCheck(IR::Instr * instr);
     IR::Instr *     LowerSlotArrayCheck(IR::Instr * instr);
     void            InsertSlotArrayCheck(IR::Instr * instr, StackSym * dstSym, uint32 slotId);

+ 5 - 9
lib/Backend/LowerMDShared.cpp

@@ -6271,18 +6271,13 @@ LowererMD::EmitLoadFloatCommon(IR::Opnd *dst, IR::Opnd *src, IR::Instr *insertIn
         else
         {
             int offset = NativeCodeData::GetDataTotalOffset(pDouble);
-            IR::RegOpnd * addressRegOpnd = IR::RegOpnd::New(TyMachPtr, m_func);
-
-            Lowerer::InsertMove(
-                addressRegOpnd,
-                IR::MemRefOpnd::New((void*)m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, m_func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-                insertInstr);
-
-            doubleRef = IR::IndirOpnd::New(addressRegOpnd, offset, TyMachDouble,
+            doubleRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, TyMachDouble,
 #if DBG
                 NativeCodeData::GetDataDescription(pDouble, m_func->m_alloc),
 #endif
                 m_func);
+
+            GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
         }
 #else
         IR::MemRefOpnd *doubleRef = IR::MemRefOpnd::New((BYTE*)value + Js::JavascriptNumber::GetValueOffset(), TyFloat64, this->m_func,
@@ -7100,11 +7095,12 @@ LowererMD::LoadFloatValue(IR::Opnd * opndDst, double value, IR::Instr * instrIns
             IR::MemRefOpnd::New((void*)instrInsert->m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, instrInsert->m_func, IR::AddrOpndKindDynamicNativeCodeDataRef),
             instrInsert);
 
-        opnd = IR::IndirOpnd::New(addressRegOpnd, offset, isFloat64 ? TyMachDouble : TyFloat32, 
+        opnd = IR::IndirOpnd::New(addressRegOpnd, offset, isFloat64 ? TyMachDouble : TyFloat32,
 #if DBG
             NativeCodeData::GetDataDescription(pValue, instrInsert->m_func->m_alloc),
 #endif
             instrInsert->m_func);
+
         // movsd xmm, [reg+offset]
         auto instr = IR::Instr::New(LowererMDArch::GetAssignOp(opndDst->GetType()), opndDst, opnd, instrInsert->m_func);
         instrInsert->InsertBefore(instr);

+ 3 - 7
lib/Backend/LowerMDSharedSimd128.cpp

@@ -385,18 +385,14 @@ IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
     else
     {
         int offset = NativeCodeData::GetDataTotalOffset(pValue);
-        IR::RegOpnd * addressRegOpnd = IR::RegOpnd::New(TyMachPtr, m_func);
 
-        Lowerer::InsertMove(
-            addressRegOpnd,
-            IR::MemRefOpnd::New((void*)m_func->GetWorkItem()->GetWorkItemData()->nativeDataAddr, TyMachPtr, m_func, IR::AddrOpndKindDynamicNativeCodeDataRef),
-            instr);
-
-        simdRef = IR::IndirOpnd::New(addressRegOpnd, offset, TyMachDouble,
+        simdRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, TyMachDouble,
 #if DBG
             NativeCodeData::GetDataDescription(pValue, m_func->m_alloc),
 #endif
             m_func);
+
+        GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
     }
 
     instr->ReplaceSrc1(simdRef);

+ 21 - 1
lib/Backend/NativeCodeGenerator.cpp

@@ -923,9 +923,29 @@ NativeCodeGenerator::CodeGen(PageAllocator * pageAllocator, CodeGenWorkItem* wor
             scriptContext->GetRecycler());
         pNumberAllocator = &numberAllocator;
 #endif
+        Js::ScriptContextProfiler *const codeGenProfiler =
+#ifdef PROFILE_EXEC
+            foreground ? EnsureForegroundCodeGenProfiler() : GetBackgroundCodeGenProfiler(pageAllocator); // okay to do outside lock since the respective function is called only from one thread
+#else
+            nullptr;
+#endif
+
         Func::Codegen(&jitArena, jitWorkItem, scriptContext->GetThreadContext(),
             scriptContext, &jitWriteData, epInfo, nullptr, jitWorkItem->GetPolymorphicInlineCacheInfo(),
-            allocators, pNumberAllocator, nullptr, !foreground);
+            allocators, pNumberAllocator, codeGenProfiler, !foreground);
+    }
+    if (PHASE_TRACE1(Js::BackEndPhase))
+    {
+        LARGE_INTEGER freq;
+        LARGE_INTEGER end_time;
+        QueryPerformanceCounter(&end_time);
+        QueryPerformanceFrequency(&freq);
+
+        Output::Print(
+            L"BackendMarshalOut - function: %s time:%8.6f mSec\r\n",
+            workItem->GetFunctionBody()->GetDisplayName(),
+            (((double)((end_time.QuadPart - jitWriteData.startTime)* (double)1000.0 / (double)freq.QuadPart))) / (1));
+        Output::Flush();
     }
     NativeCodeGenerator::LogCodeGenDone(workItem, &start_time);
 

+ 27 - 1
lib/Backend/ServerScriptContext.cpp

@@ -9,8 +9,17 @@ ServerScriptContext::ServerScriptContext(ScriptContextDataIDL * contextData) :
     m_contextData(*contextData),
     m_isPRNGSeeded(false),
     m_moduleRecords(&HeapAllocator::Instance),
+#ifdef PROFILE_EXEC
+    m_codeGenProfiler(nullptr),
+#endif
     m_activeJITCount(0)
 {
+#ifdef PROFILE_EXEC
+    if (Js::Configuration::Global.flags.IsEnabled(Js::ProfileFlag))
+    {
+        m_codeGenProfiler = HeapNew(Js::ScriptContextProfiler);
+    }
+#endif
     m_domFastPathHelperMap = HeapNew(JITDOMFastPathHelperMap, &HeapAllocator::Instance, 17);
 }
 
@@ -21,6 +30,13 @@ ServerScriptContext::~ServerScriptContext()
     {
         HeapDelete(record);
     });
+
+#ifdef PROFILE_EXEC
+    if (m_codeGenProfiler)
+    {
+        HeapDelete(m_codeGenProfiler);
+    }
+#endif
 }
 
 intptr_t
@@ -280,4 +296,14 @@ ServerScriptContext::AddModuleRecordInfo(unsigned int moduleId, __int64 localExp
     record->moduleId = moduleId;
     record->localExportSlotsAddr = (Js::Var*)localExportSlotsAddr;
     m_moduleRecords.Add(moduleId, record);
-}
+}
+
+Js::ScriptContextProfiler *
+ServerScriptContext::GetCodeGenProfiler() const // Returns the code-gen profiler, or nullptr if none was created or PROFILE_EXEC is not defined.
+{
+#ifdef PROFILE_EXEC
+    return m_codeGenProfiler; // HeapNew'd in the constructor only when the Profile flag is enabled; otherwise stays nullptr
+#else
+    return nullptr; // the m_codeGenProfiler member is only declared under PROFILE_EXEC
+#endif
+}

+ 5 - 0
lib/Backend/ServerScriptContext.h

@@ -59,11 +59,16 @@ public:
 
     void AddModuleRecordInfo(unsigned int moduleId, __int64 localExportSlotsAddr);
 
+    Js::ScriptContextProfiler *  GetCodeGenProfiler() const;
+
     void BeginJIT();
     void EndJIT();
     bool IsJITActive();
 private:
     JITDOMFastPathHelperMap * m_domFastPathHelperMap;
+#ifdef PROFILE_EXEC
+    Js::ScriptContextProfiler * m_codeGenProfiler;
+#endif
 
     ScriptContextDataIDL m_contextData;
     uint m_activeJITCount;

+ 25 - 5
lib/Backend/ServerThreadContext.cpp

@@ -8,10 +8,7 @@
 ServerThreadContext::ServerThreadContext(ThreadContextDataIDL * data) :
     m_threadContextData(*data),
     m_policyManager(true),
-    m_pageAlloc(&m_policyManager, Js::Configuration::Global.flags, PageAllocatorType_BGJIT,
-        AutoSystemInfo::Data.IsLowMemoryProcess() ?
-        PageAllocator::DefaultLowMaxFreePageCount :
-        PageAllocator::DefaultMaxFreePageCount),
+    m_pageAllocs(&HeapAllocator::Instance),
     m_preReservedVirtualAllocator((HANDLE)data->processHandle),
     m_codePageAllocators(&m_policyManager, ALLOC_XDATA, &m_preReservedVirtualAllocator, (HANDLE)data->processHandle),
     m_codeGenAlloc(&m_policyManager, nullptr, &m_codePageAllocators, (HANDLE)data->processHandle),
@@ -34,6 +31,10 @@ ServerThreadContext::~ServerThreadContext()
         HeapDelete(m_propertyMap);
         this->m_propertyMap = nullptr;
     }
+    this->m_pageAllocs.Map([](DWORD thread, PageAllocator* alloc)
+    {
+        HeapDelete(alloc);
+    });
 }
 
 PreReservedVirtualAllocWrapper *
@@ -42,6 +43,26 @@ ServerThreadContext::GetPreReservedVirtualAllocator()
     return &m_preReservedVirtualAllocator;
 }
 
+PageAllocator*
+ServerThreadContext::GetPageAllocator() // Returns the PageAllocator cached for the calling thread, creating and caching it on first use.
+{
+    PageAllocator * alloc;
+
+    if (!m_pageAllocs.TryGetValue(GetCurrentThreadId(), &alloc)) // m_pageAllocs is keyed by the current thread's id
+    {
+        alloc = HeapNew(PageAllocator,
+            &m_policyManager,
+            Js::Configuration::Global.flags, PageAllocatorType_BGJIT,
+            AutoSystemInfo::Data.IsLowMemoryProcess() ? // pick the max-free-page limit based on whether this is a low-memory process
+            PageAllocator::DefaultLowMaxFreePageCount :
+            PageAllocator::DefaultMaxFreePageCount);
+
+        m_pageAllocs.Add(GetCurrentThreadId(), alloc); // NOTE(review): no lock is visible around this dictionary in this function — confirm calls from different threads are serialized
+    }
+    return alloc; // entries are released with HeapDelete in ~ServerThreadContext
+}
+
+
 intptr_t
 ServerThreadContext::GetBailOutRegisterSaveSpaceAddr() const
 {
@@ -177,7 +198,6 @@ ServerThreadContext::GetPropertyRecord(Js::PropertyId propertyId)
 void
 ServerThreadContext::AddToPropertyMap(const Js::PropertyRecord * origRecord)
 {
-
     size_t allocLength = origRecord->byteCount + sizeof(char16) + (origRecord->isNumeric ? sizeof(uint32) : 0);
     Js::PropertyRecord * record = HeapNewPlus(allocLength, Js::PropertyRecord, origRecord->byteCount, origRecord->isNumeric, origRecord->hash, origRecord->isSymbol);
     record->isBound = origRecord->isBound;

+ 2 - 2
lib/Backend/ServerThreadContext.h

@@ -39,7 +39,7 @@ public:
     CodeGenAllocators * GetCodeGenAllocators();
     AllocationPolicyManager * GetAllocationPolicyManager();
     CustomHeap::CodePageAllocators * GetCodePageAllocators();
-
+    PageAllocator* GetPageAllocator();
     void AddToPropertyMap(const Js::PropertyRecord * propertyRecord);
     void SetWellKnownHostTypeId(Js::TypeId typeId) { this->wellKnownHostTypeHTMLAllCollectionTypeId = typeId; }
 private:
@@ -51,7 +51,7 @@ private:
     PropertyMap * m_propertyMap;
 
     AllocationPolicyManager m_policyManager;
-    PageAllocator m_pageAlloc;
+    JsUtil::BaseDictionary<DWORD, PageAllocator*, HeapAllocator> m_pageAllocs;
     PreReservedVirtualAllocWrapper m_preReservedVirtualAllocator;
     CustomHeap::CodePageAllocators m_codePageAllocators;
     CodeGenAllocators m_codeGenAlloc;

+ 1 - 1
lib/Backend/arm/LowerMD.h

@@ -203,7 +203,7 @@ public:
             void                EmitLoadFloatFromNumber(IR::Opnd *dst, IR::Opnd *src, IR::Instr *insertInstr);
             IR::LabelInstr*     EmitLoadFloatCommon(IR::Opnd *dst, IR::Opnd *src, IR::Instr *insertInstr, bool needHelperLabel);
             static IR::Instr *  LoadFloatZero(IR::Opnd * opndDst, IR::Instr * instrInsert);
-            static IR::Instr *  LoadFloatValue(IR::Opnd * opndDst, double value, IR::Instr * instrInsert);
+            IR::Instr *  LoadFloatValue(IR::Opnd * opndDst, double value, IR::Instr * instrInsert);
 
             IR::Instr *         LowerEntryInstr(IR::EntryInstr * entryInstr);
             IR::Instr *         LowerExitInstr(IR::ExitInstr * exitInstr);

+ 2 - 1
lib/JITIDL/ChakraJIT.idl

@@ -522,7 +522,7 @@ typedef struct FunctionBodyDataIDL
 #endif
     SmallSpanSequenceIDL statementMap;
 
-    CHAKRA_PTR byteCodeBufferAddr;
+    [size_is(byteCodeLength)] byte * byteCodeBuffer;
 
     [size_is(constCount)] CHAKRA_PTR * constTable;
     ConstTableContentIDL * constTableContent;
@@ -795,6 +795,7 @@ typedef struct JITOutputIDL
 
     __int64 codeAddress;
     __int64 xdataAddr;
+    __int64 startTime;
 } JITOutputIDL;
 
 [

+ 44 - 8
lib/JITServer/JITServer.cpp

@@ -243,6 +243,14 @@ ServerCleanupScriptContext(
         return RPC_S_INVALID_ARG;
     }
 
+#ifdef PROFILE_EXEC
+    auto profiler = scriptContextInfo->GetCodeGenProfiler();
+    if (profiler && profiler->IsInitialized())
+    {
+        profiler->ProfilePrint(Js::Configuration::Global.flags.Profile.GetFirstPhase());
+    }
+#endif
+
     while (scriptContextInfo->IsJITActive()) { Sleep(30); }
     HeapDelete(scriptContextInfo);
     return S_OK;
@@ -322,12 +330,7 @@ ServerRemoteCodeGen(
         return RPC_S_INVALID_ARG;
     }
 
-    PageAllocator backgroundPageAllocator(threadContextInfo->GetAllocationPolicyManager(), Js::Configuration::Global.flags, PageAllocatorType_BGJIT,
-        (AutoSystemInfo::Data.IsLowMemoryProcess() ?
-            PageAllocator::DefaultLowMaxFreePageCount :
-            PageAllocator::DefaultMaxFreePageCount));
-
-    NoRecoverMemoryJitArenaAllocator jitArena(L"JITArena", &backgroundPageAllocator, Js::Throw::OutOfMemory);    
+    NoRecoverMemoryJitArenaAllocator jitArena(L"JITArena", threadContextInfo->GetPageAllocator(), Js::Throw::OutOfMemory);
 
     scriptContextInfo->BeginJIT(); // TODO: OOP JIT, improve how we do this
     threadContextInfo->BeginJIT();
@@ -342,20 +345,47 @@ ServerRemoteCodeGen(
         QueryPerformanceFrequency(&freq);
 
         Output::Print(
-            L"BackendMarshal - function: %s time:%8.6f mSec\r\n",
+            L"BackendMarshalIn - function: %s time:%8.6f mSec\r\n",
             jitWorkItem->GetJITFunctionBody()->GetDisplayName(),
             (((double)((end_time.QuadPart - workItemData->startTime)* (double)1000.0 / (double)freq.QuadPart))) / (1));
         Output::Flush();
     }
 
+    auto profiler = scriptContextInfo->GetCodeGenProfiler();
+#ifdef PROFILE_EXEC
+    if (profiler && !profiler->IsInitialized())
+    {
+        profiler->Initialize(threadContextInfo->GetPageAllocator(), nullptr);
+    }
+#endif
+
     jitData->numberPageSegments = (XProcNumberPageSegment*)midl_user_allocate(sizeof(XProcNumberPageSegment));
     memcpy_s(jitData->numberPageSegments, sizeof(XProcNumberPageSegment), jitWorkItem->GetWorkItemData()->xProcNumberPageSegment, sizeof(XProcNumberPageSegment));
 
-    Func::Codegen(&jitArena, jitWorkItem, threadContextInfo, scriptContextInfo, jitData, nullptr, nullptr, jitWorkItem->GetPolymorphicInlineCacheInfo(), threadContextInfo->GetCodeGenAllocators(), nullptr, nullptr, true);
+    Func::Codegen(
+        &jitArena,
+        jitWorkItem,
+        threadContextInfo,
+        scriptContextInfo,
+        jitData,
+        nullptr,
+        nullptr,
+        jitWorkItem->GetPolymorphicInlineCacheInfo(),
+        threadContextInfo->GetCodeGenAllocators(),
+        nullptr,
+        profiler,
+        true);
 
     scriptContextInfo->EndJIT();
     threadContextInfo->EndJIT();
 
+#ifdef PROFILE_EXEC
+    if (profiler && profiler->IsInitialized())
+    {
+        profiler->ProfilePrint(Js::Configuration::Global.flags.Profile.GetFirstPhase());
+    }
+#endif
+
     if (PHASE_TRACE1(Js::BackEndPhase))
     {
         LARGE_INTEGER freq;
@@ -370,5 +400,11 @@ ServerRemoteCodeGen(
         Output::Flush();
 
     }
+    LARGE_INTEGER out_time = { 0 };
+    if (PHASE_TRACE1(Js::BackEndPhase))
+    {
+        QueryPerformanceCounter(&out_time);
+        jitData->startTime = out_time.QuadPart;
+    }
     return S_OK;
 }

+ 1 - 0
lib/Runtime/ByteCode/OpCodes.h

@@ -437,6 +437,7 @@ MACRO_BACKEND_ONLY(     LdUInt32ArrViewElem,    ElementI,       OpCanCSE
 MACRO_BACKEND_ONLY(     Memset,                 ElementI,       OpSideEffect)
 MACRO_BACKEND_ONLY(     Memcopy,                ElementI,       OpSideEffect)
 MACRO_BACKEND_ONLY(     ArrayDetachedCheck,     Reg1,           None)   // ensures that an ArrayBuffer has not been detached
+MACRO_BACKEND_ONLY(     LdNativeCodeData,       Reg1,           OpSideEffect)   // load native code data buffer
 MACRO_WMS(              StArrItemI_CI4,         ElementUnsigned1,      OpSideEffect)
 MACRO_WMS(              StArrItemC_CI4,         ElementUnsigned1,      OpSideEffect)
 MACRO_WMS(              LdArrHead,              Reg2,           OpTempObjectSources)

+ 1 - 0
lib/Runtime/Runtime.h

@@ -471,6 +471,7 @@ enum tagDEBUG_EVENT_INFO_TYPE
 #include "Base/ThreadContext.h"
 
 #include "Base/StackProber.h"
+#include "Base/ScriptContextProfiler.h"
 
 #include "Language/EvalMapRecord.h"
 #include "Base/RegexPatternMruMap.h"