ソースを参照

Enable inlining on ARM64

The biggest thing that needed fixing up was the loading of inlinee
call info data; I implemented approximately the same thing that we
do on arm, except that it happens a little earlier (when encoding,
instead of in a relocation step). The most notable change is that I
had to make LabelInstr offsets grow to uintptr_t (from uint32); this
will not take any more memory, because the offset is already unioned
with a byte pointer, but it does have an impact outside of ARM64.

The other part is the definition of two new instructions that have
a move with a known shift amount (movz_shift and movk_shift); this
is to ease definition of these fixed data moves, and may be useful
elsewhere.
Derek Morris 8 年 前
コミット
09cb144206

+ 4 - 4
lib/Backend/IR.h

@@ -679,7 +679,7 @@ private:
     union labelLocation
     {
         BYTE *                  pc;     // Used by encoder and is the real pc offset
-        uint32                  offset; // Used by preEncoder and is an estimation pc offset, not accurate
+        uintptr_t               offset; // Used by preEncoder and is an estimation pc offset, not accurate
     } m_pc;
 
     BasicBlock *            m_block;
@@ -689,9 +689,9 @@ public:
 
     inline void             SetPC(BYTE * pc);
     inline BYTE *           GetPC(void) const;
-    inline void             SetOffset(uint32 offset);
-    inline void             ResetOffset(uint32 offset);
-    inline uint32           GetOffset(void) const;
+    inline void             SetOffset(uintptr_t offset);
+    inline void             ResetOffset(uintptr_t offset);
+    inline uintptr_t        GetOffset(void) const;
     inline void             SetBasicBlock(BasicBlock * block);
     inline BasicBlock *     GetBasicBlock(void) const;
     inline void             SetLoop(Loop *loop);

+ 3 - 3
lib/Backend/IR.inl

@@ -637,7 +637,7 @@ LabelInstr::GetPC(void) const
 ///----------------------------------------------------------------------------
 
 inline void
-LabelInstr::ResetOffset(uint32 offset)
+LabelInstr::ResetOffset(uintptr_t offset)
 {
     AssertMsg(this->isInlineeEntryInstr, "As of now only InlineeEntryInstr overwrites the offset at encoder stage");
     this->m_pc.offset = offset;
@@ -650,7 +650,7 @@ LabelInstr::ResetOffset(uint32 offset)
 ///----------------------------------------------------------------------------
 
 inline void
-LabelInstr::SetOffset(uint32 offset)
+LabelInstr::SetOffset(uintptr_t offset)
 {
     AssertMsg(this->m_pc.offset == 0, "Overwriting existing byte offset");
     this->m_pc.offset = offset;
@@ -662,7 +662,7 @@ LabelInstr::SetOffset(uint32 offset)
 ///
 ///----------------------------------------------------------------------------
 
-inline uint32
+inline uintptr_t
 LabelInstr::GetOffset(void) const
 {
 

+ 0 - 5
lib/Backend/InliningDecider.cpp

@@ -184,10 +184,6 @@ uint InliningDecider::InlinePolymorphicCallSite(Js::FunctionBody *const inliner,
 Js::FunctionInfo *InliningDecider::Inline(Js::FunctionBody *const inliner, Js::FunctionInfo* functionInfo,
     bool isConstructorCall, bool isPolymorphicCall, uint16 constantArgInfo, Js::ProfileId callSiteId, uint recursiveInlineDepth, bool allowRecursiveInlining)
 {
-#if defined(_M_ARM64)
-    INLINE_TESTTRACE(_u("INLINING: Inline disabled for ARM64"));
-    return nullptr;
-#else // #if defined(_M_ARM64)
 #if defined(DBG_DUMP) || defined(ENABLE_DEBUG_CONFIG_OPTIONS)
     char16 debugStringBuffer[MAX_FUNCTION_BODY_DEBUG_STRING_SIZE];
     char16 debugStringBuffer2[MAX_FUNCTION_BODY_DEBUG_STRING_SIZE];
@@ -305,7 +301,6 @@ Js::FunctionInfo *InliningDecider::Inline(Js::FunctionBody *const inliner, Js::F
 
     // Note: for built-ins at this time we don't have enough data (the instr) to decide whether it's going to be inlined.
     return functionInfo;
-#endif // #if defined(_M_ARM64)
 }
 
 

+ 60 - 1
lib/Backend/arm64/EncoderMD.cpp

@@ -507,6 +507,31 @@ int EncoderMD::EmitMovConstant(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emi
     }
 }
 
+// Encodes a move-immediate whose shift amount is supplied by the caller.
+// The instruction's dst must be a 4- or 8-byte register operand and its src1 an
+// immediate operand whose value fits in 16 bits. `emitter` is invoked for a
+// 4-byte destination and `emitter64` for an 8-byte one; the emitter's result is
+// returned unchanged.
+template<typename _Emitter, typename _Emitter64>
+int EncoderMD::EmitMovConstantKnownShift(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emitter emitter, _Emitter64 emitter64, uint32 shift)
+{
+    IR::Opnd* dstOpnd = instr->GetDst();
+    IR::Opnd* immOpnd = instr->GetSrc1();
+    Assert(dstOpnd->IsRegOpnd());
+    Assert(immOpnd->IsImmediateOpnd());
+
+    const int opndSize = dstOpnd->GetSize();
+    Assert(opndSize == 4 || opndSize == 8);
+    const bool is64Bit = (opndSize == 8);
+
+    const IntConstType imm16 = immOpnd->GetImmediateValue(instr->m_func);
+    Assert((imm16 & 0xFFFF) == imm16);
+    // Shifts of 32 and 48 are only accepted for an 8-byte destination.
+    Assert(shift == 0 || shift == 16 || (is64Bit && (shift == 32 || shift == 48)));
+
+    if (!is64Bit)
+    {
+        return emitter(Emitter, this->GetRegEncode(dstOpnd->AsRegOpnd()), ULONG(imm16), shift);
+    }
+
+    return emitter64(Emitter, this->GetRegEncode(dstOpnd->AsRegOpnd()), ULONG(imm16), shift);
+}
+
 template<typename _Emitter, typename _Emitter64>
 int EncoderMD::EmitBitfield(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emitter emitter, _Emitter64 emitter64)
 {
@@ -725,6 +750,7 @@ EncoderMD::GenerateEncoding(IR::Instr* instr, BYTE *pc)
         Assert(src1->IsLabelOpnd());
 
         Assert(dst->GetSize() == 8);
+        Assert(!src1->AsLabelOpnd()->GetLabel()->isInlineeEntryInstr);
         EncodeReloc::New(&m_relocList, RelocTypeLabelAdr, m_pc, src1->AsLabelOpnd()->GetLabel(), m_encoder->m_tempAlloc);
         bytes = EmitAdr(Emitter, this->GetRegEncode(dst->AsRegOpnd()), 0);
         break;
@@ -939,6 +965,38 @@ EncoderMD::GenerateEncoding(IR::Instr* instr, BYTE *pc)
         bytes = this->EmitMovConstant(Emitter, instr, EmitMovz, EmitMovz64);
         break;
 
+    case Js::OpCode::MOVK_SHIFT:
+    {
+        // MOVK with a known shift: src1 is a label whose offset holds the full
+        // value to load, and src2 is the shift selecting which 16 bits to emit.
+        Assert(instr->GetSrc1()->IsLabelOpnd());
+        Assert(instr->GetSrc2()->IsIntConstOpnd());
+        IR::LabelInstr* labelInstr = instr->GetSrc1()->AsLabelOpnd()->GetLabel();
+        uint32 shift = instr->GetSrc2()->AsIntConstOpnd()->AsUint32();
+        Assert(shift == 0 || shift == 16 || shift == 32 || shift == 48);
+
+        // We're going to drop src2, set src1 to just the masked bits from the label
+        // offset (so we don't even need to go into relocation), and emit it.
+        instr->UnlinkSrc2();
+        uintptr_t fullvalue = labelInstr->GetOffset();
+        // Shift the full-width value down first and mask afterwards. Building the
+        // mask as (0xffff << shift) is done in int, which is undefined for shifts
+        // of 32/48 and overflows (then sign-extends) for a shift of 16.
+        instr->ReplaceSrc1(IR::IntConstOpnd::New((fullvalue >> shift) & 0xffff, IRType::TyUint16, instr->m_func, true));
+        bytes = this->EmitMovConstantKnownShift(Emitter, instr, EmitMovk, EmitMovk64, shift);
+    }
+        break;
+
+    case Js::OpCode::MOVZ_SHIFT:
+    {
+        // MOVZ with a known shift: src1 is a label whose offset holds the full
+        // value to load, and src2 is the shift selecting which 16 bits to emit.
+        Assert(instr->GetSrc1()->IsLabelOpnd());
+        Assert(instr->GetSrc2()->IsIntConstOpnd());
+        IR::LabelInstr* labelInstr = instr->GetSrc1()->AsLabelOpnd()->GetLabel();
+        uint32 shift = instr->GetSrc2()->AsIntConstOpnd()->AsUint32();
+        Assert(shift == 0 || shift == 16 || shift == 32 || shift == 48);
+
+        // We're going to drop src2, set src1 to just the masked bits from the label
+        // offset (so we don't even need to go into relocation), and emit it.
+        instr->UnlinkSrc2();
+        uintptr_t fullvalue = labelInstr->GetOffset();
+        // Shift the full-width value down first and mask afterwards. Building the
+        // mask as (0xffff << shift) is done in int, which is undefined for shifts
+        // of 32/48 and overflows (then sign-extends) for a shift of 16.
+        instr->ReplaceSrc1(IR::IntConstOpnd::New((fullvalue >> shift) & 0xffff, IRType::TyUint16, instr->m_func, true));
+        bytes = this->EmitMovConstantKnownShift(Emitter, instr, EmitMovz, EmitMovz64, shift);
+    }
+        break;
+
     case Js::OpCode::MRS_FPCR:
         dst = instr->GetDst();
         Assert(dst->IsRegOpnd());
@@ -1303,7 +1361,7 @@ EncoderMD::Encode(IR::Instr *instr, BYTE *pc, BYTE* beginCodeAddress)
                 Assert(encodeResult);
                 //We are re-using offset to save the inlineeCallInfo which will be patched in ApplyRelocs
                 //This is a cleaner way to patch MOVW\MOVT pair with the right inlineeCallInfo
-                instr->AsLabelInstr()->ResetOffset((uint32)inlineeCallInfo);
+                instr->AsLabelInstr()->ResetOffset((uintptr_t)inlineeCallInfo);
             }
             else
             {
@@ -1459,6 +1517,7 @@ EncoderMD::ApplyRelocs(size_t codeBufferAddress, size_t codeSize, uint* bufferCR
             break;
 
         case RelocTypeLabelAdr:
+            Assert(!reloc->m_relocInstr->isInlineeEntryInstr);
             immediate = ULONG_PTR(targetAddress) - ULONG_PTR(relocAddress);
             Assert(IS_CONST_INT21(immediate));
             *relocAddress = (*relocAddress & ~(3 << 29)) | ULONG((immediate & 3) << 29);

+ 1 - 0
lib/Backend/arm64/EncoderMD.h

@@ -243,6 +243,7 @@ private:
 
     // Misc operations
     template<typename _Emitter, typename _Emitter64> int EmitMovConstant(Arm64CodeEmitter &Emitter, IR::Instr* instr, _Emitter emitter, _Emitter64 emitter64);
+    template<typename _Emitter, typename _Emitter64> int EmitMovConstantKnownShift(Arm64CodeEmitter &Emitter, IR::Instr* instr, _Emitter emitter, _Emitter64 emitter64, uint32 shift);
     template<typename _Emitter, typename _Emitter64> int EmitBitfield(Arm64CodeEmitter &Emitter, IR::Instr *instr, _Emitter emitter, _Emitter64 emitter64);
     template<typename _Emitter, typename _Emitter64> int EmitConditionalSelect(Arm64CodeEmitter &Emitter, IR::Instr *instr, int condition, _Emitter emitter, _Emitter64 emitter64);
 

+ 37 - 11
lib/Backend/arm64/LegalizeMD.cpp

@@ -630,24 +630,42 @@ void LegalizeMD::LegalizeLDIMM(IR::Instr * instr, IntConstType immed)
     }
     else
     {
-        // ARM64_WORKITEM: This needs to be understood better
+        // We don't know the value yet, so we defer materializing it until we do.
+        // This is done by having the load be from a label operand, whose offset is
+        // later changed to be the correct value to ldimm.
+
+        // The assembly generated becomes something like
+        // Label (offset:fake)
+        // MOVZ DST, Label
+        // MOVK DST, Label
+        // MOVK DST, Label
+        // MOVK DST, Label <- was the LDIMM
+
         Assert(Security::DontEncode(instr->GetSrc1()));
-        Assert(false);
-/*      IR::LabelInstr *label = IR::LabelInstr::New(Js::OpCode::Label, instr->m_func, false);
+
+        // The label with the special offset value, used for reloc
+        IR::LabelInstr *label = IR::LabelInstr::New(Js::OpCode::Label, instr->m_func, false);
         instr->InsertBefore(label);
         Assert((immed & 0x0000000F) == immed);
-        label->SetOffset(immed);
+        label->SetOffset((uint32)immed);
+        label->isInlineeEntryInstr = true;
 
         IR::LabelOpnd *target = IR::LabelOpnd::New(label, instr->m_func);
 
-        IR::Instr * instrMov = IR::Instr::New(Js::OpCode::MOVZ, instr->GetDst(), target, instr->m_func);
-        instr->InsertBefore(instrMov);
+        // We'll handle splitting this up to properly load the immediates now
+        // Typically (and worst case) we'll need to load 64 bits.
+        IR::Instr* bits48_63 = IR::Instr::New(Js::OpCode::MOVZ_SHIFT, instr->GetDst(), target, IR::IntConstOpnd::New(48, IRType::TyUint8, instr->m_func, true), instr->m_func);
+        instr->InsertBefore(bits48_63);
+        IR::Instr* bits32_47 = IR::Instr::New(Js::OpCode::MOVK_SHIFT, instr->GetDst(), target, IR::IntConstOpnd::New(32, IRType::TyUint8, instr->m_func, true), instr->m_func);
+        instr->InsertBefore(bits32_47);
+        IR::Instr* bits16_31 = IR::Instr::New(Js::OpCode::MOVK_SHIFT, instr->GetDst(), target, IR::IntConstOpnd::New(16, IRType::TyUint8, instr->m_func, true), instr->m_func);
+        instr->InsertBefore(bits16_31);
 
         instr->ReplaceSrc1(target);
-        instr->m_opcode = Js::OpCode::MOVK64;
+        instr->SetSrc2(IR::IntConstOpnd::New(0, IRType::TyUint8, instr->m_func, true));
+        instr->m_opcode = Js::OpCode::MOVK_SHIFT;
 
-        label->isInlineeEntryInstr = true;
-        instr->isInlineeEntryInstr = false;*/
+        instr->isInlineeEntryInstr = false;
     }
 }
 
@@ -740,14 +758,22 @@ void LegalizeMD::LegalizeLdLabel(IR::Instr * instr, IR::Opnd * opnd)
     Assert(instr->m_opcode == Js::OpCode::LDIMM);
     Assert(opnd->IsLabelOpnd());
 
-    instr->m_opcode = Js::OpCode::ADR;
+    if (opnd->AsLabelOpnd()->GetLabel()->isInlineeEntryInstr)
+    {
+        // We want to leave it as LDIMMs so that we can easily disambiguate later
+        return;
+    }
+    else
+    {
+        instr->m_opcode = Js::OpCode::ADR;
+    }
 }
 
 bool LegalizeMD::LegalizeDirectBranch(IR::BranchInstr *branchInstr, uint32 branchOffset)
 {
     Assert(branchInstr->IsBranchInstr());
 
-    uint32 labelOffset = branchInstr->GetTarget()->GetOffset();
+    uint32 labelOffset = (uint32)branchInstr->GetTarget()->GetOffset();
     Assert(labelOffset); //Label offset must be set.
 
     int32 offset = labelOffset - branchOffset;

+ 1 - 0
lib/Backend/arm64/LegalizeMD.h

@@ -56,6 +56,7 @@ struct LegalInstrForms
 #define LEGAL_CBZ      { L_None,    { L_Reg } }
 #define LEGAL_LABEL    { L_Reg,     { L_Label } }
 #define LEGAL_LDIMM    { L_Reg,     { L_Imm,     L_None } }
+#define LEGAL_LDIMM_S  { L_Reg,     { L_Imm,     L_ImmU6 } }
 #define LEGAL_LOAD     { L_Reg,     { (LegalForms)(L_IndirSU12I9 | L_SymSU12I9), L_None } }
 #define LEGAL_LOADP    { L_Reg,     { (LegalForms)(L_IndirSI7 | L_SymSI7), L_Reg } }
 #define LEGAL_PLD      { L_None,    { (LegalForms)(L_IndirSU12I9 | L_SymSU12I9), L_None } }

+ 1 - 1
lib/Backend/arm64/LowerMD.cpp

@@ -7613,7 +7613,7 @@ LowererMD::FinalLower()
 
                 if (branchInstr->GetTarget() && !LowererMD::IsUnconditionalBranch(branchInstr)) //Ignore BX register based branches & B
                 {
-                    uint32 targetOffset = branchInstr->GetTarget()->GetOffset();
+                    uint32 targetOffset = (uint32)branchInstr->GetTarget()->GetOffset();
 
                     if (targetOffset != 0)
                     {

+ 4 - 0
lib/Backend/arm64/MdOpCodes.h

@@ -69,8 +69,12 @@ MACRO(MOV,        Reg2,       0,              UNUSED,   LEGAL_REG2,     UNUSED,
 // Alias of MOV that won't get optimized out when src and dst are the same.
 MACRO(MOV_TRUNC,  Reg2,       0,              UNUSED,   LEGAL_REG2,     UNUSED,   DM__)
 MACRO(MOVK,       Reg2,       0,              UNUSED,   LEGAL_LDIMM,    UNUSED,   DM__)
+// Alias of MOVK where we know the shift, but don't know the value yet
+MACRO(MOVK_SHIFT, Reg2,       0,              UNUSED,   LEGAL_LDIMM_S,  UNUSED,   DM__)
 MACRO(MOVN,       Reg2,       0,              UNUSED,   LEGAL_LDIMM,    UNUSED,   DM__)
 MACRO(MOVZ,       Reg2,       0,              UNUSED,   LEGAL_LDIMM,    UNUSED,   DM__)
+// Alias of MOVZ where we know the shift, but don't know the value yet
+MACRO(MOVZ_SHIFT, Reg2,       0,              UNUSED,   LEGAL_LDIMM_S,  UNUSED,   DM__)
 MACRO(MRS_FPCR,   Reg1,       0,              UNUSED,   LEGAL_REG1,     UNUSED,   D___)
 MACRO(MRS_FPSR,   Reg1,       0,              UNUSED,   LEGAL_REG1,     UNUSED,   D___)
 MACRO(MSR_FPCR,   Reg2,       0,              UNUSED,   LEGAL_REG2_ND,  UNUSED,   D___)