//------------------------------------------------------------------------------------------------------- // Copyright (C) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. //------------------------------------------------------------------------------------------------------- #include "Backend.h" #if ENABLE_NATIVE_CODEGEN namespace { // The definitions in this anonymous namespace must be constexpr to allow OACR to conclude that certain operations // in InterpreterThunkEmitter::EncodeInterpreterThunk are safe. Because constexpr requires that the declaration // and the definition appear at the same place (i.e., no forward declarations), this means that we either have // to move all 5 definitions of InterpreterThunk into the header file, or we have to make InterpreterThunkSize // public. The latter option seems the less objectionable, so that's what I've done here. #ifdef _M_X64 #ifdef _WIN32 constexpr BYTE FunctionInfoOffset = 23; constexpr BYTE FunctionProxyOffset = 27; constexpr BYTE DynamicThunkAddressOffset = 31; constexpr BYTE CallBlockStartAddrOffset = 41; constexpr BYTE ThunkSizeOffset = 55; constexpr BYTE ErrorOffset = 64; constexpr BYTE ThunkAddressOffset = 81; constexpr BYTE PrologSize = 80; constexpr BYTE StackAllocSize = 0x28; // // Home the arguments onto the stack and pass a pointer to the base of the stack location to the inner thunk // // Calling convention requires that caller should allocate at least 0x20 bytes and the stack be 16 byte aligned. // Hence, we allocate 0x28 bytes of stack space for the callee to use. The callee uses 8 bytes to push the first // argument and the rest 0x20 ensures alignment is correct. // constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = { 0x48, 0x89, 0x54, 0x24, 0x10, // mov qword ptr [rsp+10h],rdx 0x48, 0x89, 0x4C, 0x24, 0x08, // mov qword ptr [rsp+8],rcx 0x4C, 0x89, 0x44, 0x24, 0x18, // mov qword ptr [rsp+18h],r8 0x4C, 0x89, 0x4C, 0x24, 0x20, // mov qword ptr [rsp+20h],r9 0x48, 0x8B, 0x41, 0x00, // mov rax, qword ptr [rcx+FunctionInfoOffset] 0x48, 0x8B, 0x48, 0x00, // mov rcx, qword ptr [rax+FunctionProxyOffset] 0x48, 0x8B, 0x51, 0x00, // mov rdx, qword ptr [rcx+DynamicThunkAddressOffset] // Range Check for Valid call target 0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xFFFFFFFFFFFFFFF8h ;Force 8 byte alignment 0x48, 0x8b, 0xca, // mov rcx, rdx 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress 0x48, 0x2b, 0xc8, // sub rcx, rax 0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize 0x76, 0x09, // jbe $safe 0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode 0xcd, 0x29, // int 29h // $safe: 0x48, 0x8D, 0x4C, 0x24, 0x08, // lea rcx, [rsp+8] ;Load the address to stack 0x48, 0x83, 0xEC, StackAllocSize, // sub rsp,28h 0x48, 0xB8, 0x00, 0x00, 0x00 ,0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, 0xFF, 0xE2, // jmp rdx 0xCC, 0xCC, 0xCC, 0xCC, 0xCC // int 3 ;for alignment to size of 8 we are adding this }; constexpr BYTE Epilog[] = { 0x48, 0x83, 0xC4, StackAllocSize, // add rsp,28h 0xC3 // ret }; #else // Sys V AMD64 constexpr BYTE FunctionInfoOffset = 7; constexpr BYTE FunctionProxyOffset = 11; constexpr BYTE DynamicThunkAddressOffset = 15; constexpr BYTE CallBlockStartAddrOffset = 25; constexpr BYTE ThunkSizeOffset = 39; constexpr BYTE ErrorOffset = 48; constexpr BYTE ThunkAddressOffset = 61; constexpr BYTE PrologSize = 60; constexpr BYTE StackAllocSize = 0x0; constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = { 0x55, // push rbp // Prolog - setup the stack frame 0x48, 0x89, 0xe5, // mov rbp, rsp 0x48, 0x8b, 0x47, 0x00, // mov rax, qword ptr [rdi + FunctionInfoOffset] 0x48, 0x8B, 0x48, 0x00, // mov rcx, qword ptr [rax+FunctionProxyOffset] 0x48, 0x8B, 0x51, 0x00, // mov rdx, qword ptr [rcx+DynamicThunkAddressOffset] // Range Check for Valid call target 0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xfffffffffffffff8 // Force 8 byte alignment 0x48, 0x89, 0xd1, // mov rcx, rdx 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress 0x48, 0x29, 0xc1, // sub rcx, rax 0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize 0x76, 0x09, // jbe safe 0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode 0xcd, 0x29, // int 29h <-- xplat TODO: just to exit // safe: 0x48, 0x8d, 0x7c, 0x24, 0x10, // lea rdi, [rsp+0x10] 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, // stack already 16-byte aligned 0xff, 0xe2, // jmp rdx 0xcc // int 3 // for alignment to size of 8 }; constexpr BYTE Epilog[] = { 0x5d, // pop rbp 0xc3 // ret }; #endif #elif defined(_M_ARM) constexpr BYTE ThunkAddressOffset = 8; constexpr BYTE FunctionInfoOffset = 18; constexpr BYTE FunctionProxyOffset = 22; constexpr BYTE DynamicThunkAddressOffset = 26; constexpr BYTE CallBlockStartAddressInstrOffset = 42; constexpr BYTE CallThunkSizeInstrOffset = 54; constexpr BYTE ErrorOffset = 64; constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = { 0x0F, 0xB4, // push {r0-r3} 0x2D, 0xE9, 0x00, 0x48, // push {r11,lr} 0xEB, 0x46, // mov r11,sp 0x00, 0x00, 0x00, 0x00, // movw r1,ThunkAddress 0x00, 0x00, 0x00, 0x00, // movt r1,ThunkAddress 0xD0, 0xF8, 0x00, 0x20, // ldr.w r2,[r0,#0x00] 0xD2, 0xF8, 0x00, 0x00, // ldr.w r0,[r2,#0x00] 0xD0, 0xF8, 0x00, 0x30, // ldr.w r3,[r0,#0x00] 0x4F, 0xF6, 0xF9, 0x70, // mov r0,#0xFFF9 0xCF, 0xF6, 0xFF, 0x70, // movt r0,#0xFFFF 0x03, 0xEA, 0x00, 0x03, // and r3,r3,r0 0x18, 0x46, // mov r0, r3 0x00, 0x00, 0x00, 0x00, // movw r12, CallBlockStartAddress 0x00, 0x00, 0x00, 0x00, // movt r12, CallBlockStartAddress 0xA0, 0xEB, 0x0C, 0x00, // sub r0, r12 0x00, 0x00, 0x00, 0x00, // mov r12, ThunkSize 0x60, 0x45, // cmp r0, r12 0x02, 0xD9, // bls $safe 0x4F, 0xF0, 0x00, 0x00, // mov r0, errorcode 0xFB, 0xDE, // Equivalent to int 0x29 //$safe: 0x02, 0xA8, // add r0,sp,#8 0x18, 0x47 // bx r3 }; constexpr BYTE JmpOffset = 2; constexpr BYTE Call[] = { 0x88, 0x47, // blx r1 0x00, 0x00, 0x00, 0x00, // b.w epilog 0xFE, 0xDE, // int 3 ;Required for alignment }; constexpr BYTE Epilog[] = { 0x5D, 0xF8, 0x04, 0xBB, // pop {r11} 0x5D, 0xF8, 0x14, 0xFB // ldr pc,[sp],#0x14 }; #elif defined(_M_ARM64) constexpr BYTE FunctionInfoOffset = 24; constexpr BYTE FunctionProxyOffset = 28; constexpr BYTE DynamicThunkAddressOffset = 32; constexpr BYTE ThunkAddressOffset = 36; //TODO: saravind :Implement Range Check for ARM64 constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = { 0xFD, 0x7B, 0xBB, 0xA9, //stp fp, lr, [sp, #-80]! ;Prologue 0xFD, 0x03, 0x00, 0x91, //mov fp, sp ;update frame pointer to the stack pointer 0xE0, 0x07, 0x01, 0xA9, //stp x0, x1, [sp, #16] ;Prologue again; save all registers 0xE2, 0x0F, 0x02, 0xA9, //stp x2, x3, [sp, #32] 0xE4, 0x17, 0x03, 0xA9, //stp x4, x5, [sp, #48] 0xE6, 0x1F, 0x04, 0xA9, //stp x6, x7, [sp, #64] 0x02, 0x00, 0x40, 0xF9, //ldr x2, [x0, #0x00] ;offset will be replaced with Offset of FunctionInfo 0x40, 0x00, 0x40, 0xF9, //ldr x0, [x2, #0x00] ;offset will be replaced with Offset of FunctionProxy 0x03, 0x00, 0x40, 0xF9, //ldr x3, [x0, #0x00] ;offset will be replaced with offset of DynamicInterpreterThunk //Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1. 0x00, 0x00, 0x00, 0x00, //movz x1, #0x00 ;This is overwritten with the actual thunk address(16 - 0 bits) move 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #16 ;This is overwritten with the actual thunk address(32 - 16 bits) move 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #32 ;This is overwritten with the actual thunk address(48 - 32 bits) move 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #48 ;This is overwritten with the actual thunk address(64 - 48 bits) move 0xE0, 0x43, 0x00, 0x91, //add x0, sp, #16 0x60, 0x00, 0x1F, 0xD6, //br x3 0xCC, 0xCC, 0xCC, 0xCC //int 3 for 8byte alignment }; constexpr BYTE JmpOffset = 4; constexpr BYTE Call[] = { 0x20, 0x00, 0x3f, 0xd6, // blr x1 0x00, 0x00, 0x00, 0x00 // b epilog }; constexpr BYTE Epilog[] = { 0xfd, 0x7b, 0xc5, 0xa8, // ldp fp, lr, [sp], #80 0xc0, 0x03, 0x5f, 0xd6 // ret }; #else // x86 constexpr BYTE FunctionInfoOffset = 8; constexpr BYTE FunctionProxyOffset = 11; constexpr BYTE DynamicThunkAddressOffset = 14; constexpr BYTE CallBlockStartAddrOffset = 21; constexpr BYTE ThunkSizeOffset = 26; constexpr BYTE ErrorOffset = 33; constexpr BYTE ThunkAddressOffset = 44; constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = { 0x55, // push ebp ;Prolog - setup the stack frame 0x8B, 0xEC, // mov ebp,esp 0x8B, 0x45, 0x08, // mov eax, dword ptr [ebp+8] 0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionInfoOffset] 0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionProxyOffset] 0x8B, 0x48, 0x00, // mov ecx, dword ptr [eax+DynamicThunkAddressOffset] // Range Check for Valid call target 0x83, 0xE1, 0xF8, // and ecx, 0FFFFFFF8h 0x8b, 0xc1, // mov eax, ecx 0x2d, 0x00, 0x00, 0x00, 0x00, // sub eax, CallBlockStartAddress 0x3d, 0x00, 0x00, 0x00, 0x00, // cmp eax, ThunkSize 0x76, 0x07, // jbe SHORT $safe 0xb9, 0x00, 0x00, 0x00, 0x00, // mov ecx, errorcode 0xCD, 0x29, // int 29h //$safe 0x8D, 0x45, 0x08, // lea eax, ebp+8 0x50, // push eax 0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, 0xFF, 0xE1, // jmp ecx 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC // int 3 for 8byte alignment }; constexpr BYTE Epilog[] = { 0x5D, // pop ebp 0xC3 // ret }; #endif #if defined(_M_X64) || defined(_M_IX86) constexpr BYTE JmpOffset = 3; constexpr BYTE Call[] = { 0xFF, 0xD0, // call rax 0xE9, 0x00, 0x00, 0x00, 0x00, // jmp [offset] 0xCC, // int 3 ;for alignment to size of 8 we are adding this }; #endif constexpr BYTE HeaderSize = sizeof(InterpreterThunk); } // anonymous namespace const BYTE InterpreterThunkEmitter::ThunkSize = sizeof(Call); InterpreterThunkEmitter::InterpreterThunkEmitter(Js::ScriptContext* context, ArenaAllocator* allocator, CustomHeap::InProcCodePageAllocators * codePageAllocators, bool isAsmInterpreterThunk) : emitBufferManager(allocator, codePageAllocators, /*scriptContext*/ nullptr, nullptr, _u("Interpreter thunk buffer"), GetCurrentProcess()), scriptContext(context), allocator(allocator), thunkCount(0), thunkBuffer(nullptr), isAsmInterpreterThunk(isAsmInterpreterThunk) { } SListBase* InterpreterThunkEmitter::GetThunkBlocksList() { return &thunkBlocks; } // // Returns the next thunk. Batch allocated PageCount pages of thunks and issue them one at a time // BYTE* InterpreterThunkEmitter::GetNextThunk(PVOID* ppDynamicInterpreterThunk) { Assert(ppDynamicInterpreterThunk); Assert(*ppDynamicInterpreterThunk == nullptr); if(thunkCount == 0) { if(!this->freeListedThunkBlocks.Empty()) { return AllocateFromFreeList(ppDynamicInterpreterThunk); } if (!NewThunkBlock()) { #ifdef ASMJS_PLAT return this->isAsmInterpreterThunk ? (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterAsmThunk : (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterThunk; #else Assert(!this->isAsmInterpreterThunk); return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterThunk; #endif } } Assert(this->thunkBuffer != nullptr); BYTE* thunk = this->thunkBuffer; #if _M_ARM thunk = (BYTE*)((DWORD)thunk | 0x01); #endif *ppDynamicInterpreterThunk = thunk + HeaderSize + ((--thunkCount) * ThunkSize); #if _M_ARM AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x6) == 0, "Not 8 byte aligned?"); #else AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x7) == 0, "Not 8 byte aligned?"); #endif return thunk; } // // Interpreter thunks have an entrypoint at the beginning of the page boundary. Each function has a unique thunk return address // and this function can convert to the unique thunk return address to the beginning of the page which corresponds with the entrypoint // void* InterpreterThunkEmitter::ConvertToEntryPoint(PVOID dynamicInterpreterThunk) { Assert(dynamicInterpreterThunk != nullptr); void* entryPoint = (void*)((size_t)dynamicInterpreterThunk & (~((size_t)(BlockSize) - 1))); #if _M_ARM entryPoint = (BYTE*)((DWORD)entryPoint | 0x01); #endif return entryPoint; } bool InterpreterThunkEmitter::NewThunkBlock() { if (this->scriptContext->GetConfig()->IsNoDynamicThunks()) { return false; } #ifdef ENABLE_OOP_NATIVE_CODEGEN if (CONFIG_FLAG(ForceStaticInterpreterThunk)) { return false; } if (JITManager::GetJITManager()->IsOOPJITEnabled()) { return NewOOPJITThunkBlock(); } #endif Assert(this->thunkCount == 0); BYTE* buffer; EmitBufferAllocation * allocation = emitBufferManager.AllocateBuffer(BlockSize, &buffer); if (allocation == nullptr) { Js::Throw::OutOfMemory(); } if (!emitBufferManager.ProtectBufferWithExecuteReadWriteForInterpreter(allocation)) { Js::Throw::OutOfMemory(); } #if PDATA_ENABLED PRUNTIME_FUNCTION pdataStart = nullptr; intptr_t epilogEnd = 0; #endif DWORD count = this->thunkCount; FillBuffer( this->scriptContext->GetThreadContext(), this->isAsmInterpreterThunk, (intptr_t)buffer, BlockSize, buffer, #if PDATA_ENABLED &pdataStart, &epilogEnd, #endif &count ); if (!emitBufferManager.CommitBufferForInterpreter(allocation, buffer, BlockSize)) { Js::Throw::OutOfMemory(); } // Call to set VALID flag for CFG check BYTE* callTarget = buffer; #ifdef _M_ARM // We want to allow the actual callable value, so thumb-tag the address callTarget = (BYTE*)((uintptr_t)buffer | 0x1); #endif ThreadContext::GetContextForCurrentThread()->SetValidCallTargetForCFG(callTarget); // Update object state only at the end when everything has succeeded - and no exceptions can be thrown. auto block = this->thunkBlocks.PrependNode(allocator, buffer, count); #if PDATA_ENABLED void* pdataTable; PDataManager::RegisterPdata((PRUNTIME_FUNCTION)pdataStart, (ULONG_PTR)buffer, (ULONG_PTR)epilogEnd, &pdataTable); block->SetPdata(pdataTable); #else Unused(block); #endif this->thunkBuffer = buffer; this->thunkCount = count; return true; } #ifdef ENABLE_OOP_NATIVE_CODEGEN bool InterpreterThunkEmitter::NewOOPJITThunkBlock() { PSCRIPTCONTEXT_HANDLE remoteScriptContext = this->scriptContext->GetRemoteScriptAddr(); if (!JITManager::GetJITManager()->IsConnected()) { return false; } InterpreterThunkInputIDL thunkInput; thunkInput.asmJsThunk = this->isAsmInterpreterThunk; InterpreterThunkOutputIDL thunkOutput; HRESULT hr = JITManager::GetJITManager()->NewInterpreterThunkBlock(remoteScriptContext, &thunkInput, &thunkOutput); if (!JITManager::HandleServerCallResult(hr, RemoteCallType::ThunkCreation)) { return false; } BYTE* buffer = (BYTE*)thunkOutput.mappedBaseAddr; if (!CONFIG_FLAG(OOPCFGRegistration)) { BYTE* callTarget = buffer; #ifdef _M_ARM // Need to register the thumb-tagged call target for CFG callTarget = (BYTE*)((uintptr_t)callTarget | 0x1); #endif this->scriptContext->GetThreadContext()->SetValidCallTargetForCFG(callTarget); } // Update object state only at the end when everything has succeeded - and no exceptions can be thrown. auto block = this->thunkBlocks.PrependNode(allocator, buffer, thunkOutput.thunkCount); #if PDATA_ENABLED void* pdataTable; PDataManager::RegisterPdata((PRUNTIME_FUNCTION)thunkOutput.pdataTableStart, (ULONG_PTR)thunkOutput.mappedBaseAddr, (ULONG_PTR)thunkOutput.epilogEndAddr, &pdataTable); block->SetPdata(pdataTable); #else Unused(block); #endif this->thunkBuffer = (BYTE*)thunkOutput.mappedBaseAddr; this->thunkCount = thunkOutput.thunkCount; return true; } #endif /* static */ void InterpreterThunkEmitter::FillBuffer( _In_ ThreadContextInfo * threadContext, _In_ bool asmJsThunk, _In_ intptr_t finalAddr, _In_ size_t bufferSize, _Out_writes_bytes_all_(BlockSize) BYTE* buffer, #if PDATA_ENABLED _Out_ PRUNTIME_FUNCTION * pdataTableStart, _Out_ intptr_t * epilogEndAddr, #endif _Out_ DWORD * thunkCount ) { #ifdef _M_X64 PrologEncoder prologEncoder; prologEncoder.EncodeSmallProlog(PrologSize, StackAllocSize); DWORD pdataSize = prologEncoder.SizeOfPData(); #elif defined(_M_ARM32_OR_ARM64) DWORD pdataSize = sizeof(RUNTIME_FUNCTION); #else DWORD pdataSize = 0; #endif DWORD bytesRemaining = BlockSize; DWORD bytesWritten = 0; DWORD thunks = 0; DWORD epilogSize = sizeof(Epilog); const BYTE *epilog = Epilog; const BYTE *header = InterpreterThunk; intptr_t interpreterThunk; // the static interpreter thunk invoked by the dynamic emitted thunk #ifdef ASMJS_PLAT if (asmJsThunk) { interpreterThunk = ShiftAddr(threadContext, &Js::InterpreterStackFrame::InterpreterAsmThunk); } else #endif { interpreterThunk = ShiftAddr(threadContext, &Js::InterpreterStackFrame::InterpreterThunk); } BYTE * currentBuffer = buffer; // Ensure there is space for PDATA at the end BYTE* pdataStart = currentBuffer + (BlockSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT)); BYTE* epilogStart = pdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT); // Ensure there is space for PDATA at the end intptr_t finalPdataStart = finalAddr + (BlockSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT)); intptr_t finalEpilogStart = finalPdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT); // Copy the thunk buffer and modify it. js_memcpy_s(currentBuffer, bytesRemaining, header, HeaderSize); EncodeInterpreterThunk(currentBuffer, finalAddr, finalEpilogStart, epilogSize, interpreterThunk); currentBuffer += HeaderSize; bytesRemaining -= HeaderSize; // Copy call buffer DWORD callSize = sizeof(Call); while (currentBuffer < epilogStart - callSize) { js_memcpy_s(currentBuffer, bytesRemaining, Call, callSize); #if _M_ARM int offset = (epilogStart - (currentBuffer + JmpOffset)); Assert(offset >= 0); DWORD encodedOffset = EncoderMD::BranchOffset_T2_24(offset); DWORD encodedBranch = /*opcode=*/ 0x9000F000 | encodedOffset; Emit(currentBuffer, JmpOffset, encodedBranch); #elif _M_ARM64 int64 offset = (epilogStart - (currentBuffer + JmpOffset)); Assert(offset >= 0); DWORD encodedOffset = EncoderMD::BranchOffset_26(offset); DWORD encodedBranch = /*opcode=*/ 0x14000000 | encodedOffset; Emit(currentBuffer, JmpOffset, encodedBranch); #else // jump requires an offset from the end of the jump instruction. int offset = (int)(epilogStart - (currentBuffer + JmpOffset + sizeof(int))); Assert(offset >= 0); Emit(currentBuffer, JmpOffset, offset); #endif currentBuffer += callSize; bytesRemaining -= callSize; thunks++; } // Fill any gap till start of epilog bytesWritten = FillDebugBreak(currentBuffer, (DWORD)(epilogStart - currentBuffer)); bytesRemaining -= bytesWritten; currentBuffer += bytesWritten; // Copy epilog bytesWritten = CopyWithAlignment(currentBuffer, bytesRemaining, epilog, epilogSize, EMIT_BUFFER_ALIGNMENT); currentBuffer += bytesWritten; bytesRemaining -= bytesWritten; // Generate and register PDATA #if PDATA_ENABLED BYTE* epilogEnd = epilogStart + epilogSize; DWORD functionSize = (DWORD)(epilogEnd - buffer); Assert(pdataStart == currentBuffer); #ifdef _M_X64 Assert(bytesRemaining >= pdataSize); BYTE* pdata = prologEncoder.Finalize(buffer, functionSize, pdataStart); bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, pdata, pdataSize, EMIT_BUFFER_ALIGNMENT); #elif defined(_M_ARM32_OR_ARM64) RUNTIME_FUNCTION pdata; GeneratePdata(buffer, functionSize, &pdata); bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, (const BYTE*)&pdata, pdataSize, EMIT_BUFFER_ALIGNMENT); #endif *pdataTableStart = (PRUNTIME_FUNCTION)finalPdataStart; *epilogEndAddr = finalEpilogStart; #endif *thunkCount = thunks; } #if _M_ARM void InterpreterThunkEmitter::EncodeInterpreterThunk( __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer, __in const intptr_t thunkBufferStartAddress, __in const intptr_t epilogStart, __in const DWORD epilogSize, __in const intptr_t interpreterThunk) { // Encode MOVW DWORD lowerThunkBits = (uint32)interpreterThunk & 0x0000FFFF; DWORD movW = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/1, lowerThunkBits); Emit(thunkBuffer,ThunkAddressOffset, movW); // Encode MOVT DWORD higherThunkBits = ((uint32)interpreterThunk & 0xFFFF0000) >> 16; DWORD movT = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/1, higherThunkBits); Emit(thunkBuffer, ThunkAddressOffset + sizeof(movW), movT); // Encode LDR - Load of function Body thunkBuffer[FunctionInfoOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo(); thunkBuffer[FunctionProxyOffset] = Js::FunctionInfo::GetOffsetOfFunctionProxy(); // Encode LDR - Load of interpreter thunk number thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk(); // Encode MOVW R12, CallBlockStartAddress uintptr_t callBlockStartAddress = (uintptr_t)thunkBufferStartAddress + HeaderSize; uint totalThunkSize = (uint)(epilogStart - callBlockStartAddress); DWORD lowerCallBlockStartAddress = callBlockStartAddress & 0x0000FFFF; DWORD movWblockStart = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, lowerCallBlockStartAddress); Emit(thunkBuffer,CallBlockStartAddressInstrOffset, movWblockStart); // Encode MOVT R12, CallBlockStartAddress DWORD higherCallBlockStartAddress = (callBlockStartAddress & 0xFFFF0000) >> 16; DWORD movTblockStart = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/12, higherCallBlockStartAddress); Emit(thunkBuffer, CallBlockStartAddressInstrOffset + sizeof(movWblockStart), movTblockStart); //Encode MOV R12, CallBlockSize DWORD movBlockSize = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, (DWORD)totalThunkSize); Emit(thunkBuffer, CallThunkSizeInstrOffset, movBlockSize); Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG); } DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16) { DWORD encodedMove = reg << 24; #if _M_ARM DWORD encodedImm = 0; EncoderMD::EncodeImmediate16(imm16, &encodedImm); encodedMove |= encodedImm; #elif _M_ARM64 // ToDo (SaAgarwa) - From Aaron change. Validate for ARM64 encodedMove |= (imm16 & 0xFFFF) << 5; #endif AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?"); encodedMove |= opCode; return encodedMove; } void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function) { function->BeginAddress = 0x1; // Since our base address is the start of the function - this is offset from the base address function->Flag = 1; // Packed unwind data is used function->FunctionLength = functionSize / 2; function->Ret = 0; // Return via Pop function->H = 1; // Homes parameters function->Reg = 7; // No saved registers - R11 is the frame pointer - not considered here function->R = 1; // No registers are being saved. function->L = 1; // Save/restore LR register function->C = 1; // Frame pointer chain in R11 established function->StackAdjust = 0; // Stack allocation for the function } #elif _M_ARM64 void InterpreterThunkEmitter::EncodeInterpreterThunk( __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer, __in const intptr_t thunkBufferStartAddress, __in const intptr_t epilogStart, __in const DWORD epilogSize, __in const intptr_t interpreterThunk) { int addrOffset = ThunkAddressOffset; // Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1. // Encode MOVZ (movz x1, #) DWORD lowerThunkBits = (uint64)interpreterThunk & 0x0000FFFF; DWORD movZ = EncodeMove(/*Opcode*/ 0xD2800000, /*register x1*/1, lowerThunkBits); // no shift; hw = 00 Emit(thunkBuffer,addrOffset, movZ); static_assert(sizeof(movZ) == 4, "movZ has to be 32-bit encoded"); addrOffset+= sizeof(movZ); // Encode MOVK (movk x1, #, lsl #16) DWORD higherThunkBits = ((uint64)interpreterThunk & 0xFFFF0000) >> 16; DWORD movK = EncodeMove(/*Opcode*/ 0xF2A00000, /*register x1*/1, higherThunkBits); // left shift 16 bits; hw = 01 Emit(thunkBuffer, addrOffset, movK); static_assert(sizeof(movK) == 4, "movK has to be 32-bit encoded"); addrOffset+= sizeof(movK); // Encode MOVK (movk x1, #, lsl #16) higherThunkBits = ((uint64)interpreterThunk & 0xFFFF00000000) >> 32; movK = EncodeMove(/*Opcode*/ 0xF2C00000, /*register x1*/1, higherThunkBits); // left shift 32 bits; hw = 02 Emit(thunkBuffer, addrOffset, movK); addrOffset += sizeof(movK); // Encode MOVK (movk x1, #, lsl #16) higherThunkBits = ((uint64)interpreterThunk & 0xFFFF000000000000) >> 48; movK = EncodeMove(/*Opcode*/ 0xF2E00000, /*register x1*/1, higherThunkBits); // left shift 48 bits; hw = 03 Emit(thunkBuffer, addrOffset, movK); // Encode LDR - Load of function Body ULONG offsetOfFunctionInfo = Js::JavascriptFunction::GetOffsetOfFunctionInfo(); AssertMsg(offsetOfFunctionInfo % 8 == 0, "Immediate offset for LDR must be 8 byte aligned"); AssertMsg(offsetOfFunctionInfo < 0x8000, "Immediate offset for LDR must be less than 0x8000"); *(PULONG)&thunkBuffer[FunctionInfoOffset] |= (offsetOfFunctionInfo / 8) << 10; ULONG offsetOfFunctionProxy = Js::FunctionInfo::GetOffsetOfFunctionProxy(); AssertMsg(offsetOfFunctionProxy % 8 == 0, "Immediate offset for LDR must be 8 byte aligned"); AssertMsg(offsetOfFunctionProxy < 0x8000, "Immediate offset for LDR must be less than 0x8000"); *(PULONG)&thunkBuffer[FunctionProxyOffset] |= (offsetOfFunctionProxy / 8) << 10; // Encode LDR - Load of interpreter thunk number ULONG offsetOfDynamicInterpreterThunk = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk(); AssertMsg(offsetOfDynamicInterpreterThunk % 8 == 0, "Immediate offset for LDR must be 8 byte aligned"); AssertMsg(offsetOfDynamicInterpreterThunk < 0x8000, "Immediate offset for LDR must be less than 0x8000"); *(PULONG)&thunkBuffer[DynamicThunkAddressOffset] |= (offsetOfDynamicInterpreterThunk / 8) << 10; } DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16) { DWORD encodedMove = reg << 0; #if _M_ARM DWORD encodedImm = 0; EncoderMD::EncodeImmediate16(imm16, &encodedImm); encodedMove |= encodedImm; #elif _M_ARM64 // ToDo (SaAgarwa) - From Aaron change. Validate for ARM64 encodedMove |= (imm16 & 0xFFFF) << 5; #endif AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?"); encodedMove |= opCode; return encodedMove; } void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function) { function->BeginAddress = 0x0; // Since our base address is the start of the function - this is offset from the base address function->Flag = 1; // Packed unwind data is used function->FunctionLength = functionSize / 4; function->RegF = 0; // number of non-volatile FP registers (d8-d15) saved in the canonical stack location function->RegI = 0; // number of non-volatile INT registers (r19-r28) saved in the canonical stack location function->H = 1; // Homes parameters // (indicating whether the function "homes" the integer parameter registers (r0-r7) by storing them at the very start of the function) function->CR = 3; // chained function, a store/load pair instruction is used in prolog/epilog function->FrameSize = 5; // the number of bytes of stack that is allocated for this function divided by 16 } #else void InterpreterThunkEmitter::EncodeInterpreterThunk( __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer, __in const intptr_t thunkBufferStartAddress, __in const intptr_t epilogStart, __in const DWORD epilogSize, __in const intptr_t interpreterThunk) { Emit(thunkBuffer, ThunkAddressOffset, (uintptr_t)interpreterThunk); thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk(); thunkBuffer[FunctionInfoOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo(); thunkBuffer[FunctionProxyOffset] = Js::FunctionInfo::GetOffsetOfFunctionProxy(); Emit(thunkBuffer, CallBlockStartAddrOffset, (uintptr_t) thunkBufferStartAddress + HeaderSize); uint totalThunkSize = (uint)(epilogStart - (thunkBufferStartAddress + HeaderSize)); Emit(thunkBuffer, ThunkSizeOffset, totalThunkSize); Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG); } #endif /*static*/ DWORD InterpreterThunkEmitter::FillDebugBreak(_Out_writes_bytes_all_(count) BYTE* dest, _In_ DWORD count) { #if defined(_M_ARM) Assert(count % 2 == 0); #elif defined(_M_ARM64) Assert(count % 4 == 0); #endif CustomHeap::FillDebugBreak(dest, count); return count; } /*static*/ DWORD InterpreterThunkEmitter::CopyWithAlignment( _Out_writes_bytes_all_(sizeInBytes) BYTE* dest, _In_ const DWORD sizeInBytes, _In_reads_bytes_(srcSize) const BYTE* src, _In_ const DWORD srcSize, _In_ const DWORD alignment) { js_memcpy_s(dest, sizeInBytes, src, srcSize); dest += srcSize; DWORD alignPad = Math::Align(srcSize, alignment) - srcSize; Assert(alignPad <= (sizeInBytes - srcSize)); if(alignPad > 0 && alignPad <= (sizeInBytes - srcSize)) { FillDebugBreak(dest, alignPad); return srcSize + alignPad; } return srcSize; } #if DBG bool InterpreterThunkEmitter::IsInHeap(void* address) { #ifdef ENABLE_OOP_NATIVE_CODEGEN if (JITManager::GetJITManager()->IsOOPJITEnabled()) { PSCRIPTCONTEXT_HANDLE remoteScript = this->scriptContext->GetRemoteScriptAddr(false); if (!remoteScript || !JITManager::GetJITManager()->IsConnected()) { // this method is used in asserts to validate whether an entry point is valid // in case JIT process crashed, let's just say true to keep asserts from firing return true; } boolean result; HRESULT hr = JITManager::GetJITManager()->IsInterpreterThunkAddr(remoteScript, (intptr_t)address, this->isAsmInterpreterThunk, &result); if (!JITManager::HandleServerCallResult(hr, RemoteCallType::HeapQuery)) { return true; } return result != FALSE; } else #endif { return emitBufferManager.IsInHeap(address); } } #endif // We only decommit at close because there might still be some // code running here. // The destructor of emitBufferManager will cause the eventual release. void InterpreterThunkEmitter::Close() { #if PDATA_ENABLED auto unregisterPdata = ([&] (const ThunkBlock& block) { PDataManager::UnregisterPdata((PRUNTIME_FUNCTION) block.GetPdata()); }); thunkBlocks.Iterate(unregisterPdata); freeListedThunkBlocks.Iterate(unregisterPdata); #endif this->thunkBlocks.Clear(allocator); this->freeListedThunkBlocks.Clear(allocator); #ifdef ENABLE_OOP_NATIVE_CODEGEN if (JITManager::GetJITManager()->IsOOPJITEnabled()) { PSCRIPTCONTEXT_HANDLE remoteScript = this->scriptContext->GetRemoteScriptAddr(false); if (remoteScript && JITManager::GetJITManager()->IsConnected()) { JITManager::GetJITManager()->DecommitInterpreterBufferManager(remoteScript, this->isAsmInterpreterThunk); } } else #endif { emitBufferManager.Decommit(); } this->thunkBuffer = nullptr; this->thunkCount = 0; } void InterpreterThunkEmitter::Release(BYTE* thunkAddress, bool addtoFreeList) { if(!addtoFreeList) { return; } auto predicate = ([=] (const ThunkBlock& block) { return block.Contains(thunkAddress); }); ThunkBlock* block = freeListedThunkBlocks.Find(predicate); if(!block) { block = thunkBlocks.MoveTo(&freeListedThunkBlocks, predicate); } // if EnsureFreeList fails in an OOM scenario - we just leak the thunks if(block && block->EnsureFreeList(allocator)) { block->Release(thunkAddress); } } BYTE* InterpreterThunkEmitter::AllocateFromFreeList(PVOID* ppDynamicInterpreterThunk ) { ThunkBlock& block = this->freeListedThunkBlocks.Head(); BYTE* thunk = block.AllocateFromFreeList(); #if _M_ARM thunk = (BYTE*)((DWORD)thunk | 0x01); #endif if(block.IsFreeListEmpty()) { this->freeListedThunkBlocks.MoveHeadTo(&this->thunkBlocks); } *ppDynamicInterpreterThunk = thunk; BYTE* entryPoint = block.GetStart(); #if _M_ARM entryPoint = (BYTE*)((DWORD)entryPoint | 0x01); #endif return entryPoint; } bool ThunkBlock::Contains(BYTE* address) const { bool contains = address >= start && address < (start + InterpreterThunkEmitter::BlockSize); return contains; } void ThunkBlock::Release(BYTE* address) { Assert(Contains(address)); Assert(this->freeList); BVIndex index = FromThunkAddress(address); this->freeList->Set(index); } BYTE* ThunkBlock::AllocateFromFreeList() { Assert(this->freeList); BVIndex index = this->freeList->GetNextBit(0); BYTE* address = ToThunkAddress(index); this->freeList->Clear(index); return address; } BVIndex ThunkBlock::FromThunkAddress(BYTE* address) { uint index = ((uint)(address - start) - HeaderSize) / InterpreterThunkEmitter::ThunkSize; Assert(index < this->thunkCount); return index; } BYTE* ThunkBlock::ToThunkAddress(BVIndex index) { Assert(index < this->thunkCount); BYTE* address = start + HeaderSize + InterpreterThunkEmitter::ThunkSize * index; return address; } bool ThunkBlock::EnsureFreeList(ArenaAllocator* allocator) { if(!this->freeList) { this->freeList = BVFixed::NewNoThrow(this->thunkCount, allocator); } return this->freeList != nullptr; } bool ThunkBlock::IsFreeListEmpty() const { Assert(this->freeList); return this->freeList->IsAllClear(); } #endif // ENABLE_NATIVE_CODEGEN