InterpreterThunkEmitter.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "BackEnd.h"
  6. #ifdef ENABLE_NATIVE_CODEGEN
  7. #ifdef _M_X64
  8. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 23;
  9. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 27;
  10. const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 37;
  11. const BYTE InterpreterThunkEmitter::ThunkSizeOffset = 51;
  12. const BYTE InterpreterThunkEmitter::ErrorOffset = 60;
  13. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 77;
  14. const BYTE InterpreterThunkEmitter::PrologSize = 76;
  15. const BYTE InterpreterThunkEmitter::StackAllocSize = 0x28;
  16. //
  17. // Home the arguments onto the stack and pass a pointer to the base of the stack location to the inner thunk
  18. //
  19. // Calling convention requires that caller should allocate at least 0x20 bytes and the stack be 16 byte aligned.
  20. // Hence, we allocate 0x28 bytes of stack space for the callee to use. The callee uses 8 bytes to push the first
  21. // argument and the rest 0x20 ensures alignment is correct.
  22. //
  23. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  24. 0x48, 0x89, 0x54, 0x24, 0x10, // mov qword ptr [rsp+10h],rdx
  25. 0x48, 0x89, 0x4C, 0x24, 0x08, // mov qword ptr [rsp+8],rcx
  26. 0x4C, 0x89, 0x44, 0x24, 0x18, // mov qword ptr [rsp+18h],r8
  27. 0x4C, 0x89, 0x4C, 0x24, 0x20, // mov qword ptr [rsp+20h],r9
  28. 0x48, 0x8B, 0x41, 0x00, // mov rax, qword ptr [rcx+FunctionBodyOffset]
  29. 0x48, 0x8B, 0x50, 0x00, // mov rdx, qword ptr [rax+DynamicThunkAddressOffset]
  30. // Range Check for Valid call target
  31. 0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xFFFFFFFFFFFFFFF8h ;Force 8 byte alignment
  32. 0x48, 0x8b, 0xca, // mov rcx, rdx
  33. 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
  34. 0x48, 0x2b, 0xc8, // sub rcx, rax
  35. 0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
  36. 0x76, 0x09, // jbe $safe
  37. 0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
  38. 0xcd, 0x29, // int 29h
  39. // $safe:
  40. 0x48, 0x8D, 0x4C, 0x24, 0x08, // lea rcx, [rsp+8] ;Load the address to stack
  41. 0x48, 0x83, 0xEC, StackAllocSize, // sub rsp,28h
  42. 0x48, 0xB8, 0x00, 0x00, 0x00 ,0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk>
  43. 0xFF, 0xE2, // jmp rdx
  44. 0xCC // int 3 ;for alignment to size of 8 we are adding this
  45. };
  46. const BYTE InterpreterThunkEmitter::Epilog[] = {
  47. 0x48, 0x83, 0xC4, StackAllocSize, // add rsp,28h
  48. 0xC3 // ret
  49. };
  50. #elif defined(_M_ARM)
  51. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 8;
  52. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 18;
  53. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 22;
  54. const BYTE InterpreterThunkEmitter::CallBlockStartAddressInstrOffset = 38;
  55. const BYTE InterpreterThunkEmitter::CallThunkSizeInstrOffset = 50;
  56. const BYTE InterpreterThunkEmitter::ErrorOffset = 60;
  57. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  58. 0x0F, 0xB4, // push {r0-r3}
  59. 0x2D, 0xE9, 0x00, 0x48, // push {r11,lr}
  60. 0xEB, 0x46, // mov r11,sp
  61. 0x00, 0x00, 0x00, 0x00, // movw r1,ThunkAddress
  62. 0x00, 0x00, 0x00, 0x00, // movt r1,ThunkAddress
  63. 0xD0, 0xF8, 0x00, 0x20, // ldr.w r2,[r0,#0x00]
  64. 0xD2, 0xF8, 0x00, 0x30, // ldr.w r3,[r2,#0x00]
  65. 0x4F, 0xF6, 0xF9, 0x70, // mov r0,#0xFFF9
  66. 0xCF, 0xF6, 0xFF, 0x70, // movt r0,#0xFFFF
  67. 0x03, 0xEA, 0x00, 0x03, // and r3,r3,r0
  68. 0x18, 0x46, // mov r0, r3
  69. 0x00, 0x00, 0x00, 0x00, // movw r12, CallBlockStartAddress
  70. 0x00, 0x00, 0x00, 0x00, // movt r12, CallBlockStartAddress
  71. 0xA0, 0xEB, 0x0C, 0x00, // sub r0, r12
  72. 0x00, 0x00, 0x00, 0x00, // mov r12, ThunkSize
  73. 0x60, 0x45, // cmp r0, r12
  74. 0x02, 0xD9, // bls $safe
  75. 0x4F, 0xF0, 0x00, 0x00, // mov r0, errorcode
  76. 0xFB, 0xDE, // Equivalent to int 0x29
  77. //$safe:
  78. 0x02, 0xA8, // add r0,sp,#8
  79. 0x18, 0x47, // bx r3
  80. 0xFE, 0xDE, // int 3 ;Required for alignment
  81. 0xFE, 0xDE // int 3 ;Required for alignment
  82. };
  83. const BYTE InterpreterThunkEmitter::JmpOffset = 2;
  84. const BYTE InterpreterThunkEmitter::Call[] = {
  85. 0x88, 0x47, // blx r1
  86. 0x00, 0x00, 0x00, 0x00, // b.w epilog
  87. 0xFE, 0xDE, // int 3 ;Required for alignment
  88. };
  89. const BYTE InterpreterThunkEmitter::Epilog[] = {
  90. 0x5D, 0xF8, 0x04, 0xBB, // pop {r11}
  91. 0x5D, 0xF8, 0x14, 0xFB // ldr pc,[sp],#0x14
  92. };
  93. #elif defined(_M_ARM64)
  94. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 24;
  95. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 28;
  96. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 32;
  97. //TODO: saravind :Implement Range Check for ARM64
  98. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  99. 0xFD, 0x7B, 0xBB, 0xA9, //stp fp, lr, [sp, #-80]! ;Prologue
  100. 0xFD, 0x03, 0x00, 0x91, //mov fp, sp ;update frame pointer to the stack pointer
  101. 0xE0, 0x07, 0x01, 0xA9, //stp x0, x1, [sp, #16] ;Prologue again; save all registers
  102. 0xE2, 0x0F, 0x02, 0xA9, //stp x2, x3, [sp, #32]
  103. 0xE4, 0x17, 0x03, 0xA9, //stp x4, x5, [sp, #48]
  104. 0xE6, 0x1F, 0x04, 0xA9, //stp x6, x7, [sp, #64]
  105. 0x02, 0x00, 0x40, 0xF9, //ldr x2, [x0, #0x00] ;offset will be replaced with Offset of FunctionInfo
  106. 0x43, 0x00, 0x40, 0xF9, //ldr x3, [x2, #0x00] ;offset will be replaced with offset of DynamicInterpreterThunk
  107. //Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
  108. 0x00, 0x00, 0x00, 0x00, //movz x1, #0x00 ;This is overwritten with the actual thunk address(16 - 0 bits) move
  109. 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #16 ;This is overwritten with the actual thunk address(32 - 16 bits) move
  110. 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #32 ;This is overwritten with the actual thunk address(48 - 32 bits) move
  111. 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #48 ;This is overwritten with the actual thunk address(64 - 48 bits) move
  112. 0xE0, 0x43, 0x00, 0x91, //add x0, sp, #16
  113. 0x60, 0x00, 0x1F, 0xD6 //br x3
  114. };
  115. const BYTE InterpreterThunkEmitter::JmpOffset = 4;
  116. const BYTE InterpreterThunkEmitter::Call[] = {
  117. 0x20, 0x00, 0x3f, 0xd6, // blr x1
  118. 0x00, 0x00, 0x00, 0x00 // b epilog
  119. };
  120. const BYTE InterpreterThunkEmitter::Epilog[] = {
  121. 0xfd, 0x7b, 0xc5, 0xa8, // ldp fp, lr, [sp], #80
  122. 0xc0, 0x03, 0x5f, 0xd6 // ret
  123. };
  124. #else
  125. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 8;
  126. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 11;
  127. const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 18;
  128. const BYTE InterpreterThunkEmitter::ThunkSizeOffset = 23;
  129. const BYTE InterpreterThunkEmitter::ErrorOffset = 30;
  130. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 41;
  131. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  132. 0x55, // push ebp ;Prolog - setup the stack frame
  133. 0x8B, 0xEC, // mov ebp,esp
  134. 0x8B, 0x45, 0x08, // mov eax, dword ptr [ebp+8]
  135. 0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionBodyOffset]
  136. 0x8B, 0x48, 0x00, // mov ecx, dword ptr [eax+DynamicThunkAddressOffset]
  137. // Range Check for Valid call target
  138. 0x83, 0xE1, 0xF8, // and ecx, 0FFFFFFF8h
  139. 0x8b, 0xc1, // mov eax, ecx
  140. 0x2d, 0x00, 0x00, 0x00, 0x00, // sub eax, CallBlockStartAddress
  141. 0x3d, 0x00, 0x00, 0x00, 0x00, // cmp eax, ThunkSize
  142. 0x76, 0x07, // jbe SHORT $safe
  143. 0xb9, 0x00, 0x00, 0x00, 0x00, // mov ecx, errorcode
  144. 0xCD, 0x29, // int 29h
  145. //$safe
  146. 0x8D, 0x45, 0x08, // lea eax, ebp+8
  147. 0x50, // push eax
  148. 0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, <thunk>
  149. 0xFF, 0xE1, // jmp ecx
  150. 0xCC // int 3 for 8byte alignment
  151. };
  152. const BYTE InterpreterThunkEmitter::Epilog[] = {
  153. 0x5D, // pop ebp
  154. 0xC3 // ret
  155. };
  156. #endif
  157. #if defined(_M_X64) || defined(_M_IX86)
  158. const BYTE InterpreterThunkEmitter::JmpOffset = 3;
  159. const BYTE InterpreterThunkEmitter::Call[] = {
  160. 0xFF, 0xD0, // call rax
  161. 0xE9, 0x00, 0x00, 0x00, 0x00, // jmp [offset]
  162. 0xCC, // int 3 ;for alignment to size of 8 we are adding this
  163. };
  164. #endif
  165. const BYTE InterpreterThunkEmitter::PageCount = 1;
  166. const uint InterpreterThunkEmitter::BlockSize = AutoSystemInfo::PageSize * InterpreterThunkEmitter::PageCount;
  167. const BYTE InterpreterThunkEmitter::HeaderSize = sizeof(InterpreterThunk);
  168. const BYTE InterpreterThunkEmitter::ThunkSize = sizeof(Call);
  169. const uint InterpreterThunkEmitter::ThunksPerBlock = (BlockSize - HeaderSize) / ThunkSize;
  170. InterpreterThunkEmitter::InterpreterThunkEmitter(AllocationPolicyManager * policyManager, ArenaAllocator* allocator, void * interpreterThunk) :
  171. emitBufferManager(policyManager, allocator, /*scriptContext*/ nullptr, L"Interpreter thunk buffer", /*allocXdata*/ false),
  172. allocation(nullptr),
  173. allocator(allocator),
  174. thunkCount(0),
  175. thunkBuffer(nullptr),
  176. interpreterThunk(interpreterThunk)
  177. {
  178. }
  179. //
  180. // Returns the next thunk. Batch allocated PageCount pages of thunks and issue them one at a time
  181. //
  182. BYTE* InterpreterThunkEmitter::GetNextThunk(PVOID* ppDynamicInterpreterThunk)
  183. {
  184. Assert(ppDynamicInterpreterThunk);
  185. Assert(*ppDynamicInterpreterThunk == nullptr);
  186. if(thunkCount == 0)
  187. {
  188. if(!this->freeListedThunkBlocks.Empty())
  189. {
  190. return AllocateFromFreeList(ppDynamicInterpreterThunk);
  191. }
  192. NewThunkBlock();
  193. }
  194. Assert(this->thunkBuffer != nullptr);
  195. BYTE* thunk = this->thunkBuffer;
  196. #if _M_ARM
  197. thunk = (BYTE*)((DWORD)thunk | 0x01);
  198. #endif
  199. *ppDynamicInterpreterThunk = thunk + HeaderSize + ((--thunkCount) * ThunkSize);
  200. #if _M_ARM
  201. AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x6) == 0, "Not 8 byte aligned?");
  202. #else
  203. AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x7) == 0, "Not 8 byte aligned?");
  204. #endif
  205. return thunk;
  206. }
  207. //
  208. // Interpreter thunks have an entrypoint at the beginning of the page boundary. Each function has a unique thunk return address
  209. // and this function can convert to the unique thunk return address to the beginning of the page which corresponds with the entrypoint
  210. //
  211. void* InterpreterThunkEmitter::ConvertToEntryPoint(PVOID dynamicInterpreterThunk)
  212. {
  213. Assert(dynamicInterpreterThunk != nullptr);
  214. void* entryPoint = (void*)((size_t)dynamicInterpreterThunk & (~((size_t)(BlockSize) - 1)));
  215. #if _M_ARM
  216. entryPoint = (BYTE*)((DWORD)entryPoint | 0x01);
  217. #endif
  218. return entryPoint;
  219. }
  220. void InterpreterThunkEmitter::NewThunkBlock()
  221. {
  222. Assert(this->thunkCount == 0);
  223. BYTE* buffer;
  224. BYTE* currentBuffer;
  225. DWORD bufferSize = BlockSize;
  226. DWORD thunkCount = 0;
  227. allocation = emitBufferManager.AllocateBuffer(bufferSize, &buffer);
  228. if (!emitBufferManager.ProtectBufferWithExecuteReadWriteForInterpreter(allocation))
  229. {
  230. Js::Throw::OutOfMemory();
  231. }
  232. currentBuffer = buffer;
  233. #ifdef _M_X64
  234. PrologEncoder prologEncoder(allocator);
  235. prologEncoder.EncodeSmallProlog(PrologSize, StackAllocSize);
  236. DWORD pdataSize = prologEncoder.SizeOfPData();
  237. #elif defined(_M_ARM32_OR_ARM64)
  238. DWORD pdataSize = sizeof(RUNTIME_FUNCTION);
  239. #else
  240. DWORD pdataSize = 0;
  241. #endif
  242. DWORD bytesRemaining = bufferSize;
  243. DWORD bytesWritten = 0;
  244. DWORD epilogSize = sizeof(Epilog);
  245. // Ensure there is space for PDATA at the end
  246. BYTE* pdataStart = currentBuffer + (bufferSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT));
  247. BYTE* epilogStart = pdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT);
  248. // Copy the thunk buffer and modify it.
  249. js_memcpy_s(currentBuffer, bytesRemaining, InterpreterThunk, HeaderSize);
  250. EncodeInterpreterThunk(currentBuffer, buffer, HeaderSize, epilogStart, epilogSize);
  251. currentBuffer += HeaderSize;
  252. bytesRemaining -= HeaderSize;
  253. // Copy call buffer
  254. DWORD callSize = sizeof(Call);
  255. while(currentBuffer < epilogStart - callSize)
  256. {
  257. js_memcpy_s(currentBuffer, bytesRemaining, Call, callSize);
  258. #if _M_ARM
  259. int offset = (epilogStart - (currentBuffer + JmpOffset));
  260. Assert(offset >= 0);
  261. DWORD encodedOffset = EncoderMD::BranchOffset_T2_24(offset);
  262. DWORD encodedBranch = /*opcode=*/ 0x9000F000 | encodedOffset;
  263. Emit(currentBuffer, JmpOffset, encodedBranch);
  264. #elif _M_ARM64
  265. int64 offset = (epilogStart - (currentBuffer + JmpOffset));
  266. Assert(offset >= 0);
  267. DWORD encodedOffset = EncoderMD::BranchOffset_26(offset);
  268. DWORD encodedBranch = /*opcode=*/ 0x14000000 | encodedOffset;
  269. Emit(currentBuffer, JmpOffset, encodedBranch);
  270. #else
  271. // jump requires an offset from the end of the jump instruction.
  272. int offset = (int)(epilogStart - (currentBuffer + JmpOffset + sizeof(int)));
  273. Assert(offset >= 0);
  274. Emit(currentBuffer, JmpOffset, offset);
  275. #endif
  276. currentBuffer += callSize;
  277. bytesRemaining -= callSize;
  278. thunkCount++;
  279. }
  280. // Fill any gap till start of epilog
  281. bytesWritten = FillDebugBreak(currentBuffer, (DWORD)(epilogStart - currentBuffer));
  282. bytesRemaining -= bytesWritten;
  283. currentBuffer += bytesWritten;
  284. // Copy epilog
  285. bytesWritten = CopyWithAlignment(currentBuffer, bytesRemaining, Epilog, epilogSize, EMIT_BUFFER_ALIGNMENT);
  286. currentBuffer += bytesWritten;
  287. bytesRemaining -= bytesWritten;
  288. // Generate and register PDATA
  289. #if PDATA_ENABLED
  290. BYTE* epilogEnd = epilogStart + epilogSize;
  291. DWORD functionSize = (DWORD)(epilogEnd - buffer);
  292. Assert(pdataStart == currentBuffer);
  293. #ifdef _M_X64
  294. Assert(bytesRemaining >= pdataSize);
  295. BYTE* pdata = prologEncoder.Finalize(buffer, functionSize, pdataStart);
  296. bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
  297. #elif defined(_M_ARM32_OR_ARM64)
  298. RUNTIME_FUNCTION pdata;
  299. GeneratePdata(buffer, functionSize, &pdata);
  300. bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, (const BYTE*)&pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
  301. #endif
  302. void* pdataTable;
  303. PDataManager::RegisterPdata((PRUNTIME_FUNCTION) pdataStart, (ULONG_PTR) buffer, (ULONG_PTR) epilogEnd, &pdataTable);
  304. #endif
  305. if (!emitBufferManager.CommitReadWriteBufferForInterpreter(allocation, buffer, bufferSize))
  306. {
  307. Js::Throw::OutOfMemory();
  308. }
  309. // Call to set VALID flag for CFG check
  310. ThreadContext::GetContextForCurrentThread()->SetValidCallTargetForCFG(buffer);
  311. // Update object state only at the end when everything has succeeded - and no exceptions can be thrown.
  312. ThunkBlock* block = this->thunkBlocks.PrependNode(allocator, buffer);
  313. UNREFERENCED_PARAMETER(block);
  314. #if PDATA_ENABLED
  315. block->SetPdata(pdataTable);
  316. #endif
  317. this->thunkCount = thunkCount;
  318. this->thunkBuffer = buffer;
  319. }
  320. #if _M_ARM
  321. void InterpreterThunkEmitter::EncodeInterpreterThunk(__in_bcount(thunkSize) BYTE* thunkBuffer, __in_bcount(thunkSize) BYTE* thunkBufferStartAddress, __in const DWORD thunkSize, __in_bcount(epilogSize) BYTE* epilogStart, __in const DWORD epilogSize)
  322. {
  323. _Analysis_assume_(thunkSize == HeaderSize);
  324. // Encode MOVW
  325. DWORD lowerThunkBits = (uint32)this->interpreterThunk & 0x0000FFFF;
  326. DWORD movW = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/1, lowerThunkBits);
  327. Emit(thunkBuffer,ThunkAddressOffset, movW);
  328. // Encode MOVT
  329. DWORD higherThunkBits = ((uint32)this->interpreterThunk & 0xFFFF0000) >> 16;
  330. DWORD movT = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/1, higherThunkBits);
  331. Emit(thunkBuffer, ThunkAddressOffset + sizeof(movW), movT);
  332. // Encode LDR - Load of function Body
  333. thunkBuffer[FunctionBodyOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
  334. // Encode LDR - Load of interpreter thunk number
  335. thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
  336. // Encode MOVW R12, CallBlockStartAddress
  337. uintptr_t callBlockStartAddress = (uintptr_t)thunkBufferStartAddress + HeaderSize;
  338. uint totalThunkSize = (uint)(epilogStart - callBlockStartAddress);
  339. DWORD lowerCallBlockStartAddress = callBlockStartAddress & 0x0000FFFF;
  340. DWORD movWblockStart = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, lowerCallBlockStartAddress);
  341. Emit(thunkBuffer,CallBlockStartAddressInstrOffset, movWblockStart);
  342. // Encode MOVT R12, CallBlockStartAddress
  343. DWORD higherCallBlockStartAddress = (callBlockStartAddress & 0xFFFF0000) >> 16;
  344. DWORD movTblockStart = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/12, higherCallBlockStartAddress);
  345. Emit(thunkBuffer, CallBlockStartAddressInstrOffset + sizeof(movWblockStart), movTblockStart);
  346. //Encode MOV R12, CallBlockSize
  347. DWORD movBlockSize = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, (DWORD)totalThunkSize);
  348. Emit(thunkBuffer, CallThunkSizeInstrOffset, movBlockSize);
  349. Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);
  350. }
  351. DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
  352. {
  353. DWORD encodedMove = reg << 24;
  354. DWORD encodedImm = 0;
  355. EncoderMD::EncodeImmediate16(imm16, &encodedImm);
  356. encodedMove |= encodedImm;
  357. AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?");
  358. encodedMove |= opCode;
  359. return encodedMove;
  360. }
  361. void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
  362. {
  363. function->BeginAddress = 0x1; // Since our base address is the start of the function - this is offset from the base address
  364. function->Flag = 1; // Packed unwind data is used
  365. function->FunctionLength = functionSize / 2;
  366. function->Ret = 0; // Return via Pop
  367. function->H = 1; // Homes parameters
  368. function->Reg = 7; // No saved registers - R11 is the frame pointer - not considered here
  369. function->R = 1; // No registers are being saved.
  370. function->L = 1; // Save/restore LR register
  371. function->C = 1; // Frame pointer chain in R11 established
  372. function->StackAdjust = 0; // Stack allocation for the function
  373. }
  374. #elif _M_ARM64
  375. void InterpreterThunkEmitter::EncodeInterpreterThunk(__in_bcount(thunkSize) BYTE* thunkBuffer, __in_bcount(thunkSize) BYTE* thunkBufferStartAddress, __in const DWORD thunkSize, __in_bcount(epilogSize) BYTE* epilogStart, __in const DWORD epilogSize)
  376. {
  377. int addrOffset = ThunkAddressOffset;
  378. _Analysis_assume_(thunkSize == HeaderSize);
  379. AssertMsg(thunkSize == HeaderSize, "Mismatch in the size of the InterpreterHeaderThunk and the thunkSize used in this API (EncodeInterpreterThunk)");
  380. // Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
  381. // Encode MOVZ (movz x1, #<interpreterThunk 16-0 bits>)
  382. DWORD lowerThunkBits = (uint64)this->interpreterThunk & 0x0000FFFF;
  383. DWORD movZ = EncodeMove(/*Opcode*/ 0xD2800000, /*register x1*/1, lowerThunkBits); // no shift; hw = 00
  384. Emit(thunkBuffer,addrOffset, movZ);
  385. AssertMsg(sizeof(movZ) == 4, "movZ has to be 32-bit encoded");
  386. addrOffset+= sizeof(movZ);
  387. // Encode MOVK (movk x1, #<interpreterThunk 32-16 bits>, lsl #16)
  388. DWORD higherThunkBits = ((uint64)this->interpreterThunk & 0xFFFF0000) >> 16;
  389. DWORD movK = EncodeMove(/*Opcode*/ 0xF2A00000, /*register x1*/1, higherThunkBits); // left shift 16 bits; hw = 01
  390. Emit(thunkBuffer, addrOffset, movK);
  391. AssertMsg(sizeof(movK) == 4, "movK has to be 32-bit encoded");
  392. addrOffset+= sizeof(movK);
  393. // Encode MOVK (movk x1, #<interpreterThunk 48-32 bits>, lsl #16)
  394. higherThunkBits = ((uint64)this->interpreterThunk & 0xFFFF00000000) >> 32;
  395. movK = EncodeMove(/*Opcode*/ 0xF2C00000, /*register x1*/1, higherThunkBits); // left shift 32 bits; hw = 02
  396. Emit(thunkBuffer, addrOffset, movK);
  397. AssertMsg(sizeof(movK) == 4, "movK has to be 32-bit encoded");
  398. addrOffset += sizeof(movK);
  399. // Encode MOVK (movk x1, #<interpreterThunk 64-48 bits>, lsl #16)
  400. higherThunkBits = ((uint64)this->interpreterThunk & 0xFFFF000000000000) >> 48;
  401. movK = EncodeMove(/*Opcode*/ 0xF2E00000, /*register x1*/1, higherThunkBits); // left shift 48 bits; hw = 03
  402. AssertMsg(sizeof(movK) == 4, "movK has to be 32-bit encoded");
  403. Emit(thunkBuffer, addrOffset, movK);
  404. // Encode LDR - Load of function Body
  405. ULONG offsetOfFunctionInfo = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
  406. AssertMsg(offsetOfFunctionInfo % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
  407. AssertMsg(offsetOfFunctionInfo < 0x8000, "Immediate offset for LDR must be less than 0x8000");
  408. *(PULONG)&thunkBuffer[FunctionBodyOffset] |= (offsetOfFunctionInfo / 8) << 10;
  409. // Encode LDR - Load of interpreter thunk number
  410. ULONG offsetOfDynamicInterpreterThunk = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
  411. AssertMsg(offsetOfDynamicInterpreterThunk % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
  412. AssertMsg(offsetOfDynamicInterpreterThunk < 0x8000, "Immediate offset for LDR must be less than 0x8000");
  413. *(PULONG)&thunkBuffer[DynamicThunkAddressOffset] |= (offsetOfDynamicInterpreterThunk / 8) << 10;
  414. }
  415. DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
  416. {
  417. DWORD encodedMove = reg << 0;
  418. DWORD encodedImm = 0;
  419. EncoderMD::EncodeImmediate16(imm16, &encodedImm);
  420. encodedMove |= encodedImm;
  421. AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?");
  422. encodedMove |= opCode;
  423. return encodedMove;
  424. }
  425. void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
  426. {
  427. function->BeginAddress = 0x0; // Since our base address is the start of the function - this is offset from the base address
  428. function->Flag = 1; // Packed unwind data is used
  429. function->FunctionLength = functionSize / 4;
  430. function->RegF = 0; // number of non-volatile FP registers (d8-d15) saved in the canonical stack location
  431. function->RegI = 0; // number of non-volatile INT registers (r19-r28) saved in the canonical stack location
  432. function->H = 1; // Homes parameters
  433. // (indicating whether the function "homes" the integer parameter registers (r0-r7) by storing them at the very start of the function)
  434. function->CR = 3; // chained function, a store/load pair instruction is used in prolog/epilog <r29,lr>
  435. function->FrameSize = 5; // the number of bytes of stack that is allocated for this function divided by 16
  436. }
  437. #else
  438. void InterpreterThunkEmitter::EncodeInterpreterThunk(__in_bcount(thunkSize) BYTE* thunkBuffer, __in_bcount(thunkSize) BYTE* thunkBufferStartAddress, __in const DWORD thunkSize, __in_bcount(epilogSize) BYTE* epilogStart, __in const DWORD epilogSize)
  439. {
  440. _Analysis_assume_(thunkSize == HeaderSize);
  441. Emit(thunkBuffer, ThunkAddressOffset, (uintptr_t)interpreterThunk);
  442. thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
  443. thunkBuffer[FunctionBodyOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
  444. Emit(thunkBuffer, CallBlockStartAddrOffset, (uintptr_t) thunkBufferStartAddress + HeaderSize);
  445. uint totalThunkSize = (uint)(epilogStart - (thunkBufferStartAddress + HeaderSize));
  446. Emit(thunkBuffer, ThunkSizeOffset, totalThunkSize);
  447. Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);
  448. }
  449. #endif
  450. inline /*static*/
  451. DWORD InterpreterThunkEmitter::FillDebugBreak(__out_bcount_full(count) BYTE* dest, __in DWORD count)
  452. {
  453. #if defined(_M_ARM)
  454. Assert(count % 2 == 0);
  455. #elif defined(_M_ARM64)
  456. Assert(count % 4 == 0);
  457. #endif
  458. CustomHeap::FillDebugBreak(dest, count);
  459. return count;
  460. }
  461. inline /*static*/
  462. DWORD InterpreterThunkEmitter::CopyWithAlignment(
  463. __in_bcount(sizeInBytes) BYTE* dest,
  464. __in const DWORD sizeInBytes,
  465. __in_bcount(srcSize) const BYTE* src,
  466. __in_range(0, sizeInBytes) const DWORD srcSize,
  467. __in const DWORD alignment)
  468. {
  469. js_memcpy_s(dest, sizeInBytes, src, srcSize);
  470. dest += srcSize;
  471. DWORD alignPad = Math::Align(srcSize, alignment) - srcSize;
  472. Assert(alignPad <= (sizeInBytes - srcSize));
  473. if(alignPad > 0 && alignPad <= (sizeInBytes - srcSize))
  474. {
  475. FillDebugBreak(dest, alignPad);
  476. return srcSize + alignPad;
  477. }
  478. return srcSize;
  479. }
  480. // We only decommit at close because there might still be some
  481. // code running here.
  482. // The destructor of emitBufferManager will cause the eventual release.
  483. void InterpreterThunkEmitter::Close()
  484. {
  485. #if PDATA_ENABLED
  486. auto unregiserPdata = ([&] (const ThunkBlock& block)
  487. {
  488. PDataManager::UnregisterPdata((PRUNTIME_FUNCTION) block.GetPdata());
  489. });
  490. thunkBlocks.Iterate(unregiserPdata);
  491. freeListedThunkBlocks.Iterate(unregiserPdata);
  492. #endif
  493. this->thunkBlocks.Clear(allocator);
  494. this->freeListedThunkBlocks.Clear(allocator);
  495. emitBufferManager.Decommit();
  496. this->thunkBuffer = nullptr;
  497. this->thunkCount = 0;
  498. }
  499. void InterpreterThunkEmitter::Release(BYTE* thunkAddress, bool addtoFreeList)
  500. {
  501. if(!addtoFreeList)
  502. {
  503. return;
  504. }
  505. auto predicate = ([=] (const ThunkBlock& block)
  506. {
  507. return block.Contains(thunkAddress);
  508. });
  509. ThunkBlock* block = freeListedThunkBlocks.Find(predicate);
  510. if(!block)
  511. {
  512. block = thunkBlocks.MoveTo(&freeListedThunkBlocks, predicate);
  513. }
  514. // if EnsureFreeList fails in an OOM scenario - we just leak the thunks
  515. if(block && block->EnsureFreeList(allocator))
  516. {
  517. block->Release(thunkAddress);
  518. }
  519. }
  520. BYTE* InterpreterThunkEmitter::AllocateFromFreeList(PVOID* ppDynamicInterpreterThunk )
  521. {
  522. ThunkBlock& block = this->freeListedThunkBlocks.Head();
  523. BYTE* thunk = block.AllocateFromFreeList();
  524. #if _M_ARM
  525. thunk = (BYTE*)((DWORD)thunk | 0x01);
  526. #endif
  527. if(block.IsFreeListEmpty())
  528. {
  529. this->freeListedThunkBlocks.MoveHeadTo(&this->thunkBlocks);
  530. }
  531. *ppDynamicInterpreterThunk = thunk;
  532. BYTE* entryPoint = block.GetStart();
  533. #if _M_ARM
  534. entryPoint = (BYTE*)((DWORD)entryPoint | 0x01);
  535. #endif
  536. return entryPoint;
  537. }
  538. bool ThunkBlock::Contains(BYTE* address) const
  539. {
  540. bool contains = address >= start && address < (start + InterpreterThunkEmitter::BlockSize);
  541. return contains;
  542. }
  543. void ThunkBlock::Release(BYTE* address)
  544. {
  545. Assert(Contains(address));
  546. Assert(this->freeList);
  547. BVIndex index = FromThunkAddress(address);
  548. this->freeList->Set(index);
  549. }
  550. BYTE* ThunkBlock::AllocateFromFreeList()
  551. {
  552. Assert(this->freeList);
  553. BVIndex index = this->freeList->GetNextBit(0);
  554. BYTE* address = ToThunkAddress(index);
  555. this->freeList->Clear(index);
  556. return address;
  557. }
  558. BVIndex ThunkBlock::FromThunkAddress(BYTE* address)
  559. {
  560. int index = ((uint)(address - start) - InterpreterThunkEmitter::HeaderSize) / InterpreterThunkEmitter::ThunkSize;
  561. Assert(index < InterpreterThunkEmitter::ThunksPerBlock);
  562. return index;
  563. }
  564. BYTE* ThunkBlock::ToThunkAddress(BVIndex index)
  565. {
  566. Assert(index < InterpreterThunkEmitter::ThunksPerBlock);
  567. BYTE* address = start + InterpreterThunkEmitter::HeaderSize + InterpreterThunkEmitter::ThunkSize * index;
  568. return address;
  569. }
  570. bool ThunkBlock::EnsureFreeList(ArenaAllocator* allocator)
  571. {
  572. if(!this->freeList)
  573. {
  574. this->freeList = BVFixed::NewNoThrow(InterpreterThunkEmitter::ThunksPerBlock, allocator);
  575. }
  576. return this->freeList != nullptr;
  577. }
  578. bool ThunkBlock::IsFreeListEmpty() const
  579. {
  580. Assert(this->freeList);
  581. return this->freeList->IsAllClear();
  582. }
  583. #endif