InterpreterThunkEmitter.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "Backend.h"
  6. #ifdef ENABLE_NATIVE_CODEGEN
  7. #ifdef _M_X64
  8. #ifdef _WIN32
  // Windows x64 variant of the interpreter thunk header.
  // The *Offset constants are byte offsets into InterpreterThunk[] of the zero-filled operand
  // fields that EncodeInterpreterThunk overwrites each time a thunk block is emitted.
  // Displacement byte of "mov rax, [rcx+...]".
  9. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 23;
  // Displacement byte of "mov rdx, [rax+...]".
  10. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 27;
  // 64-bit immediate of "mov rax, CallBlockStartAddress".
  11. const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 37;
  // 32-bit immediate of "cmp rcx, ThunkSize".
  12. const BYTE InterpreterThunkEmitter::ThunkSizeOffset = 51;
  // Immediate of "mov rcx, errorcode".
  13. const BYTE InterpreterThunkEmitter::ErrorOffset = 60;
  // 64-bit immediate of "mov rax, <thunk>".
  14. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 77;
  // PrologSize and StackAllocSize are handed to PrologEncoder::EncodeSmallProlog in NewThunkBlock;
  // StackAllocSize is also the immediate of the "sub rsp"/"add rsp" instructions below.
  15. const BYTE InterpreterThunkEmitter::PrologSize = 76;
  16. const BYTE InterpreterThunkEmitter::StackAllocSize = 0x28;
  17. //
  18. // Home the arguments onto the stack and pass a pointer to the base of the stack location to the inner thunk
  19. //
  20. // Calling convention requires that caller should allocate at least 0x20 bytes and the stack be 16 byte aligned.
  21. // Hence, we allocate 0x28 bytes of stack space for the callee to use. The callee uses 8 bytes to push the first
  22. // argument and the rest 0x20 ensures alignment is correct.
  23. //
  24. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  25. 0x48, 0x89, 0x54, 0x24, 0x10, // mov qword ptr [rsp+10h],rdx
  26. 0x48, 0x89, 0x4C, 0x24, 0x08, // mov qword ptr [rsp+8],rcx
  27. 0x4C, 0x89, 0x44, 0x24, 0x18, // mov qword ptr [rsp+18h],r8
  28. 0x4C, 0x89, 0x4C, 0x24, 0x20, // mov qword ptr [rsp+20h],r9
  29. 0x48, 0x8B, 0x41, 0x00, // mov rax, qword ptr [rcx+FunctionBodyOffset]
  30. 0x48, 0x8B, 0x50, 0x00, // mov rdx, qword ptr [rax+DynamicThunkAddressOffset]
  31. // Range Check for Valid call target
  32. 0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xFFFFFFFFFFFFFFF8h ;Force 8 byte alignment
  33. 0x48, 0x8b, 0xca, // mov rcx, rdx
  34. 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
  35. 0x48, 0x2b, 0xc8, // sub rcx, rax
  36. 0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
  37. 0x76, 0x09, // jbe $safe
  38. 0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
  39. 0xcd, 0x29, // int 29h
  40. // $safe:
  41. 0x48, 0x8D, 0x4C, 0x24, 0x08, // lea rcx, [rsp+8] ;Load the address to stack
  42. 0x48, 0x83, 0xEC, StackAllocSize, // sub rsp,28h
  43. 0x48, 0xB8, 0x00, 0x00, 0x00 ,0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk>
  44. 0xFF, 0xE2, // jmp rdx
  45. 0xCC // int 3 ;for alignment to size of 8 we are adding this
  46. };
  // Shared tail of a thunk block: undoes the stack allocation made by the header and returns.
  47. const BYTE InterpreterThunkEmitter::Epilog[] = {
  48. 0x48, 0x83, 0xC4, StackAllocSize, // add rsp,28h
  49. 0xC3 // ret
  50. };
  51. #else // Sys V AMD64
  // System V AMD64 variant of the interpreter thunk header.
  // The *Offset constants are byte offsets into InterpreterThunk[] of the zero-filled operand
  // fields that EncodeInterpreterThunk overwrites each time a thunk block is emitted.
  // Displacement byte of "mov rax, [rdi + ...]".
  52. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 7;
  // Displacement byte of "mov rdx, [rax + ...]".
  53. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 11;
  // 64-bit immediate of "mov rax, CallBlockStartAddress".
  54. const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 21;
  // 32-bit immediate of "cmp rcx, ThunkSize".
  55. const BYTE InterpreterThunkEmitter::ThunkSizeOffset = 35;
  // Immediate of "mov rcx, errorcode".
  56. const BYTE InterpreterThunkEmitter::ErrorOffset = 44;
  // 64-bit immediate of "mov rax, <thunk>".
  57. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 57;
  // PrologSize and StackAllocSize are handed to PrologEncoder::EncodeSmallProlog in NewThunkBlock.
  // This variant sets up an rbp frame instead of allocating stack, hence StackAllocSize is 0.
  58. const BYTE InterpreterThunkEmitter::PrologSize = 56;
  59. const BYTE InterpreterThunkEmitter::StackAllocSize = 0x0;
  60. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  61. 0x55, // push rbp // Prolog - setup the stack frame
  62. 0x48, 0x89, 0xe5, // mov rbp, rsp
  63. 0x48, 0x8b, 0x47, 0x00, // mov rax, qword ptr [rdi + FunctionBodyOffset]
  64. 0x48, 0x8b, 0x50, 0x00, // mov rdx, qword ptr [rax + DynamicThunkAddressOffset]
  65. // Range Check for Valid call target
  66. 0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xfffffffffffffff8 // Force 8 byte alignment
  67. 0x48, 0x89, 0xd1, // mov rcx, rdx
  68. 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
  69. 0x48, 0x29, 0xc1, // sub rcx, rax
  70. 0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
  71. 0x76, 0x09, // jbe safe
  72. 0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
  73. 0xcd, 0x29, // int 29h <-- xplat TODO: just to exit
  74. // safe:
  75. 0x48, 0x8d, 0x7c, 0x24, 0x10, // lea rdi, [rsp+0x10]
  76. 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk> // stack already 16-byte aligned
  77. 0xff, 0xe2, // jmp rdx
  78. 0xcc, 0xcc, 0xcc, 0xcc, 0xcc // int 3 // for alignment to size of 8
  79. };
  // Shared tail of a thunk block: restores rbp pushed by the header and returns.
  80. const BYTE InterpreterThunkEmitter::Epilog[] = {
  81. 0x5d, // pop rbp
  82. 0xc3 // ret
  83. };
  84. #endif
  85. #elif defined(_M_ARM)
  // ARM (Thumb-2) variant of the interpreter thunk header.
  // The constants are byte offsets into InterpreterThunk[] of the placeholder instructions/operands
  // that the ARM EncodeInterpreterThunk overwrites each time a thunk block is emitted.
  // Start of the "movw r1 / movt r1" pair that loads the static interpreter thunk address.
  86. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 8;
  // Immediate byte of "ldr.w r2,[r0,#...]".
  87. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 18;
  // Immediate byte of "ldr.w r3,[r2,#...]".
  88. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 22;
  // Start of the "movw r12 / movt r12" pair that loads CallBlockStartAddress.
  89. const BYTE InterpreterThunkEmitter::CallBlockStartAddressInstrOffset = 38;
  // The "mov r12, ThunkSize" instruction.
  90. const BYTE InterpreterThunkEmitter::CallThunkSizeInstrOffset = 50;
  // Immediate byte of "mov r0, errorcode".
  91. const BYTE InterpreterThunkEmitter::ErrorOffset = 60;
  92. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  93. 0x0F, 0xB4, // push {r0-r3}
  94. 0x2D, 0xE9, 0x00, 0x48, // push {r11,lr}
  95. 0xEB, 0x46, // mov r11,sp
  96. 0x00, 0x00, 0x00, 0x00, // movw r1,ThunkAddress
  97. 0x00, 0x00, 0x00, 0x00, // movt r1,ThunkAddress
  98. 0xD0, 0xF8, 0x00, 0x20, // ldr.w r2,[r0,#0x00]
  99. 0xD2, 0xF8, 0x00, 0x30, // ldr.w r3,[r2,#0x00]
  100. 0x4F, 0xF6, 0xF9, 0x70, // mov r0,#0xFFF9
  101. 0xCF, 0xF6, 0xFF, 0x70, // movt r0,#0xFFFF
  102. 0x03, 0xEA, 0x00, 0x03, // and r3,r3,r0
  103. 0x18, 0x46, // mov r0, r3
  104. 0x00, 0x00, 0x00, 0x00, // movw r12, CallBlockStartAddress
  105. 0x00, 0x00, 0x00, 0x00, // movt r12, CallBlockStartAddress
  106. 0xA0, 0xEB, 0x0C, 0x00, // sub r0, r12
  107. 0x00, 0x00, 0x00, 0x00, // mov r12, ThunkSize
  108. 0x60, 0x45, // cmp r0, r12
  109. 0x02, 0xD9, // bls $safe
  110. 0x4F, 0xF0, 0x00, 0x00, // mov r0, errorcode
  111. 0xFB, 0xDE, // Equivalent to int 0x29
  112. //$safe:
  113. 0x02, 0xA8, // add r0,sp,#8
  114. 0x18, 0x47, // bx r3
  115. 0xFE, 0xDE, // int 3 ;Required for alignment
  116. 0xFE, 0xDE // int 3 ;Required for alignment
  117. };
  // Byte offset inside Call[] of the "b.w epilog" placeholder that NewThunkBlock encodes per stub.
  118. const BYTE InterpreterThunkEmitter::JmpOffset = 2;
  // Per-function call stub; NewThunkBlock copies it repeatedly after the header.
  119. const BYTE InterpreterThunkEmitter::Call[] = {
  120. 0x88, 0x47, // blx r1
  121. 0x00, 0x00, 0x00, 0x00, // b.w epilog
  122. 0xFE, 0xDE, // int 3 ;Required for alignment
  123. };
  // Shared tail of a thunk block, reached through the branch in each Call stub.
  124. const BYTE InterpreterThunkEmitter::Epilog[] = {
  125. 0x5D, 0xF8, 0x04, 0xBB, // pop {r11}
  126. 0x5D, 0xF8, 0x14, 0xFB // ldr pc,[sp],#0x14
  127. };
  128. #elif defined(_M_ARM64)
  // ARM64 variant of the interpreter thunk header.
  // The constants are byte offsets into InterpreterThunk[] of the instructions that the ARM64
  // EncodeInterpreterThunk patches each time a thunk block is emitted.
  // The "ldr x2, [x0, #...]" instruction.
  129. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 24;
  // The "ldr x3, [x2, #...]" instruction.
  130. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 28;
  // First of the four movz/movk placeholders that load the static interpreter thunk address into x1.
  131. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 32;
  132. //TODO: saravind :Implement Range Check for ARM64
  133. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  134. 0xFD, 0x7B, 0xBB, 0xA9, //stp fp, lr, [sp, #-80]! ;Prologue
  135. 0xFD, 0x03, 0x00, 0x91, //mov fp, sp ;update frame pointer to the stack pointer
  136. 0xE0, 0x07, 0x01, 0xA9, //stp x0, x1, [sp, #16] ;Prologue again; save all registers
  137. 0xE2, 0x0F, 0x02, 0xA9, //stp x2, x3, [sp, #32]
  138. 0xE4, 0x17, 0x03, 0xA9, //stp x4, x5, [sp, #48]
  139. 0xE6, 0x1F, 0x04, 0xA9, //stp x6, x7, [sp, #64]
  140. 0x02, 0x00, 0x40, 0xF9, //ldr x2, [x0, #0x00] ;offset will be replaced with Offset of FunctionInfo
  141. 0x43, 0x00, 0x40, 0xF9, //ldr x3, [x2, #0x00] ;offset will be replaced with offset of DynamicInterpreterThunk
  142. //Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
  143. 0x00, 0x00, 0x00, 0x00, //movz x1, #0x00 ;This is overwritten with the actual thunk address(16 - 0 bits) move
  144. 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #16 ;This is overwritten with the actual thunk address(32 - 16 bits) move
  145. 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #32 ;This is overwritten with the actual thunk address(48 - 32 bits) move
  146. 0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #48 ;This is overwritten with the actual thunk address(64 - 48 bits) move
  147. 0xE0, 0x43, 0x00, 0x91, //add x0, sp, #16
  148. 0x60, 0x00, 0x1F, 0xD6 //br x3
  149. };
  // Byte offset inside Call[] of the "b epilog" placeholder that NewThunkBlock encodes per stub.
  150. const BYTE InterpreterThunkEmitter::JmpOffset = 4;
  // Per-function call stub; NewThunkBlock copies it repeatedly after the header.
  151. const BYTE InterpreterThunkEmitter::Call[] = {
  152. 0x20, 0x00, 0x3f, 0xd6, // blr x1
  153. 0x00, 0x00, 0x00, 0x00 // b epilog
  154. };
  // Shared tail of a thunk block, reached through the branch in each Call stub.
  155. const BYTE InterpreterThunkEmitter::Epilog[] = {
  156. 0xfd, 0x7b, 0xc5, 0xa8, // ldp fp, lr, [sp], #80
  157. 0xc0, 0x03, 0x5f, 0xd6 // ret
  158. };
  159. #else
  // x86 variant of the interpreter thunk header (fallback when no other architecture macro matches).
  // The *Offset constants are byte offsets into InterpreterThunk[] of the zero-filled operand
  // fields that EncodeInterpreterThunk overwrites each time a thunk block is emitted.
  // Displacement byte of "mov eax, [eax+...]".
  160. const BYTE InterpreterThunkEmitter::FunctionBodyOffset = 8;
  // Displacement byte of "mov ecx, [eax+...]".
  161. const BYTE InterpreterThunkEmitter::DynamicThunkAddressOffset = 11;
  // 32-bit immediate of "sub eax, CallBlockStartAddress".
  162. const BYTE InterpreterThunkEmitter::CallBlockStartAddrOffset = 18;
  // 32-bit immediate of "cmp eax, ThunkSize".
  163. const BYTE InterpreterThunkEmitter::ThunkSizeOffset = 23;
  // Immediate of "mov ecx, errorcode".
  164. const BYTE InterpreterThunkEmitter::ErrorOffset = 30;
  // 32-bit immediate of "mov eax, <thunk>".
  165. const BYTE InterpreterThunkEmitter::ThunkAddressOffset = 41;
  166. const BYTE InterpreterThunkEmitter::InterpreterThunk[] = {
  167. 0x55, // push ebp ;Prolog - setup the stack frame
  168. 0x8B, 0xEC, // mov ebp,esp
  169. 0x8B, 0x45, 0x08, // mov eax, dword ptr [ebp+8]
  170. 0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionBodyOffset]
  171. 0x8B, 0x48, 0x00, // mov ecx, dword ptr [eax+DynamicThunkAddressOffset]
  172. // Range Check for Valid call target
  173. 0x83, 0xE1, 0xF8, // and ecx, 0FFFFFFF8h
  174. 0x8b, 0xc1, // mov eax, ecx
  175. 0x2d, 0x00, 0x00, 0x00, 0x00, // sub eax, CallBlockStartAddress
  176. 0x3d, 0x00, 0x00, 0x00, 0x00, // cmp eax, ThunkSize
  177. 0x76, 0x07, // jbe SHORT $safe
  178. 0xb9, 0x00, 0x00, 0x00, 0x00, // mov ecx, errorcode
  179. 0xCD, 0x29, // int 29h
  180. //$safe
  181. 0x8D, 0x45, 0x08, // lea eax, [ebp+8]
  182. 0x50, // push eax
  183. 0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, <thunk>
  184. 0xFF, 0xE1, // jmp ecx
  185. 0xCC // int 3 for 8byte alignment
  186. };
  // Shared tail of a thunk block: restores ebp pushed by the header and returns.
  187. const BYTE InterpreterThunkEmitter::Epilog[] = {
  188. 0x5D, // pop ebp
  189. 0xC3 // ret
  190. };
  191. #endif
  192. #if defined(_M_X64) || defined(_M_IX86)
  // Byte offset inside Call[] of the 32-bit relative displacement of the jmp; NewThunkBlock fills it
  // in for every stub with the distance to the epilog, measured from the end of the jmp instruction.
  193. const BYTE InterpreterThunkEmitter::JmpOffset = 3;
  // Per-function call stub shared by the x86 and x64 variants; NewThunkBlock copies it repeatedly
  // after the header.
  194. const BYTE InterpreterThunkEmitter::Call[] = {
  195. 0xFF, 0xD0, // call rax
  196. 0xE9, 0x00, 0x00, 0x00, 0x00, // jmp [offset]
  197. 0xCC, // int 3 ;for alignment to size of 8 we are adding this
  198. };
  199. #endif
  // Geometry of one thunk block.
  // Number of pages per block.
  200. const BYTE InterpreterThunkEmitter::PageCount = 1;
  // Size in bytes of one block; NewThunkBlock allocates exactly this much.
  201. const uint InterpreterThunkEmitter::BlockSize = AutoSystemInfo::PageSize * InterpreterThunkEmitter::PageCount;
  // Size in bytes of the InterpreterThunk header placed at the start of each block.
  202. const BYTE InterpreterThunkEmitter::HeaderSize = sizeof(InterpreterThunk);
  // Size in bytes of one Call stub, i.e. the stride between consecutive per-function thunks.
  203. const BYTE InterpreterThunkEmitter::ThunkSize = sizeof(Call);
  // Capacity bound used for free-list sizing and index asserts. Note that it does not subtract the
  // epilog/PDATA space that NewThunkBlock reserves at the end of the block.
  204. const uint InterpreterThunkEmitter::ThunksPerBlock = (BlockSize - HeaderSize) / ThunkSize;
  205. InterpreterThunkEmitter::InterpreterThunkEmitter(ArenaAllocator* allocator, CustomHeap::CodePageAllocators * codePageAllocators, bool isAsmInterpreterThunk) :
  206. // TODO: michhol oop JIT move interpreter thunk emitter out of process
  207. emitBufferManager(allocator, codePageAllocators, /*scriptContext*/ nullptr, _u("Interpreter thunk buffer"), GetCurrentProcess()),
  208. allocation(nullptr),
  209. allocator(allocator),
  210. thunkCount(0),
  211. thunkBuffer(nullptr),
  212. isAsmInterpreterThunk(isAsmInterpreterThunk)
  213. {
  214. }
  215. //
  216. // Returns the next thunk. Batch allocated PageCount pages of thunks and issue them one at a time
  217. //
  218. BYTE* InterpreterThunkEmitter::GetNextThunk(PVOID* ppDynamicInterpreterThunk)
  219. {
  220. Assert(ppDynamicInterpreterThunk);
  221. Assert(*ppDynamicInterpreterThunk == nullptr);
  222. if(thunkCount == 0)
  223. {
  224. if(!this->freeListedThunkBlocks.Empty())
  225. {
  226. return AllocateFromFreeList(ppDynamicInterpreterThunk);
  227. }
  228. NewThunkBlock();
  229. }
  230. Assert(this->thunkBuffer != nullptr);
  231. BYTE* thunk = this->thunkBuffer;
  232. #if _M_ARM
  233. thunk = (BYTE*)((DWORD)thunk | 0x01);
  234. #endif
  235. *ppDynamicInterpreterThunk = thunk + HeaderSize + ((--thunkCount) * ThunkSize);
  236. #if _M_ARM
  237. AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x6) == 0, "Not 8 byte aligned?");
  238. #else
  239. AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x7) == 0, "Not 8 byte aligned?");
  240. #endif
  241. return thunk;
  242. }
  243. //
  244. // Interpreter thunks have an entrypoint at the beginning of the page boundary. Each function has a unique thunk return address
  245. // and this function can convert to the unique thunk return address to the beginning of the page which corresponds with the entrypoint
  246. //
  247. void* InterpreterThunkEmitter::ConvertToEntryPoint(PVOID dynamicInterpreterThunk)
  248. {
  249. Assert(dynamicInterpreterThunk != nullptr);
  250. void* entryPoint = (void*)((size_t)dynamicInterpreterThunk & (~((size_t)(BlockSize) - 1)));
  251. #if _M_ARM
  252. entryPoint = (BYTE*)((DWORD)entryPoint | 0x01);
  253. #endif
  254. return entryPoint;
  255. }
  256. void InterpreterThunkEmitter::NewThunkBlock()
  257. {
  258. Assert(this->thunkCount == 0);
  259. BYTE* buffer;
  260. BYTE* currentBuffer;
  261. DWORD bufferSize = BlockSize;
  262. DWORD thunkCount = 0;
  263. void * interpreterThunk = nullptr;
  264. // the static interpreter thunk invoked by the dynamic emitted thunk
  265. #ifdef ASMJS_PLAT
  266. if (isAsmInterpreterThunk)
  267. {
  268. interpreterThunk = (void*)Js::InterpreterStackFrame::InterpreterAsmThunk;
  269. }
  270. else
  271. #endif
  272. {
  273. interpreterThunk = (void*)Js::InterpreterStackFrame::InterpreterThunk;
  274. }
  275. allocation = emitBufferManager.AllocateBuffer(bufferSize, &buffer);
  276. if (!emitBufferManager.ProtectBufferWithExecuteReadWriteForInterpreter(allocation))
  277. {
  278. Js::Throw::OutOfMemory();
  279. }
  280. currentBuffer = buffer;
  281. #ifdef _M_X64
  282. PrologEncoder prologEncoder(allocator);
  283. prologEncoder.EncodeSmallProlog(PrologSize, StackAllocSize);
  284. DWORD pdataSize = prologEncoder.SizeOfPData();
  285. #elif defined(_M_ARM32_OR_ARM64)
  286. DWORD pdataSize = sizeof(RUNTIME_FUNCTION);
  287. #else
  288. DWORD pdataSize = 0;
  289. #endif
  290. DWORD bytesRemaining = bufferSize;
  291. DWORD bytesWritten = 0;
  292. DWORD epilogSize = sizeof(Epilog);
  293. // Ensure there is space for PDATA at the end
  294. BYTE* pdataStart = currentBuffer + (bufferSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT));
  295. BYTE* epilogStart = pdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT);
  296. // Copy the thunk buffer and modify it.
  297. js_memcpy_s(currentBuffer, bytesRemaining, InterpreterThunk, HeaderSize);
  298. EncodeInterpreterThunk(currentBuffer, buffer, HeaderSize, epilogStart, epilogSize, interpreterThunk);
  299. currentBuffer += HeaderSize;
  300. bytesRemaining -= HeaderSize;
  301. // Copy call buffer
  302. DWORD callSize = sizeof(Call);
  303. while(currentBuffer < epilogStart - callSize)
  304. {
  305. js_memcpy_s(currentBuffer, bytesRemaining, Call, callSize);
  306. #if _M_ARM
  307. int offset = (epilogStart - (currentBuffer + JmpOffset));
  308. Assert(offset >= 0);
  309. DWORD encodedOffset = EncoderMD::BranchOffset_T2_24(offset);
  310. DWORD encodedBranch = /*opcode=*/ 0x9000F000 | encodedOffset;
  311. Emit(currentBuffer, JmpOffset, encodedBranch);
  312. #elif _M_ARM64
  313. int64 offset = (epilogStart - (currentBuffer + JmpOffset));
  314. Assert(offset >= 0);
  315. DWORD encodedOffset = EncoderMD::BranchOffset_26(offset);
  316. DWORD encodedBranch = /*opcode=*/ 0x14000000 | encodedOffset;
  317. Emit(currentBuffer, JmpOffset, encodedBranch);
  318. #else
  319. // jump requires an offset from the end of the jump instruction.
  320. int offset = (int)(epilogStart - (currentBuffer + JmpOffset + sizeof(int)));
  321. Assert(offset >= 0);
  322. Emit(currentBuffer, JmpOffset, offset);
  323. #endif
  324. currentBuffer += callSize;
  325. bytesRemaining -= callSize;
  326. thunkCount++;
  327. }
  328. // Fill any gap till start of epilog
  329. bytesWritten = FillDebugBreak(currentBuffer, (DWORD)(epilogStart - currentBuffer));
  330. bytesRemaining -= bytesWritten;
  331. currentBuffer += bytesWritten;
  332. // Copy epilog
  333. bytesWritten = CopyWithAlignment(currentBuffer, bytesRemaining, Epilog, epilogSize, EMIT_BUFFER_ALIGNMENT);
  334. currentBuffer += bytesWritten;
  335. bytesRemaining -= bytesWritten;
  336. // Generate and register PDATA
  337. #if PDATA_ENABLED
  338. BYTE* epilogEnd = epilogStart + epilogSize;
  339. DWORD functionSize = (DWORD)(epilogEnd - buffer);
  340. Assert(pdataStart == currentBuffer);
  341. #ifdef _M_X64
  342. Assert(bytesRemaining >= pdataSize);
  343. BYTE* pdata = prologEncoder.Finalize(buffer, functionSize, pdataStart);
  344. bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
  345. #elif defined(_M_ARM32_OR_ARM64)
  346. RUNTIME_FUNCTION pdata;
  347. GeneratePdata(buffer, functionSize, &pdata);
  348. bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, (const BYTE*)&pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
  349. #endif
  350. void* pdataTable;
  351. PDataManager::RegisterPdata((PRUNTIME_FUNCTION) pdataStart, (ULONG_PTR) buffer, (ULONG_PTR) epilogEnd, &pdataTable);
  352. #endif
  353. if (!emitBufferManager.CommitReadWriteBufferForInterpreter(allocation, buffer, bufferSize))
  354. {
  355. Js::Throw::OutOfMemory();
  356. }
  357. // Call to set VALID flag for CFG check
  358. ThreadContext::GetContextForCurrentThread()->SetValidCallTargetForCFG(buffer);
  359. // Update object state only at the end when everything has succeeded - and no exceptions can be thrown.
  360. ThunkBlock* block = this->thunkBlocks.PrependNode(allocator, buffer);
  361. UNREFERENCED_PARAMETER(block);
  362. #if PDATA_ENABLED
  363. block->SetPdata(pdataTable);
  364. #endif
  365. this->thunkCount = thunkCount;
  366. this->thunkBuffer = buffer;
  367. }
  368. #if _M_ARM
  369. void InterpreterThunkEmitter::EncodeInterpreterThunk(__in_bcount(thunkSize) BYTE* thunkBuffer, __in_bcount(thunkSize) BYTE* thunkBufferStartAddress, __in const DWORD thunkSize, __in_bcount(epilogSize) BYTE* epilogStart, __in const DWORD epilogSize, __in void * const interpreterThunk)
  370. {
  371. _Analysis_assume_(thunkSize == HeaderSize);
  372. // Encode MOVW
  373. DWORD lowerThunkBits = (uint32)interpreterThunk & 0x0000FFFF;
  374. DWORD movW = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/1, lowerThunkBits);
  375. Emit(thunkBuffer,ThunkAddressOffset, movW);
  376. // Encode MOVT
  377. DWORD higherThunkBits = ((uint32)interpreterThunk & 0xFFFF0000) >> 16;
  378. DWORD movT = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/1, higherThunkBits);
  379. Emit(thunkBuffer, ThunkAddressOffset + sizeof(movW), movT);
  380. // Encode LDR - Load of function Body
  381. thunkBuffer[FunctionBodyOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
  382. // Encode LDR - Load of interpreter thunk number
  383. thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
  384. // Encode MOVW R12, CallBlockStartAddress
  385. uintptr_t callBlockStartAddress = (uintptr_t)thunkBufferStartAddress + HeaderSize;
  386. uint totalThunkSize = (uint)(epilogStart - callBlockStartAddress);
  387. DWORD lowerCallBlockStartAddress = callBlockStartAddress & 0x0000FFFF;
  388. DWORD movWblockStart = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, lowerCallBlockStartAddress);
  389. Emit(thunkBuffer,CallBlockStartAddressInstrOffset, movWblockStart);
  390. // Encode MOVT R12, CallBlockStartAddress
  391. DWORD higherCallBlockStartAddress = (callBlockStartAddress & 0xFFFF0000) >> 16;
  392. DWORD movTblockStart = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/12, higherCallBlockStartAddress);
  393. Emit(thunkBuffer, CallBlockStartAddressInstrOffset + sizeof(movWblockStart), movTblockStart);
  394. //Encode MOV R12, CallBlockSize
  395. DWORD movBlockSize = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, (DWORD)totalThunkSize);
  396. Emit(thunkBuffer, CallThunkSizeInstrOffset, movBlockSize);
  397. Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);
  398. }
  399. DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
  400. {
  401. DWORD encodedMove = reg << 24;
  402. DWORD encodedImm = 0;
  403. EncoderMD::EncodeImmediate16(imm16, &encodedImm);
  404. encodedMove |= encodedImm;
  405. AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?");
  406. encodedMove |= opCode;
  407. return encodedMove;
  408. }
  409. void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
  410. {
  411. function->BeginAddress = 0x1; // Since our base address is the start of the function - this is offset from the base address
  412. function->Flag = 1; // Packed unwind data is used
  413. function->FunctionLength = functionSize / 2;
  414. function->Ret = 0; // Return via Pop
  415. function->H = 1; // Homes parameters
  416. function->Reg = 7; // No saved registers - R11 is the frame pointer - not considered here
  417. function->R = 1; // No registers are being saved.
  418. function->L = 1; // Save/restore LR register
  419. function->C = 1; // Frame pointer chain in R11 established
  420. function->StackAdjust = 0; // Stack allocation for the function
  421. }
  422. #elif _M_ARM64
  423. void InterpreterThunkEmitter::EncodeInterpreterThunk(__in_bcount(thunkSize) BYTE* thunkBuffer, __in_bcount(thunkSize) BYTE* thunkBufferStartAddress, __in const DWORD thunkSize, __in_bcount(epilogSize) BYTE* epilogStart, __in const DWORD epilogSize, __in void * const interpreterThunk)
  424. {
  425. int addrOffset = ThunkAddressOffset;
  426. _Analysis_assume_(thunkSize == HeaderSize);
  427. AssertMsg(thunkSize == HeaderSize, "Mismatch in the size of the InterpreterHeaderThunk and the thunkSize used in this API (EncodeInterpreterThunk)");
  428. // Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
  429. // Encode MOVZ (movz x1, #<interpreterThunk 16-0 bits>)
  430. DWORD lowerThunkBits = (uint64)interpreterThunk & 0x0000FFFF;
  431. DWORD movZ = EncodeMove(/*Opcode*/ 0xD2800000, /*register x1*/1, lowerThunkBits); // no shift; hw = 00
  432. Emit(thunkBuffer,addrOffset, movZ);
  433. AssertMsg(sizeof(movZ) == 4, "movZ has to be 32-bit encoded");
  434. addrOffset+= sizeof(movZ);
  435. // Encode MOVK (movk x1, #<interpreterThunk 32-16 bits>, lsl #16)
  436. DWORD higherThunkBits = ((uint64)interpreterThunk & 0xFFFF0000) >> 16;
  437. DWORD movK = EncodeMove(/*Opcode*/ 0xF2A00000, /*register x1*/1, higherThunkBits); // left shift 16 bits; hw = 01
  438. Emit(thunkBuffer, addrOffset, movK);
  439. AssertMsg(sizeof(movK) == 4, "movK has to be 32-bit encoded");
  440. addrOffset+= sizeof(movK);
  441. // Encode MOVK (movk x1, #<interpreterThunk 48-32 bits>, lsl #16)
  442. higherThunkBits = ((uint64)interpreterThunk & 0xFFFF00000000) >> 32;
  443. movK = EncodeMove(/*Opcode*/ 0xF2C00000, /*register x1*/1, higherThunkBits); // left shift 32 bits; hw = 02
  444. Emit(thunkBuffer, addrOffset, movK);
  445. AssertMsg(sizeof(movK) == 4, "movK has to be 32-bit encoded");
  446. addrOffset += sizeof(movK);
  447. // Encode MOVK (movk x1, #<interpreterThunk 64-48 bits>, lsl #16)
  448. higherThunkBits = ((uint64)interpreterThunk & 0xFFFF000000000000) >> 48;
  449. movK = EncodeMove(/*Opcode*/ 0xF2E00000, /*register x1*/1, higherThunkBits); // left shift 48 bits; hw = 03
  450. AssertMsg(sizeof(movK) == 4, "movK has to be 32-bit encoded");
  451. Emit(thunkBuffer, addrOffset, movK);
  452. // Encode LDR - Load of function Body
  453. ULONG offsetOfFunctionInfo = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
  454. AssertMsg(offsetOfFunctionInfo % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
  455. AssertMsg(offsetOfFunctionInfo < 0x8000, "Immediate offset for LDR must be less than 0x8000");
  456. *(PULONG)&thunkBuffer[FunctionBodyOffset] |= (offsetOfFunctionInfo / 8) << 10;
  457. // Encode LDR - Load of interpreter thunk number
  458. ULONG offsetOfDynamicInterpreterThunk = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
  459. AssertMsg(offsetOfDynamicInterpreterThunk % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
  460. AssertMsg(offsetOfDynamicInterpreterThunk < 0x8000, "Immediate offset for LDR must be less than 0x8000");
  461. *(PULONG)&thunkBuffer[DynamicThunkAddressOffset] |= (offsetOfDynamicInterpreterThunk / 8) << 10;
  462. }
  463. DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
  464. {
  465. DWORD encodedMove = reg << 0;
  466. DWORD encodedImm = 0;
  467. EncoderMD::EncodeImmediate16(imm16, &encodedImm);
  468. encodedMove |= encodedImm;
  469. AssertMsg((encodedMove & opCode) == 0, "Any bits getting overwritten?");
  470. encodedMove |= opCode;
  471. return encodedMove;
  472. }
  473. void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
  474. {
  475. function->BeginAddress = 0x0; // Since our base address is the start of the function - this is offset from the base address
  476. function->Flag = 1; // Packed unwind data is used
  477. function->FunctionLength = functionSize / 4;
  478. function->RegF = 0; // number of non-volatile FP registers (d8-d15) saved in the canonical stack location
  479. function->RegI = 0; // number of non-volatile INT registers (r19-r28) saved in the canonical stack location
  480. function->H = 1; // Homes parameters
  481. // (indicating whether the function "homes" the integer parameter registers (r0-r7) by storing them at the very start of the function)
  482. function->CR = 3; // chained function, a store/load pair instruction is used in prolog/epilog <r29,lr>
  483. function->FrameSize = 5; // the number of bytes of stack that is allocated for this function divided by 16
  484. }
  485. #else
  486. void InterpreterThunkEmitter::EncodeInterpreterThunk(__in_bcount(thunkSize) BYTE* thunkBuffer, __in_bcount(thunkSize) BYTE* thunkBufferStartAddress, __in const DWORD thunkSize, __in_bcount(epilogSize) BYTE* epilogStart, __in const DWORD epilogSize, __in void * const interpreterThunk)
  487. {
  488. _Analysis_assume_(thunkSize == HeaderSize);
  489. Emit(thunkBuffer, ThunkAddressOffset, (uintptr_t)interpreterThunk);
  490. thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
  491. thunkBuffer[FunctionBodyOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
  492. Emit(thunkBuffer, CallBlockStartAddrOffset, (uintptr_t) thunkBufferStartAddress + HeaderSize);
  493. uint totalThunkSize = (uint)(epilogStart - (thunkBufferStartAddress + HeaderSize));
  494. Emit(thunkBuffer, ThunkSizeOffset, totalThunkSize);
  495. Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);
  496. }
  497. #endif
  498. inline /*static*/
  499. DWORD InterpreterThunkEmitter::FillDebugBreak(_In_ BYTE* dest, _In_ DWORD count)
  500. {
  501. #if defined(_M_ARM)
  502. Assert(count % 2 == 0);
  503. #elif defined(_M_ARM64)
  504. Assert(count % 4 == 0);
  505. #endif
  506. // TODO: michhol OOP JIT. after mving OOP, change to runtime process handle
  507. CustomHeap::FillDebugBreak(dest, count, GetCurrentProcess());
  508. return count;
  509. }
  510. inline /*static*/
  511. DWORD InterpreterThunkEmitter::CopyWithAlignment(
  512. _In_ BYTE* dest,
  513. _In_ const DWORD sizeInBytes,
  514. _In_ const BYTE* src,
  515. _In_ const DWORD srcSize,
  516. _In_ const DWORD alignment)
  517. {
  518. js_memcpy_s(dest, sizeInBytes, src, srcSize);
  519. dest += srcSize;
  520. DWORD alignPad = Math::Align(srcSize, alignment) - srcSize;
  521. Assert(alignPad <= (sizeInBytes - srcSize));
  522. if(alignPad > 0 && alignPad <= (sizeInBytes - srcSize))
  523. {
  524. FillDebugBreak(dest, alignPad);
  525. return srcSize + alignPad;
  526. }
  527. return srcSize;
  528. }
  529. // We only decommit at close because there might still be some
  530. // code running here.
  531. // The destructor of emitBufferManager will cause the eventual release.
  532. void InterpreterThunkEmitter::Close()
  533. {
  534. #if PDATA_ENABLED
  535. auto unregisterPdata = ([&] (const ThunkBlock& block)
  536. {
  537. PDataManager::UnregisterPdata((PRUNTIME_FUNCTION) block.GetPdata());
  538. });
  539. thunkBlocks.Iterate(unregisterPdata);
  540. freeListedThunkBlocks.Iterate(unregisterPdata);
  541. #endif
  542. this->thunkBlocks.Clear(allocator);
  543. this->freeListedThunkBlocks.Clear(allocator);
  544. emitBufferManager.Decommit();
  545. this->thunkBuffer = nullptr;
  546. this->thunkCount = 0;
  547. }
  548. void InterpreterThunkEmitter::Release(BYTE* thunkAddress, bool addtoFreeList)
  549. {
  550. if(!addtoFreeList)
  551. {
  552. return;
  553. }
  554. auto predicate = ([=] (const ThunkBlock& block)
  555. {
  556. return block.Contains(thunkAddress);
  557. });
  558. ThunkBlock* block = freeListedThunkBlocks.Find(predicate);
  559. if(!block)
  560. {
  561. block = thunkBlocks.MoveTo(&freeListedThunkBlocks, predicate);
  562. }
  563. // if EnsureFreeList fails in an OOM scenario - we just leak the thunks
  564. if(block && block->EnsureFreeList(allocator))
  565. {
  566. block->Release(thunkAddress);
  567. }
  568. }
  569. BYTE* InterpreterThunkEmitter::AllocateFromFreeList(PVOID* ppDynamicInterpreterThunk )
  570. {
  571. ThunkBlock& block = this->freeListedThunkBlocks.Head();
  572. BYTE* thunk = block.AllocateFromFreeList();
  573. #if _M_ARM
  574. thunk = (BYTE*)((DWORD)thunk | 0x01);
  575. #endif
  576. if(block.IsFreeListEmpty())
  577. {
  578. this->freeListedThunkBlocks.MoveHeadTo(&this->thunkBlocks);
  579. }
  580. *ppDynamicInterpreterThunk = thunk;
  581. BYTE* entryPoint = block.GetStart();
  582. #if _M_ARM
  583. entryPoint = (BYTE*)((DWORD)entryPoint | 0x01);
  584. #endif
  585. return entryPoint;
  586. }
  587. bool ThunkBlock::Contains(BYTE* address) const
  588. {
  589. bool contains = address >= start && address < (start + InterpreterThunkEmitter::BlockSize);
  590. return contains;
  591. }
  592. void ThunkBlock::Release(BYTE* address)
  593. {
  594. Assert(Contains(address));
  595. Assert(this->freeList);
  596. BVIndex index = FromThunkAddress(address);
  597. this->freeList->Set(index);
  598. }
  599. BYTE* ThunkBlock::AllocateFromFreeList()
  600. {
  601. Assert(this->freeList);
  602. BVIndex index = this->freeList->GetNextBit(0);
  603. BYTE* address = ToThunkAddress(index);
  604. this->freeList->Clear(index);
  605. return address;
  606. }
  607. BVIndex ThunkBlock::FromThunkAddress(BYTE* address)
  608. {
  609. int index = ((uint)(address - start) - InterpreterThunkEmitter::HeaderSize) / InterpreterThunkEmitter::ThunkSize;
  610. Assert(index < InterpreterThunkEmitter::ThunksPerBlock);
  611. return index;
  612. }
  613. BYTE* ThunkBlock::ToThunkAddress(BVIndex index)
  614. {
  615. Assert(index < InterpreterThunkEmitter::ThunksPerBlock);
  616. BYTE* address = start + InterpreterThunkEmitter::HeaderSize + InterpreterThunkEmitter::ThunkSize * index;
  617. return address;
  618. }
  619. bool ThunkBlock::EnsureFreeList(ArenaAllocator* allocator)
  620. {
  621. if(!this->freeList)
  622. {
  623. this->freeList = BVFixed::NewNoThrow(InterpreterThunkEmitter::ThunksPerBlock, allocator);
  624. }
  625. return this->freeList != nullptr;
  626. }
  627. bool ThunkBlock::IsFreeListEmpty() const
  628. {
  629. Assert(this->freeList);
  630. return this->freeList->IsAllClear();
  631. }
  632. #endif