LowerMDSharedSimd128.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "BackEnd.h"
// Looks up the machine-dependent opcode mapped to a SIMD IR opcode.
// m_simd128OpCodesMap is indexed by the opcode's offset from Simd128_Start;
// a zero entry means "no direct 1:1 mapping" (see Simd128TryLowerMappedInstruction).
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]
// Registers a 1:1 IR -> machine-dependent opcode mapping. Asserts the slot is
// not already occupied and that the target really is an MD opcode (> MDStart).
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0); \
Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart); \
m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
  11. IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
  12. {
  13. // Currently only handles type-specialized/asm.js opcodes
  14. if (!instr->GetDst())
  15. {
  16. // SIMD ops always have DST in asmjs
  17. Assert(!instr->m_func->GetJnFunction()->GetIsAsmjsMode());
  18. // unused result. Do nothing.
  19. IR::Instr * pInstr = instr->m_prev;
  20. instr->Remove();
  21. return pInstr;
  22. }
  23. if (Simd128TryLowerMappedInstruction(instr))
  24. {
  25. return instr->m_prev;
  26. }
  27. return Simd128LowerUnMappedInstruction(instr);
  28. }
  29. bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
  30. {
  31. bool legalize = true;
  32. Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
  33. if ((uint32)opcode == 0)
  34. return false;
  35. Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32);
  36. Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
  37. Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
  38. switch (instr->m_opcode)
  39. {
  40. case Js::OpCode::Simd128_Abs_F4:
  41. Assert(opcode == Js::OpCode::ANDPS);
  42. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ABS_MASK_F4, instr->GetSrc1()->GetType(), m_func));
  43. break;
  44. case Js::OpCode::Simd128_Abs_D2:
  45. Assert(opcode == Js::OpCode::ANDPD);
  46. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ABS_MASK_D2, instr->GetSrc1()->GetType(), m_func));
  47. break;
  48. case Js::OpCode::Simd128_Neg_F4:
  49. Assert(opcode == Js::OpCode::XORPS);
  50. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_NEG_MASK_F4, instr->GetSrc1()->GetType(), m_func));
  51. break;
  52. case Js::OpCode::Simd128_Neg_D2:
  53. Assert(opcode == Js::OpCode::XORPS);
  54. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_NEG_MASK_D2, instr->GetSrc1()->GetType(), m_func));
  55. break;
  56. case Js::OpCode::Simd128_Not_F4:
  57. case Js::OpCode::Simd128_Not_I4:
  58. Assert(opcode == Js::OpCode::XORPS);
  59. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, instr->GetSrc1()->GetType(), m_func));
  60. break;
  61. case Js::OpCode::Simd128_Gt_F4:
  62. case Js::OpCode::Simd128_Gt_D2:
  63. case Js::OpCode::Simd128_GtEq_F4:
  64. case Js::OpCode::Simd128_GtEq_D2:
  65. case Js::OpCode::Simd128_Lt_I4:
  66. {
  67. Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD);
  68. // swap operands
  69. auto *src1 = instr->UnlinkSrc1();
  70. auto *src2 = instr->UnlinkSrc2();
  71. instr->SetSrc1(src2);
  72. instr->SetSrc2(src1);
  73. break;
  74. }
  75. case Js::OpCode::Simd128_LdSignMask_F4:
  76. case Js::OpCode::Simd128_LdSignMask_I4:
  77. case Js::OpCode::Simd128_LdSignMask_D2:
  78. legalize = false;
  79. break;
  80. }
  81. instr->m_opcode = opcode;
  82. if (legalize)
  83. {
  84. //MakeDstEquSrc1(instr);
  85. Legalize(instr);
  86. }
  87. return true;
  88. }
// Lowers SIMD128 opcodes that have no direct machine-opcode mapping (i.e.
// GET_SIMDOPCODE returned 0) by dispatching to a per-operation helper that
// emits the expanded instruction sequence. Each helper returns the
// instruction from which lowering should resume.
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
    switch (instr->m_opcode)
    {
    // Constant vector load.
    case Js::OpCode::Simd128_LdC:
        return Simd128LoadConst(instr);
    // Construction from 4 floats / 4 ints / 2 doubles.
    case Js::OpCode::Simd128_FloatsToF4:
    case Js::OpCode::Simd128_IntsToI4:
    case Js::OpCode::Simd128_DoublesToD2:
        return Simd128LowerConstructor(instr);
    // Lane extraction / replacement.
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_F4:
        return Simd128LowerLdLane(instr);
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_F4:
        return SIMD128LowerReplaceLane(instr);
    // Broadcast a scalar to all lanes.
    case Js::OpCode::Simd128_Splat_F4:
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_D2:
        return Simd128LowerSplat(instr);
    // Arithmetic expanded to multi-instruction sequences.
    case Js::OpCode::Simd128_Rcp_F4:
    case Js::OpCode::Simd128_Rcp_D2:
        return Simd128LowerRcp(instr);
    case Js::OpCode::Simd128_Sqrt_F4:
    case Js::OpCode::Simd128_Sqrt_D2:
        return Simd128LowerSqrt(instr);
    case Js::OpCode::Simd128_RcpSqrt_F4:
    case Js::OpCode::Simd128_RcpSqrt_D2:
        return Simd128LowerRcpSqrt(instr);
    case Js::OpCode::Simd128_Select_F4:
    case Js::OpCode::Simd128_Select_I4:
    case Js::OpCode::Simd128_Select_D2:
        return Simd128LowerSelect(instr);
    case Js::OpCode::Simd128_Neg_I4:
        return Simd128LowerNegI4(instr);
    case Js::OpCode::Simd128_Mul_I4:
        return Simd128LowerMulI4(instr);
    // Typed-array element loads (variable and constant index).
    case Js::OpCode::Simd128_LdArr_I4:
    case Js::OpCode::Simd128_LdArr_F4:
    case Js::OpCode::Simd128_LdArr_D2:
    case Js::OpCode::Simd128_LdArrConst_I4:
    case Js::OpCode::Simd128_LdArrConst_F4:
    case Js::OpCode::Simd128_LdArrConst_D2:
        return Simd128LowerLoadElem(instr);
    // Typed-array element stores (variable and constant index).
    case Js::OpCode::Simd128_StArr_I4:
    case Js::OpCode::Simd128_StArr_F4:
    case Js::OpCode::Simd128_StArr_D2:
    case Js::OpCode::Simd128_StArrConst_I4:
    case Js::OpCode::Simd128_StArrConst_F4:
    case Js::OpCode::Simd128_StArrConst_D2:
        return Simd128LowerStoreElem(instr);
    // Lane permutations.
    case Js::OpCode::Simd128_Swizzle_I4:
    case Js::OpCode::Simd128_Swizzle_F4:
    case Js::OpCode::Simd128_Swizzle_D2:
    case Js::OpCode::Simd128_Shuffle_I4:
    case Js::OpCode::Simd128_Shuffle_F4:
    case Js::OpCode::Simd128_Shuffle_D2:
        return Simd128LowerShuffle(instr);
    default:
        AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
    }
    return nullptr;
}
  152. IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
  153. {
  154. Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
  155. if (instr->GetDst()->IsSimd128())
  156. {
  157. Assert(instr->GetSrc1()->IsSimd128());
  158. Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
  159. Assert(instr->GetSrc2() == nullptr);
  160. AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
  161. // MOVUPS dst, [const]
  162. AsmJsSIMDValue *pValue = NativeCodeDataNew(instr->m_func->GetNativeCodeDataAllocator(), AsmJsSIMDValue);
  163. pValue->SetValue(value);
  164. IR::Opnd * opnd = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
  165. instr->ReplaceSrc1(opnd);
  166. instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
  167. Legalize(instr);
  168. return instr->m_prev;
  169. }
  170. else
  171. {
  172. AssertMsg(UNREACHED, "Non-typespecialized form of Simd128_LdC is unsupported");
  173. }
  174. return nullptr;
  175. }
  176. IR::Instr* LowererMD::Simd128LowerConstructor(IR::Instr *instr)
  177. {
  178. IR::Opnd* dst = nullptr;
  179. IR::Opnd* src1 = nullptr;
  180. IR::Opnd* src2 = nullptr;
  181. IR::Opnd* src3 = nullptr;
  182. IR::Opnd* src4 = nullptr;
  183. IR::Instr* newInstr = nullptr;
  184. if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4 || instr->m_opcode == Js::OpCode::Simd128_IntsToI4)
  185. {
  186. // use MOVSS for both int32x4 and float32x4. MOVD zeroes upper bits.
  187. Js::OpCode movOpcode = Js::OpCode::MOVSS;
  188. Js::OpCode shiftOpcode = Js::OpCode::PSLLDQ;
  189. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  190. // The number of src opnds should be exact. If opnds are missing, they should be filled in by globopt during type-spec.
  191. Assert(args->Count() == 5);
  192. dst = args->Pop();
  193. src1 = args->Pop();
  194. src2 = args->Pop();
  195. src3 = args->Pop();
  196. src4 = args->Pop();
  197. if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4)
  198. {
  199. // We don't have f32 type-spec, so we type-spec to f64 and convert to f32 before use.
  200. if (src1->IsFloat64())
  201. {
  202. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  203. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  204. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func);
  205. instr->InsertBefore(newInstr);
  206. src1 = regOpnd32;
  207. }
  208. if (src2->IsFloat64())
  209. {
  210. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  211. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  212. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src2, this->m_func);
  213. instr->InsertBefore(newInstr);
  214. src2 = regOpnd32;
  215. }
  216. if (src3->IsFloat64())
  217. {
  218. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  219. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  220. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src3, this->m_func);
  221. instr->InsertBefore(newInstr);
  222. src3 = regOpnd32;
  223. }
  224. if (src4->IsFloat64())
  225. {
  226. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  227. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  228. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src4, this->m_func);
  229. instr->InsertBefore(newInstr);
  230. src4 = regOpnd32;
  231. }
  232. Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat32);
  233. Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat32);
  234. Assert(src3->IsRegOpnd() && src3->GetType() == TyFloat32);
  235. Assert(src4->IsRegOpnd() && src4->GetType() == TyFloat32);
  236. // MOVSS dst, src4
  237. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src4, m_func));
  238. // PSLLDQ dst, dst, 4
  239. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  240. // MOVSS dst, src3
  241. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src3, m_func));
  242. // PSLLDQ dst, 4
  243. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  244. // MOVSS dst, src2
  245. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src2, m_func));
  246. // PSLLDQ dst, 4
  247. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  248. // MOVSS dst, src1
  249. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src1, m_func));
  250. }
  251. else
  252. {
  253. //Simd128_IntsToI4
  254. // b-namost: better way to implement this on SSE2? Using MOVD directly zeroes upper bits.
  255. IR::RegOpnd *temp = IR::RegOpnd::New(TyFloat32, m_func);
  256. // src's might have been constant prop'ed. Enregister them if so.
  257. src4 = EnregisterIntConst(instr, src4);
  258. src3 = EnregisterIntConst(instr, src3);
  259. src2 = EnregisterIntConst(instr, src2);
  260. src1 = EnregisterIntConst(instr, src1);
  261. Assert(src1->GetType() == TyInt32 && src1->IsRegOpnd());
  262. Assert(src2->GetType() == TyInt32 && src2->IsRegOpnd());
  263. Assert(src3->GetType() == TyInt32 && src3->IsRegOpnd());
  264. Assert(src4->GetType() == TyInt32 && src4->IsRegOpnd());
  265. // MOVD t(TyFloat32), src4(TyInt32)
  266. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src4, m_func));
  267. // MOVSS dst, t
  268. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  269. // PSLLDQ dst, dst, 4
  270. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  271. // MOVD t(TyFloat32), sr34(TyInt32)
  272. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src3, m_func));
  273. // MOVSS dst, t
  274. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  275. // PSLLDQ dst, dst, 4
  276. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  277. // MOVD t(TyFloat32), src2(TyInt32)
  278. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src2, m_func));
  279. // MOVSS dst, t
  280. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  281. // PSLLDQ dst, dst, 4
  282. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  283. // MOVD t(TyFloat32), src1(TyInt32)
  284. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src1, m_func));
  285. // MOVSS dst, t
  286. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  287. }
  288. }
  289. else
  290. {
  291. Assert(instr->m_opcode == Js::OpCode::Simd128_DoublesToD2);
  292. dst = instr->GetDst();
  293. src1 = instr->GetSrc1();
  294. src2 = instr->GetSrc2();
  295. Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat64);
  296. Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat64);
  297. // MOVSD dst, src2
  298. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src2, m_func));
  299. // PSLLDQ dst, dst, 8
  300. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, dst, dst, IR::IntConstOpnd::New(TySize[TyFloat64], TyInt8, m_func, true), m_func));
  301. // MOVSD dst, src1
  302. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func));
  303. }
  304. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  305. IR::Instr* prevInstr;
  306. prevInstr = instr->m_prev;
  307. instr->Remove();
  308. return prevInstr;
  309. }
  310. IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
  311. {
  312. IR::Opnd* dst, *src1, *src2;
  313. Js::OpCode movOpcode = Js::OpCode::MOVSS;
  314. uint laneSize = 0, laneIndex = 0;
  315. dst = instr->GetDst();
  316. src1 = instr->GetSrc1();
  317. src2 = instr->GetSrc2();
  318. Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyFloat64));
  319. Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
  320. Assert(src2 && src2->IsIntConstOpnd());
  321. laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
  322. switch (instr->m_opcode)
  323. {
  324. case Js::OpCode::Simd128_ExtractLane_F4:
  325. laneSize = 4;
  326. movOpcode = Js::OpCode::MOVSS;
  327. Assert(laneIndex < 4);
  328. break;
  329. case Js::OpCode::Simd128_ExtractLane_I4:
  330. laneSize = 4;
  331. movOpcode = Js::OpCode::MOVD;
  332. Assert(laneIndex < 4);
  333. break;
  334. default:
  335. Assert(UNREACHED);
  336. }
  337. IR::Opnd* tmp = src1;
  338. if (laneIndex != 0)
  339. {
  340. // tmp = PSRLDQ src1, shamt
  341. tmp = IR::RegOpnd::New(src1->GetType(), m_func);
  342. IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneSize * laneIndex, TyInt8, m_func, true), m_func);
  343. instr->InsertBefore(shiftInstr);
  344. //MakeDstEquSrc1(shiftInstr);
  345. Legalize(shiftInstr);
  346. }
  347. // MOVSS/MOVSD/MOVD dst, tmp
  348. instr->InsertBefore(IR::Instr::New(movOpcode, dst, tmp, m_func));
  349. IR::Instr* prevInstr = instr->m_prev;
  350. instr->Remove();
  351. return prevInstr;
  352. }
  353. IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
  354. {
  355. Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
  356. IR::Opnd *dst, *src1;
  357. dst = instr->GetDst();
  358. src1 = instr->GetSrc1();
  359. Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
  360. Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64));
  361. Assert(!instr->GetSrc2());
  362. switch (instr->m_opcode)
  363. {
  364. case Js::OpCode::Simd128_Splat_F4:
  365. shufOpCode = Js::OpCode::SHUFPS;
  366. movOpCode = Js::OpCode::MOVSS;
  367. break;
  368. case Js::OpCode::Simd128_Splat_I4:
  369. shufOpCode = Js::OpCode::PSHUFD;
  370. movOpCode = Js::OpCode::MOVD;
  371. break;
  372. case Js::OpCode::Simd128_Splat_D2:
  373. shufOpCode = Js::OpCode::SHUFPD;
  374. movOpCode = Js::OpCode::MOVSD;
  375. break;
  376. default:
  377. Assert(UNREACHED);
  378. }
  379. instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
  380. instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
  381. IR::Instr* prevInstr = instr->m_prev;
  382. instr->Remove();
  383. return prevInstr;
  384. }
  385. IR::Instr* LowererMD::Simd128LowerRcp(IR::Instr *instr, bool removeInstr)
  386. {
  387. Js::OpCode opcode = Js::OpCode::DIVPS;
  388. void* x86_allones_mask = nullptr;
  389. IR::Opnd *dst, *src1;
  390. dst = instr->GetDst();
  391. src1 = instr->GetSrc1();
  392. Assert(dst && dst->IsRegOpnd());
  393. Assert(src1 && src1->IsRegOpnd());
  394. Assert(instr->GetSrc2() == nullptr);
  395. if (instr->m_opcode == Js::OpCode::Simd128_Rcp_F4 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_F4)
  396. {
  397. Assert(src1->IsSimd128F4() || src1->IsSimd128I4());
  398. opcode = Js::OpCode::DIVPS;
  399. x86_allones_mask = (void*)(&X86_ALL_ONES_F4);
  400. }
  401. else
  402. {
  403. Assert(instr->m_opcode == Js::OpCode::Simd128_Rcp_D2 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
  404. Assert(src1->IsSimd128D2());
  405. opcode = Js::OpCode::DIVPD;
  406. x86_allones_mask = (void*)(&X86_ALL_ONES_D2);
  407. }
  408. IR::RegOpnd* tmp = IR::RegOpnd::New(src1->GetType(), m_func);
  409. IR::Instr* movInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp, IR::MemRefOpnd::New(x86_allones_mask, src1->GetType(), m_func), m_func);
  410. instr->InsertBefore(movInstr);
  411. Legalize(movInstr);
  412. instr->InsertBefore(IR::Instr::New(opcode, tmp, tmp, src1, m_func));
  413. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, tmp, m_func));
  414. if (removeInstr)
  415. {
  416. IR::Instr* prevInstr = instr->m_prev;
  417. instr->Remove();
  418. return prevInstr;
  419. }
  420. return instr;
  421. }
  422. IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
  423. {
  424. Js::OpCode opcode = Js::OpCode::SQRTPS;
  425. IR::Opnd *dst, *src1;
  426. dst = instr->GetDst();
  427. src1 = instr->GetSrc1();
  428. Assert(dst && dst->IsRegOpnd());
  429. Assert(src1 && src1->IsRegOpnd());
  430. Assert(instr->GetSrc2() == nullptr);
  431. if (instr->m_opcode == Js::OpCode::Simd128_Sqrt_F4)
  432. {
  433. opcode = Js::OpCode::SQRTPS;
  434. }
  435. else
  436. {
  437. Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
  438. opcode = Js::OpCode::SQRTPD;
  439. }
  440. instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func));
  441. IR::Instr* prevInstr = instr->m_prev;
  442. instr->Remove();
  443. return prevInstr;
  444. }
  445. IR::Instr* LowererMD::Simd128LowerRcpSqrt(IR::Instr *instr)
  446. {
  447. Js::OpCode opcode = Js::OpCode::SQRTPS;
  448. Simd128LowerRcp(instr, false);
  449. if (instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_F4)
  450. {
  451. opcode = Js::OpCode::SQRTPS;
  452. }
  453. else
  454. {
  455. Assert(instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
  456. opcode = Js::OpCode::SQRTPD;
  457. }
  458. instr->InsertBefore(IR::Instr::New(opcode, instr->GetDst(), instr->GetDst(), m_func));
  459. IR::Instr* prevInstr = instr->m_prev;
  460. instr->Remove();
  461. return prevInstr;
  462. }
  463. IR::Instr* LowererMD::Simd128LowerSelect(IR::Instr *instr)
  464. {
  465. Assert(instr->m_opcode == Js::OpCode::Simd128_Select_F4 || instr->m_opcode == Js::OpCode::Simd128_Select_I4 || instr->m_opcode == Js::OpCode::Simd128_Select_D2);
  466. IR::Opnd* dst = nullptr;
  467. IR::Opnd* src1 = nullptr;
  468. IR::Opnd* src2 = nullptr;
  469. IR::Opnd* src3 = nullptr;
  470. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  471. // The number of src opnds should be exact. Missing opnds means type-error, and we should generate an exception throw instead (or globopt does).
  472. Assert(args->Count() == 4);
  473. dst = args->Pop();
  474. src1 = args->Pop(); // mask
  475. src2 = args->Pop(); // trueValue
  476. src3 = args->Pop(); // falseValue
  477. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  478. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  479. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  480. Assert(src3->IsRegOpnd() && src3->IsSimd128());
  481. IR::RegOpnd *tmp = IR::RegOpnd::New(src1->GetType(), m_func);
  482. IR::Instr *pInstr = nullptr;
  483. // ANDPS tmp1, mask, tvalue
  484. pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, src1, src2, m_func);
  485. instr->InsertBefore(pInstr);
  486. //MakeDstEquSrc1(pInstr);
  487. Legalize(pInstr);
  488. // ANDPS dst, mask, fvalue
  489. pInstr = IR::Instr::New(Js::OpCode::ANDNPS, dst, src1, src3, m_func);
  490. instr->InsertBefore(pInstr);
  491. //MakeDstEquSrc1(pInstr);
  492. Legalize(pInstr);
  493. // ORPS dst, dst, tmp1
  494. pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, tmp, m_func);
  495. instr->InsertBefore(pInstr);
  496. pInstr = instr->m_prev;
  497. instr->Remove();
  498. return pInstr;
  499. }
  500. IR::Instr* LowererMD::Simd128LowerNegI4(IR::Instr *instr)
  501. {
  502. Assert(instr->m_opcode == Js::OpCode::Simd128_Neg_I4);
  503. IR::Opnd* dst = instr->GetDst();
  504. IR::Opnd* src1 = instr->GetSrc1();
  505. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  506. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  507. Assert(instr->GetSrc2() == nullptr);
  508. // MOVAPS dst, src1
  509. IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  510. instr->InsertBefore(pInstr);
  511. // XORPS dst, dst, 0xfff...f
  512. pInstr = IR::Instr::New(Js::OpCode::XORPS, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, src1->GetType(), m_func), m_func);
  513. instr->InsertBefore(pInstr);
  514. Legalize(pInstr);
  515. // PADDD dst, dst, {1,1,1,1}
  516. pInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_ONES_I4, src1->GetType(), m_func), m_func);
  517. instr->InsertBefore(pInstr);
  518. Legalize(pInstr);
  519. pInstr = instr->m_prev;
  520. instr->Remove();
  521. return pInstr;
  522. }
  523. IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
  524. {
  525. Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4);
  526. IR::Instr *pInstr;
  527. IR::Opnd* dst = instr->GetDst();
  528. IR::Opnd* src1 = instr->GetSrc1();
  529. IR::Opnd* src2 = instr->GetSrc2();
  530. IR::Opnd* temp1, *temp2, *temp3;
  531. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  532. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  533. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  534. temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  535. temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
  536. temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
  537. // temp1 = PMULUDQ src1, src2
  538. pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
  539. instr->InsertBefore(pInstr);
  540. //MakeDstEquSrc1(pInstr);
  541. Legalize(pInstr);
  542. // temp2 = PSLRD src1, 0x4
  543. pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
  544. instr->InsertBefore(pInstr);
  545. //MakeDstEquSrc1(pInstr);
  546. Legalize(pInstr);
  547. // temp3 = PSLRD src2, 0x4
  548. pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
  549. instr->InsertBefore(pInstr);
  550. //MakeDstEquSrc1(pInstr);
  551. Legalize(pInstr);
  552. // temp2 = PMULUDQ temp2, temp3
  553. pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
  554. instr->InsertBefore(pInstr);
  555. Legalize(pInstr);
  556. //PSHUFD temp1, temp1, 0x8
  557. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func));
  558. //PSHUFD temp2, temp2, 0x8
  559. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
  560. // PUNPCKLDQ dst, temp1, temp2
  561. pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
  562. instr->InsertBefore(pInstr);
  563. Legalize(pInstr);
  564. pInstr = instr->m_prev;
  565. instr->Remove();
  566. return pInstr;
  567. }
// Lowers Simd128_ReplaceLane_I4/F4: dst = src1 with the lane selected by the
// constant index src2 replaced by the scalar value src3.
IR::Instr* LowererMD::SIMD128LowerReplaceLane(IR::Instr* instr)
{
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0, byteWidth = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();  // vector being updated
    IR::Opnd *src2 = args->Pop();  // constant lane index
    IR::Opnd *src3 = args->Pop();  // replacement scalar value
    Assert(dst->IsSimd128() && src1->IsSimd128());
    IRType type = dst->GetType();
    lane = src2->AsIntConstOpnd()->AsInt32();
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ReplaceLane_I4:
        byteWidth = TySize[TyInt32];
        break;
    case Js::OpCode::Simd128_ReplaceLane_F4:
        byteWidth = TySize[TyFloat32];
        break;
    default:
        Assert(UNREACHED);
    }
    // MOVAPS dst, src1 -- start from a copy of the source vector
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
    // I4 and F4 lanes are both 4 bytes wide, so this branch covers both opcodes.
    if (byteWidth == TySize[TyFloat32])
    {
        if (laneValue->GetType() == TyInt32)
        {
            IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func); //mov intval to xmm
            //MOVD
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
            laneValue = tempReg;
        }
        Assert(laneValue->GetType() == TyFloat32);
        if (lane == 0)
        {
            // Lane 0 is the easy case: overwrite the low dword directly.
            // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
        }
        else if (lane == 2)
        {
            // Lane 2: move the high quadword down (MOVHLPS), patch its low
            // lane with MOVSS, then move it back up (MOVLHPS).
            IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
        }
        else
        {
            Assert(lane == 1 || lane == 3);
            // Build a SHUFPS mask that swaps the target lane with lane 0,
            // starting from the identity shuffle (0xE4 = lanes 3,2,1,0).
            // The resulting permutation is a transposition, so applying the
            // same mask twice restores the original lane order.
            uint8 shufMask = 0xE4; // 11 10 01 00
            shufMask |= lane; // 11 10 01 id
            shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
            // SHUFPS dst, dst, shufMask  -- bring target lane into position 0
            instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
            // MOVSS dst, value           -- overwrite lane 0
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
            // SHUFPS dst, dst, shufMask  -- swap the lanes back
            instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
        }
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  633. IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
  634. {
  635. Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
  636. Js::OpCode irOpcode = instr->m_opcode;
  637. bool isShuffle = false;
  638. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  639. IR::Opnd *dst = args->Pop();
  640. IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
  641. int i = 0;
  642. while (!args->Empty() && i < 6)
  643. {
  644. srcs[i++] = args->Pop();
  645. }
  646. int8 shufMask = 0;
  647. int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
  648. IR::Instr *pInstr = instr->m_prev;
  649. Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
  650. // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
  651. if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 ||
  652. irOpcode == Js::OpCode::Simd128_Swizzle_F4 ||
  653. irOpcode == Js::OpCode::Simd128_Swizzle_D2)
  654. {
  655. isShuffle = false;
  656. AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
  657. srcs[2] && srcs[2]->IsIntConstOpnd() &&
  658. (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[3] && srcs[3]->IsIntConstOpnd())) &&
  659. (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
  660. if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
  661. {
  662. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  663. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  664. Assert(lane0 >= 0 && lane0 < 2);
  665. Assert(lane1 >= 0 && lane1 < 2);
  666. shufMask = (int8)((lane1 << 1) | lane0);
  667. }
  668. else
  669. {
  670. AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
  671. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  672. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  673. lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
  674. lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
  675. Assert(lane1 >= 0 && lane1 < 4);
  676. Assert(lane2 >= 0 && lane2 < 4);
  677. Assert(lane2 >= 0 && lane2 < 4);
  678. Assert(lane3 >= 0 && lane3 < 4);
  679. shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
  680. }
  681. }
  682. else if (irOpcode == Js::OpCode::Simd128_Shuffle_I4 ||
  683. irOpcode == Js::OpCode::Simd128_Shuffle_F4 ||
  684. irOpcode == Js::OpCode::Simd128_Shuffle_D2)
  685. {
  686. isShuffle = true;
  687. Assert(srcs[1] && srcs[1]->IsSimd128());
  688. AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
  689. srcs[3] && srcs[3]->IsIntConstOpnd() &&
  690. (irOpcode == Js::OpCode::Simd128_Shuffle_D2 || (srcs[4] && srcs[4]->IsIntConstOpnd())) &&
  691. (irOpcode == Js::OpCode::Simd128_Shuffle_D2 || (srcs[5] && srcs[5]->IsIntConstOpnd())), "Type-specialized shuffle is supported only with constant lane indices");
  692. if (irOpcode == Js::OpCode::Simd128_Shuffle_D2)
  693. {
  694. Assert(srcs[2]->IsIntConstOpnd() && srcs[3]->IsIntConstOpnd());
  695. lane0 = srcs[2]->AsIntConstOpnd()->AsInt32();
  696. lane1 = srcs[3]->AsIntConstOpnd()->AsInt32() - 2;
  697. Assert(lane0 >= 0 && lane0 < 2);
  698. Assert(lane1 >= 0 && lane1 < 2);
  699. shufMask = (int8)((lane1 << 1) | lane0);
  700. }
  701. else
  702. {
  703. AnalysisAssert(srcs[4] != nullptr && srcs[5] != nullptr);
  704. lane0 = srcs[2]->AsIntConstOpnd()->AsInt32();
  705. lane1 = srcs[3]->AsIntConstOpnd()->AsInt32();
  706. lane2 = srcs[4]->AsIntConstOpnd()->AsInt32() - 4;
  707. lane3 = srcs[5]->AsIntConstOpnd()->AsInt32() - 4;
  708. Assert(lane0 >= 0 && lane0 < 4);
  709. Assert(lane1 >= 0 && lane1 < 4);
  710. Assert(lane2 >= 0 && lane2 < 4);
  711. Assert(lane3 >= 0 && lane3 < 4);
  712. shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
  713. }
  714. }
  715. else
  716. {
  717. Assert(UNREACHED);
  718. }
  719. if (instr->m_opcode == Js::OpCode::Simd128_Swizzle_D2 || instr->m_opcode == Js::OpCode::Simd128_Shuffle_D2)
  720. {
  721. shufOpcode = Js::OpCode::SHUFPD;
  722. }
  723. // Lower shuffle/swizzle
  724. instr->m_opcode = shufOpcode;
  725. instr->SetDst(dst);
  726. // MOVAPS dst, src1
  727. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
  728. if (isShuffle)
  729. {
  730. // SHUF dst, src2, imm8
  731. instr->SetSrc1(srcs[1]);
  732. }
  733. else
  734. {
  735. // SHUF dst, dst, imm8
  736. instr->SetSrc1(dst);
  737. }
  738. instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
  739. return pInstr;
  740. }
// Lowers a type-specialized SIMD load from an asm.js typed-array heap.
// Emits either a runtime bounds-check + helper path (variable index, or a
// constant offset too large to be statically safe) or a direct load, then
// the MOV sequence matching instr->dataWidth (4/8/12/16 bytes).
// Returns the instruction preceding the original so lowering resumes there.
IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
        instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
        instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4 ||
        instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
        );
    IR::Instr * instrPrev = instr->m_prev;
    IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
    IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
    IR::Opnd * dst = instr->GetDst();
    IR::Opnd * src1 = instr->GetSrc1();
    // src2 carries the array size for the bounds check.
    IR::Opnd * src2 = instr->GetSrc2();
    ValueType arrType = baseOpnd->GetValueType();
    uint8 dataWidth = instr->dataWidth;
    // Type-specialized.
    Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
    IR::Instr * done;
    if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
    {
        // CMP indexOpnd, src2(arrSize)
        // JA $helper
        // JMP $load
        // $helper:
        // Throw RangeError
        // JMP $done
        // $load:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd]) // or other based on data width
        // $done:

        // bpe = bytes per element of the typed array, used to decide
        // whether the access can straddle the checked element boundary.
        uint32 bpe = 1;
        switch (arrType.GetObjectType())
        {
        case ObjectType::Int8Array:
        case ObjectType::Uint8Array:
            break;
        case ObjectType::Int16Array:
        case ObjectType::Uint16Array:
            bpe = 2;
            break;
        case ObjectType::Int32Array:
        case ObjectType::Uint32Array:
        case ObjectType::Float32Array:
            bpe = 4;
            break;
        case ObjectType::Float64Array:
            bpe = 8;
            break;
        default:
            Assert(UNREACHED);
        }
        // bound check and helper
        done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
    }
    else
    {
        // Reaching here means:
        // We have a constant index, and either
        // (1) constant heap or (2) variable heap with constant index < 16MB.
        // Case (1) requires static bound check. Case (2) means we are always in bound.
        instr->UnlinkDst();
        // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
        if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
        {
            // Statically out of bounds: replace the load with a range error.
            m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
            instr->FreeSrc1();
            instr->FreeSrc2();
            instr->Remove();
            return instrPrev;
        }
        instr->FreeSrc2();
        done = instr;
    }
    // Emit the actual load(s) before `done` (the helper's join point, or
    // the original instr when no bounds check was needed).
    IR::Instr *newInstr = nullptr;
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 12:
    {
        // 12-byte load (e.g. 3 lanes): load low 8 bytes, load the third
        // 4-byte lane, shift it into byte lanes 8..11, then OR-merge.
        IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), m_func);
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src1, m_func);
        instr->InsertBefore(newInstr);
        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src1->AsIndirOpnd()->GetOffset() + 8, true);
        Legalize(newInstr);
        // PSLLDQ temp, 0x08
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), m_func));
        // ORPS dst, temp
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    }
    case 8:
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 4:
        // MOVSS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    default:
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
// Lowers a type-specialized SIMD store to an asm.js typed-array heap.
// Mirror of Simd128LowerLoadElem: emits a runtime bounds-check + helper
// path for variable indices (or over-large constant offsets), otherwise a
// direct store; then the MOV sequence matching instr->dataWidth.
// Returns the instruction preceding the original so lowering resumes there.
IR::Instr* LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
        instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4 ||
        instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
        );
    IR::Instr * instrPrev = instr->m_prev;
    IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
    IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
    IR::Opnd * dst = instr->GetDst();
    IR::Opnd * src1 = instr->GetSrc1();
    // src2 carries the array size for the bounds check.
    IR::Opnd * src2 = instr->GetSrc2();
    ValueType arrType = baseOpnd->GetValueType();
    uint8 dataWidth = instr->dataWidth;
    // Type-specialized.
    Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
    IR::Instr * done;
    // Cleared when the access is statically out of bounds, so only the
    // range error (no store) is emitted.
    bool doStore = true;
    if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
    {
        // CMP indexOpnd, src2(arrSize)
        // JA $helper
        // JMP $store
        // $helper:
        // Throw RangeError
        // JMP $done
        // $store:
        // MOV dst([arrayBuffer + indexOpnd]), src1
        // $done:

        // bpe = bytes per element of the typed array.
        uint32 bpe = 1;
        switch (arrType.GetObjectType())
        {
        case ObjectType::Int8Array:
        case ObjectType::Uint8Array:
            break;
        case ObjectType::Int16Array:
        case ObjectType::Uint16Array:
            bpe = 2;
            break;
        case ObjectType::Int32Array:
        case ObjectType::Uint32Array:
        case ObjectType::Float32Array:
            bpe = 4;
            break;
        case ObjectType::Float64Array:
            bpe = 8;
            break;
        default:
            Assert(UNREACHED);
        }
        done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
    }
    else
    {
        instr->UnlinkDst();
        instr->UnlinkSrc1();
        // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds
        if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
        {
            m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
            doStore = false;
            src1->Free(m_func);
            dst->Free(m_func);
        }
        done = instr;
        instr->FreeSrc2();
    }
    if (doStore)
    {
        switch (dataWidth)
        {
        case 16:
            // MOVUPS dst([arrayBuffer + indexOpnd]), src1
            instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, m_func));
            break;
        case 12:
        {
            // 12-byte store: copy src to a temp, store low 8 bytes, shift
            // the third 4-byte lane down, and store it at offset + 8.
            IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), m_func);
            IR::Instr *movss;
            // MOVAPS temp, src
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, m_func));
            // MOVSD dst([arrayBuffer + indexOpnd]), temp
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, m_func));
            // PSRLDQ temp, 0x08
            instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), m_func));
            // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
            movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, m_func);
            instr->InsertBefore(movss);
            movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
            break;
        }
        case 8:
            // MOVSD dst([arrayBuffer + indexOpnd]), src1
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func));
            break;
        case 4:
            // MOVSS dst([arrayBuffer + indexOpnd]), src1
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, m_func));
            break;
        default:;
            Assume(UNREACHED);
        }
    }
    instr->Remove();
    return instrPrev;
}
  971. // Builds args list <dst, src1, src2, src3 ..>
  972. SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
  973. {
  974. SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
  975. IR::Instr *pInstr = instr;
  976. IR::Opnd *dst, *src1, *src2;
  977. dst = src1 = src2 = nullptr;
  978. if (pInstr->GetDst())
  979. {
  980. dst = pInstr->UnlinkDst();
  981. }
  982. src1 = pInstr->UnlinkSrc1();
  983. Assert(src1->GetStackSym()->IsSingleDef());
  984. pInstr = src1->GetStackSym()->GetInstrDef();
  985. while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
  986. {
  987. Assert(pInstr->GetSrc1());
  988. src1 = pInstr->GetSrc1()->Copy(this->m_func);
  989. if (src1->IsRegOpnd())
  990. {
  991. this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
  992. }
  993. args->Push(src1);
  994. if (pInstr->GetSrc2())
  995. {
  996. src2 = pInstr->GetSrc2();
  997. Assert(src2->GetStackSym()->IsSingleDef());
  998. pInstr = src2->GetStackSym()->GetInstrDef();
  999. }
  1000. else
  1001. {
  1002. pInstr = nullptr;
  1003. }
  1004. }
  1005. args->Push(dst);
  1006. Assert(args->Count() > 3);
  1007. return args;
  1008. }
  1009. IR::Opnd* LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd)
  1010. {
  1011. if (constOpnd->IsRegOpnd())
  1012. {
  1013. // already a register
  1014. return constOpnd;
  1015. }
  1016. Assert(constOpnd->GetType() == TyInt32);
  1017. IR::RegOpnd *tempReg = IR::RegOpnd::New(TyInt32, m_func);
  1018. // MOV tempReg, constOpnd
  1019. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
  1020. return tempReg;
  1021. }
// Populates the SIMD-IR -> machine-opcode table used by the simple
// (one-to-one) lowering path. Ops needing multi-instruction sequences are
// handled by the dedicated Simd128Lower* routines instead.
void LowererMD::Simd128InitOpcodeMap()
{
    m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());
    // All simd ops should be contiguous for this mapping to work
    Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);

    // ---- Int32x4 ----
    SET_SIMDOPCODE(Simd128_FromFloat64x2_I4     , CVTTPD2DQ);
    SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4_I4     , CVTTPS2DQ);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_Add_I4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_I4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Lt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Gt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Eq_I4                , PCMPEQD);
    SET_SIMDOPCODE(Simd128_And_I4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_I4               , XORPS);
    SET_SIMDOPCODE(Simd128_LdSignMask_I4        , MOVMSKPS);

    // ---- Float32x4 ----
    // NOTE(review): Abs/Neg/Not map to ANDPS/XORPS; the sign/all-ones mask
    // operand is presumably supplied elsewhere in lowering — not visible here.
    SET_SIMDOPCODE(Simd128_FromFloat64x2_F4     , CVTPD2PS);
    SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_F4       , CVTDQ2PS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_Abs_F4               , ANDPS);
    SET_SIMDOPCODE(Simd128_Neg_F4               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_F4               , ADDPS);
    SET_SIMDOPCODE(Simd128_Sub_F4               , SUBPS);
    SET_SIMDOPCODE(Simd128_Mul_F4               , MULPS);
    SET_SIMDOPCODE(Simd128_Div_F4               , DIVPS);
    SET_SIMDOPCODE(Simd128_Min_F4               , MINPS);
    SET_SIMDOPCODE(Simd128_Max_F4               , MAXPS);
    SET_SIMDOPCODE(Simd128_Sqrt_F4              , SQRTPS);
    SET_SIMDOPCODE(Simd128_Lt_F4                , CMPLTPS); // CMPLTPS
    SET_SIMDOPCODE(Simd128_LtEq_F4              , CMPLEPS); // CMPLEPS
    SET_SIMDOPCODE(Simd128_Eq_F4                , CMPEQPS); // CMPEQPS
    SET_SIMDOPCODE(Simd128_Neq_F4               , CMPNEQPS); // CMPNEQPS
    SET_SIMDOPCODE(Simd128_Gt_F4                , CMPLTPS); // CMPLTPS (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_F4              , CMPLEPS); // CMPLEPS (swap srcs)
    SET_SIMDOPCODE(Simd128_And_F4               , ANDPS);
    SET_SIMDOPCODE(Simd128_Or_F4                , ORPS);
    SET_SIMDOPCODE(Simd128_Xor_F4               , XORPS );
    SET_SIMDOPCODE(Simd128_Not_F4               , XORPS );
    SET_SIMDOPCODE(Simd128_LdSignMask_F4        , MOVMSKPS );

    // ---- Float64x2 ----
    SET_SIMDOPCODE(Simd128_FromFloat32x4_D2     , CVTPS2PD);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_D2       , CVTDQ2PD);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2   , MOVAPS);
    SET_SIMDOPCODE(Simd128_Neg_D2               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_D2               , ADDPD);
    SET_SIMDOPCODE(Simd128_Abs_D2               , ANDPD);
    SET_SIMDOPCODE(Simd128_Sub_D2               , SUBPD);
    SET_SIMDOPCODE(Simd128_Mul_D2               , MULPD);
    SET_SIMDOPCODE(Simd128_Div_D2               , DIVPD);
    SET_SIMDOPCODE(Simd128_Min_D2               , MINPD);
    SET_SIMDOPCODE(Simd128_Max_D2               , MAXPD);
    SET_SIMDOPCODE(Simd128_Sqrt_D2              , SQRTPD);
    SET_SIMDOPCODE(Simd128_Lt_D2                , CMPLTPD); // CMPLTPD
    SET_SIMDOPCODE(Simd128_LtEq_D2              , CMPLEPD); // CMPLEPD
    SET_SIMDOPCODE(Simd128_Eq_D2                , CMPEQPD); // CMPEQPD
    SET_SIMDOPCODE(Simd128_Neq_D2               , CMPNEQPD); // CMPNEQPD
    SET_SIMDOPCODE(Simd128_Gt_D2                , CMPLTPD); // CMPLTPD (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_D2              , CMPLEPD); // CMPLEPD (swap srcs)
    SET_SIMDOPCODE(Simd128_LdSignMask_D2        , MOVMSKPD);
}
  1086. #undef SIMD_SETOPCODE
  1087. #undef SIMD_GETOPCODE
// FromVar
// Unboxes a SIMD value from a Var: loads the 16-byte value out of the
// JavascriptSIMDFloat32x4/Int32x4 object into the xmm dst. When the instr
// carries bailout info, a type check (object test + vtable compare) is
// emitted first and the FromVar itself becomes the bailout call.
void LowererMD::GenerateCheckedSimdLoad(IR::Instr * instr)
{
    Assert(instr->m_opcode == Js::OpCode::FromVar);
    Assert(instr->GetSrc1()->GetType() == TyVar);
    Assert(IRType_IsSimd128(instr->GetDst()->GetType()));
    bool checkRequired = instr->HasBailOutInfo();
    IR::LabelInstr * labelHelper = nullptr, * labelDone = nullptr;
    IR::Instr * insertInstr = instr, * newInstr;
    IR::RegOpnd * src = instr->GetSrc1()->AsRegOpnd(), * dst = instr->GetDst()->AsRegOpnd();
    Assert(!checkRequired || instr->GetBailOutKind() == IR::BailOutSimd128F4Only || instr->GetBailOutKind() == IR::BailOutSimd128I4Only);
    if (checkRequired)
    {
        // Layout: fast-path code is inserted before $helper; the original
        // instr (after labelHelper) becomes the bailout call.
        //   <object test>         ; tagged value -> $helper
        //   CMP [src], vtable     ; wrong SIMD type -> $helper
        //   JNE $helper
        //   MOVUPS dst, [src + offsetOfValue]
        //   JMP $done
        // $helper:
        //   <bailout>
        // $done:
        labelHelper = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
        labelDone = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
        instr->InsertBefore(labelHelper);
        instr->InsertAfter(labelDone);
        insertInstr = labelHelper;
        // Jump to helper if src is not a heap object (tagged value).
        GenerateObjectTest(instr->GetSrc1(), insertInstr, labelHelper);
        // Compare the object's vtable against the expected SIMD type's vtable.
        newInstr = IR::Instr::New(Js::OpCode::CMP, instr->m_func);
        newInstr->SetSrc1(IR::IndirOpnd::New(instr->GetSrc1()->AsRegOpnd(), 0, TyMachPtr, instr->m_func));
        newInstr->SetSrc2(m_lowerer->LoadVTableValueOpnd(instr, dst->GetType() == TySimd128F4 ? VTableValue::VtableSimd128F4 : VTableValue::VtableSimd128I4));
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, labelHelper, this->m_func));
        // Strip the FromVar down and turn it into the bailout call.
        instr->UnlinkSrc1();
        instr->UnlinkDst();
        this->m_lowerer->GenerateBailOut(instr);
    }
    // Fast path: load the 16-byte value field out of the boxed SIMD object.
    size_t valueOffset = dst->GetType() == TySimd128F4 ? Js::JavascriptSIMDFloat32x4::GetOffsetOfValue() : Js::JavascriptSIMDInt32x4::GetOffsetOfValue();
    Assert(valueOffset < INT_MAX);
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::IndirOpnd::New(src, static_cast<int>(valueOffset), dst->GetType(), this->m_func), this->m_func);
    insertInstr->InsertBefore(newInstr);
    // NOTE(review): when checkRequired is false, labelDone is nullptr here
    // and the JMP below is built with a null target, and the original
    // FromVar is left in the instruction stream — this looks like it
    // assumes FromVar always carries bailout info; confirm with callers.
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, this->m_func));
    // FromVar is converted to BailOut call. Don't remove.
}
  1124. // ToVar
  1125. void LowererMD::GenerateSimdStore(IR::Instr * instr)
  1126. {
  1127. IR::RegOpnd *dst, *src;
  1128. IRType type;
  1129. dst = instr->GetDst()->AsRegOpnd();
  1130. src = instr->GetSrc1()->AsRegOpnd();
  1131. type = src->GetType();
  1132. this->m_lowerer->LoadScriptContext(instr);
  1133. IR::Instr * instrCall = IR::Instr::New(Js::OpCode::CALL, instr->GetDst(),
  1134. IR::HelperCallOpnd::New(type == TySimd128F4 ? IR::HelperAllocUninitializedSimdF4 : IR::HelperAllocUninitializedSimdI4, this->m_func), this->m_func);
  1135. instr->InsertBefore(instrCall);
  1136. this->lowererMDArch.LowerCall(instrCall, 0);
  1137. IR::Opnd * valDst;
  1138. if (type == TySimd128F4)
  1139. {
  1140. valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDFloat32x4::GetOffsetOfValue(), TySimd128F4, this->m_func);
  1141. }
  1142. else
  1143. {
  1144. valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDInt32x4::GetOffsetOfValue(), TySimd128I4, this->m_func);
  1145. }
  1146. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func));
  1147. instr->Remove();
  1148. }