// LowerMDSharedSimd128.cpp

//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "BackEnd.h"
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
    Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0); \
    Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart); \
    m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
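// m_simd128OpCodesMap maps a type-specialized SIMD IR opcode to a machine-dependent opcode when a
// single instruction suffices. GET_SIMDOPCODE yields 0 for entries that were never set;
// Simd128TryLowerMappedInstruction treats that as "no direct mapping" and the opcode is instead
// expanded by Simd128LowerUnMappedInstruction.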
IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
{
    // Currently only handles type-specialized/asm.js opcodes
    if (!instr->GetDst())
    {
        // SIMD ops always have DST in asmjs
        Assert(!instr->m_func->GetJnFunction()->GetIsAsmjsMode());
        // unused result. Do nothing.
        IR::Instr * pInstr = instr->m_prev;
        instr->Remove();
        return pInstr;
    }
    if (Simd128TryLowerMappedInstruction(instr))
    {
        return instr->m_prev;
    }
    return Simd128LowerUnMappedInstruction(instr);
}
bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
{
    bool legalize = true;
    Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
    if ((uint32)opcode == 0)
        return false;
    Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && (instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32));
    Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
    Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_Abs_F4:
        Assert(opcode == Js::OpCode::ANDPS);
        instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ABS_MASK_F4, instr->GetSrc1()->GetType(), m_func));
        break;
    case Js::OpCode::Simd128_Abs_D2:
        Assert(opcode == Js::OpCode::ANDPD);
        instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ABS_MASK_D2, instr->GetSrc1()->GetType(), m_func));
        break;
    case Js::OpCode::Simd128_Neg_F4:
        Assert(opcode == Js::OpCode::XORPS);
        instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_NEG_MASK_F4, instr->GetSrc1()->GetType(), m_func));
        break;
    case Js::OpCode::Simd128_Neg_D2:
        Assert(opcode == Js::OpCode::XORPS);
        instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_NEG_MASK_D2, instr->GetSrc1()->GetType(), m_func));
        break;
    case Js::OpCode::Simd128_Not_F4:
    case Js::OpCode::Simd128_Not_I4:
        Assert(opcode == Js::OpCode::XORPS);
        instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, instr->GetSrc1()->GetType(), m_func));
        break;
    case Js::OpCode::Simd128_Gt_F4:
    case Js::OpCode::Simd128_Gt_D2:
    case Js::OpCode::Simd128_GtEq_F4:
    case Js::OpCode::Simd128_GtEq_D2:
    case Js::OpCode::Simd128_Lt_I4:
    {
        Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD);
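        // SSE only provides the less-than/less-equal packed compares used here (CMPLT*/CMPLE*)
        // plus PCMPGTD, so greater-than/greater-equal (and integer less-than) are lowered to the
        // reversed compare with the operands exchanged.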
        // swap operands
        auto *src1 = instr->UnlinkSrc1();
        auto *src2 = instr->UnlinkSrc2();
        instr->SetSrc1(src2);
        instr->SetSrc2(src1);
        break;
    }
    case Js::OpCode::Simd128_LdSignMask_F4:
    case Js::OpCode::Simd128_LdSignMask_I4:
    case Js::OpCode::Simd128_LdSignMask_D2:
        legalize = false;
        break;
    }
    instr->m_opcode = opcode;
    if (legalize)
    {
        //MakeDstEquSrc1(instr);
        Legalize(instr);
    }
    return true;
}
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_LdC:
        return Simd128LoadConst(instr);
    case Js::OpCode::Simd128_FloatsToF4:
    case Js::OpCode::Simd128_IntsToI4:
    case Js::OpCode::Simd128_DoublesToD2:
        return Simd128LowerConstructor(instr);
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_F4:
        return Simd128LowerLdLane(instr);
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_F4:
        return SIMD128LowerReplaceLane(instr);
    case Js::OpCode::Simd128_Splat_F4:
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_D2:
        return Simd128LowerSplat(instr);
    case Js::OpCode::Simd128_Rcp_F4:
    case Js::OpCode::Simd128_Rcp_D2:
        return Simd128LowerRcp(instr);
    case Js::OpCode::Simd128_Sqrt_F4:
    case Js::OpCode::Simd128_Sqrt_D2:
        return Simd128LowerSqrt(instr);
    case Js::OpCode::Simd128_RcpSqrt_F4:
    case Js::OpCode::Simd128_RcpSqrt_D2:
        return Simd128LowerRcpSqrt(instr);
    case Js::OpCode::Simd128_Select_F4:
    case Js::OpCode::Simd128_Select_I4:
    case Js::OpCode::Simd128_Select_D2:
        return Simd128LowerSelect(instr);
    case Js::OpCode::Simd128_Neg_I4:
        return Simd128LowerNegI4(instr);
    case Js::OpCode::Simd128_Mul_I4:
        return Simd128LowerMulI4(instr);
    case Js::OpCode::Simd128_LdArr_I4:
    case Js::OpCode::Simd128_LdArr_F4:
    case Js::OpCode::Simd128_LdArr_D2:
    case Js::OpCode::Simd128_LdArrConst_I4:
    case Js::OpCode::Simd128_LdArrConst_F4:
    case Js::OpCode::Simd128_LdArrConst_D2:
        if (m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode())
        {
            // with bound checks
            return Simd128AsmJsLowerLoadElem(instr);
        }
        else
        {
            // non-AsmJs, boundChecks are extracted from instr
            return Simd128LowerLoadElem(instr);
        }
    case Js::OpCode::Simd128_StArr_I4:
    case Js::OpCode::Simd128_StArr_F4:
    case Js::OpCode::Simd128_StArr_D2:
    case Js::OpCode::Simd128_StArrConst_I4:
    case Js::OpCode::Simd128_StArrConst_F4:
    case Js::OpCode::Simd128_StArrConst_D2:
        if (m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode())
        {
            return Simd128AsmJsLowerStoreElem(instr);
        }
        else
        {
            return Simd128LowerStoreElem(instr);
        }
    case Js::OpCode::Simd128_Swizzle_I4:
    case Js::OpCode::Simd128_Swizzle_F4:
    case Js::OpCode::Simd128_Swizzle_D2:
        return Simd128LowerSwizzle4(instr);
    case Js::OpCode::Simd128_Shuffle_I4:
    case Js::OpCode::Simd128_Shuffle_F4:
    case Js::OpCode::Simd128_Shuffle_D2:
        return Simd128LowerShuffle4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_I4:
        return Simd128LowerInt32x4FromFloat32x4(instr);
    default:
        AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
    }
    return nullptr;
}
IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
{
    Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
    if (instr->GetDst()->IsSimd128())
    {
        Assert(instr->GetSrc1()->IsSimd128());
        Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
        Assert(instr->GetSrc2() == nullptr);
        AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
        // MOVUPS dst, [const]
        AsmJsSIMDValue *pValue = NativeCodeDataNew(instr->m_func->GetNativeCodeDataAllocator(), AsmJsSIMDValue);
        pValue->SetValue(value);
        IR::Opnd * opnd = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
        instr->ReplaceSrc1(opnd);
        instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
        Legalize(instr);
        return instr->m_prev;
    }
    else
    {
        AssertMsg(UNREACHED, "Non-typespecialized form of Simd128_LdC is unsupported");
    }
    return nullptr;
}
IR::Instr* LowererMD::Simd128LowerConstructor(IR::Instr *instr)
{
    IR::Opnd* dst = nullptr;
    IR::Opnd* src1 = nullptr;
    IR::Opnd* src2 = nullptr;
    IR::Opnd* src3 = nullptr;
    IR::Opnd* src4 = nullptr;
    IR::Instr* newInstr = nullptr;
    if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4 || instr->m_opcode == Js::OpCode::Simd128_IntsToI4)
    {
        // use MOVSS for both int32x4 and float32x4. MOVD zeroes upper bits.
        Js::OpCode movOpcode = Js::OpCode::MOVSS;
        Js::OpCode shiftOpcode = Js::OpCode::PSLLDQ;
        SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
        // The number of src opnds should be exact. If opnds are missing, they should be filled in by globopt during type-spec.
        Assert(args->Count() == 5);
        dst = args->Pop();
        src1 = args->Pop();
        src2 = args->Pop();
        src3 = args->Pop();
        src4 = args->Pop();
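        // The vector is assembled highest lane first: each scalar is moved into lane 0 with MOVSS,
        // then the register is shifted left by one lane (PSLLDQ) to make room for the next scalar.
        // After inserting src4, src3, src2 and finally src1, dst holds [src1, src2, src3, src4].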
        if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4)
        {
            // We don't have f32 type-spec, so we type-spec to f64 and convert to f32 before use.
            if (src1->IsFloat64())
            {
                IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
                // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
                newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func);
                instr->InsertBefore(newInstr);
                src1 = regOpnd32;
            }
            if (src2->IsFloat64())
            {
                IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
                // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
                newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src2, this->m_func);
                instr->InsertBefore(newInstr);
                src2 = regOpnd32;
            }
            if (src3->IsFloat64())
            {
                IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
                // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
                newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src3, this->m_func);
                instr->InsertBefore(newInstr);
                src3 = regOpnd32;
            }
            if (src4->IsFloat64())
            {
                IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
                // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
                newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src4, this->m_func);
                instr->InsertBefore(newInstr);
                src4 = regOpnd32;
            }
            Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat32);
            Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat32);
            Assert(src3->IsRegOpnd() && src3->GetType() == TyFloat32);
            Assert(src4->IsRegOpnd() && src4->GetType() == TyFloat32);
            // MOVSS dst, src4
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, src4, m_func));
            // PSLLDQ dst, dst, 4
            instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
            // MOVSS dst, src3
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, src3, m_func));
            // PSLLDQ dst, dst, 4
            instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
            // MOVSS dst, src2
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, src2, m_func));
            // PSLLDQ dst, dst, 4
            instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
            // MOVSS dst, src1
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, src1, m_func));
        }
        else
        {
            // Simd128_IntsToI4
            // b-namost: better way to implement this on SSE2? Using MOVD directly zeroes upper bits.
            IR::RegOpnd *temp = IR::RegOpnd::New(TyFloat32, m_func);
            // src's might have been constant prop'd. Enregister them if so.
            src4 = EnregisterIntConst(instr, src4);
            src3 = EnregisterIntConst(instr, src3);
            src2 = EnregisterIntConst(instr, src2);
            src1 = EnregisterIntConst(instr, src1);
            Assert(src1->GetType() == TyInt32 && src1->IsRegOpnd());
            Assert(src2->GetType() == TyInt32 && src2->IsRegOpnd());
            Assert(src3->GetType() == TyInt32 && src3->IsRegOpnd());
            Assert(src4->GetType() == TyInt32 && src4->IsRegOpnd());
            // MOVD t(TyFloat32), src4(TyInt32)
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src4, m_func));
            // MOVSS dst, t
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
            // PSLLDQ dst, dst, 4
            instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
            // MOVD t(TyFloat32), src3(TyInt32)
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src3, m_func));
            // MOVSS dst, t
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
            // PSLLDQ dst, dst, 4
            instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
            // MOVD t(TyFloat32), src2(TyInt32)
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src2, m_func));
            // MOVSS dst, t
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
            // PSLLDQ dst, dst, 4
            instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
            // MOVD t(TyFloat32), src1(TyInt32)
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src1, m_func));
            // MOVSS dst, t
            instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
        }
    }
    else
    {
        Assert(instr->m_opcode == Js::OpCode::Simd128_DoublesToD2);
        dst = instr->GetDst();
        src1 = instr->GetSrc1();
        src2 = instr->GetSrc2();
        Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat64);
        Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat64);
        // MOVSD dst, src2
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src2, m_func));
        // PSLLDQ dst, dst, 8
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, dst, dst, IR::IntConstOpnd::New(TySize[TyFloat64], TyInt8, m_func, true), m_func));
        // MOVSD dst, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func));
    }
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    IR::Instr* prevInstr;
    prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
{
    IR::Opnd* dst, *src1, *src2;
    Js::OpCode movOpcode = Js::OpCode::MOVSS;
    uint laneSize = 0, laneIndex = 0;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    src2 = instr->GetSrc2();
    Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyFloat64));
    Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2 && src2->IsIntConstOpnd());
    laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ExtractLane_F4:
        laneSize = 4;
        movOpcode = Js::OpCode::MOVSS;
        Assert(laneIndex < 4);
        break;
    case Js::OpCode::Simd128_ExtractLane_I4:
        laneSize = 4;
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 4);
        break;
    default:
        Assert(UNREACHED);
    }
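    // Extraction scheme: if the requested lane is not lane 0, shift the whole register right by
    // laneSize * laneIndex bytes (PSRLDQ) so the lane lands in the low lane, then move the low
    // lane out with a scalar move (MOVSS for floats, MOVD for ints).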
    IR::Opnd* tmp = src1;
    if (laneIndex != 0)
    {
        // tmp = PSRLDQ src1, shamt
        tmp = IR::RegOpnd::New(src1->GetType(), m_func);
        IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneSize * laneIndex, TyInt8, m_func, true), m_func);
        instr->InsertBefore(shiftInstr);
        //MakeDstEquSrc1(shiftInstr);
        Legalize(shiftInstr);
    }
    // MOVSS/MOVSD/MOVD dst, tmp
    instr->InsertBefore(IR::Instr::New(movOpcode, dst, tmp, m_func));
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
{
    Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
    IR::Opnd *dst, *src1;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64));
    Assert(!instr->GetSrc2());
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_Splat_F4:
        shufOpCode = Js::OpCode::SHUFPS;
        movOpCode = Js::OpCode::MOVSS;
        break;
    case Js::OpCode::Simd128_Splat_I4:
        shufOpCode = Js::OpCode::PSHUFD;
        movOpCode = Js::OpCode::MOVD;
        break;
    case Js::OpCode::Simd128_Splat_D2:
        shufOpCode = Js::OpCode::SHUFPD;
        movOpCode = Js::OpCode::MOVSD;
        break;
    default:
        Assert(UNREACHED);
    }
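    // Splat scheme: move the scalar into lane 0 of dst, then shuffle dst with an all-zero control
    // mask so every destination lane selects lane 0.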
    if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
    {
        IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
        // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
        instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
        src1 = regOpnd32;
    }
    instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
    instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
IR::Instr* LowererMD::Simd128LowerRcp(IR::Instr *instr, bool removeInstr)
{
    Js::OpCode opcode = Js::OpCode::DIVPS;
    void* x86_allones_mask = nullptr;
    IR::Opnd *dst, *src1;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    Assert(dst && dst->IsRegOpnd());
    Assert(src1 && src1->IsRegOpnd());
    Assert(instr->GetSrc2() == nullptr);
    if (instr->m_opcode == Js::OpCode::Simd128_Rcp_F4 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_F4)
    {
        Assert(src1->IsSimd128F4() || src1->IsSimd128I4());
        opcode = Js::OpCode::DIVPS;
        x86_allones_mask = (void*)(&X86_ALL_ONES_F4);
    }
    else
    {
        Assert(instr->m_opcode == Js::OpCode::Simd128_Rcp_D2 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
        Assert(src1->IsSimd128D2());
        opcode = Js::OpCode::DIVPD;
        x86_allones_mask = (void*)(&X86_ALL_ONES_D2);
    }
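    // The reciprocal is computed as an exact division: tmp is loaded from X86_ALL_ONES_F4 /
    // X86_ALL_ONES_D2 (assumed here to hold 1.0 in every lane) and divided by src1 with
    // DIVPS/DIVPD, so dst = 1.0 / src1 per lane rather than the lower-precision RCPPS estimate.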
    IR::RegOpnd* tmp = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::Instr* movInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp, IR::MemRefOpnd::New(x86_allones_mask, src1->GetType(), m_func), m_func);
    instr->InsertBefore(movInstr);
    Legalize(movInstr);
    instr->InsertBefore(IR::Instr::New(opcode, tmp, tmp, src1, m_func));
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, tmp, m_func));
    if (removeInstr)
    {
        IR::Instr* prevInstr = instr->m_prev;
        instr->Remove();
        return prevInstr;
    }
    return instr;
}
IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
{
    Js::OpCode opcode = Js::OpCode::SQRTPS;
    IR::Opnd *dst, *src1;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    Assert(dst && dst->IsRegOpnd());
    Assert(src1 && src1->IsRegOpnd());
    Assert(instr->GetSrc2() == nullptr);
    if (instr->m_opcode == Js::OpCode::Simd128_Sqrt_F4)
    {
        opcode = Js::OpCode::SQRTPS;
    }
    else
    {
        Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
        opcode = Js::OpCode::SQRTPD;
    }
    instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func));
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
IR::Instr* LowererMD::Simd128LowerRcpSqrt(IR::Instr *instr)
{
    Js::OpCode opcode = Js::OpCode::SQRTPS;
    Simd128LowerRcp(instr, false);
    if (instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_F4)
    {
        opcode = Js::OpCode::SQRTPS;
    }
    else
    {
        Assert(instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
        opcode = Js::OpCode::SQRTPD;
    }
    instr->InsertBefore(IR::Instr::New(opcode, instr->GetDst(), instr->GetDst(), m_func));
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
IR::Instr* LowererMD::Simd128LowerSelect(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Select_F4 || instr->m_opcode == Js::OpCode::Simd128_Select_I4 || instr->m_opcode == Js::OpCode::Simd128_Select_D2);
    IR::Opnd* dst = nullptr;
    IR::Opnd* src1 = nullptr;
    IR::Opnd* src2 = nullptr;
    IR::Opnd* src3 = nullptr;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    // The number of src opnds should be exact. Missing opnds means type-error, and we should generate an exception throw instead (or globopt does).
    Assert(args->Count() == 4);
    dst = args->Pop();
    src1 = args->Pop(); // mask
    src2 = args->Pop(); // trueValue
    src3 = args->Pop(); // falseValue
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    Assert(src3->IsRegOpnd() && src3->IsSimd128());
    IR::RegOpnd *tmp = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::Instr *pInstr = nullptr;
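    // Bitwise select: dst = (mask & trueValue) | (~mask & falseValue). ANDNPS computes the
    // complement of its first source ANDed with its second, which supplies the ~mask term.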
    // ANDPS tmp1, mask, tvalue
    pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, src1, src2, m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // ANDNPS dst, mask, fvalue
    pInstr = IR::Instr::New(Js::OpCode::ANDNPS, dst, src1, src3, m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // ORPS dst, dst, tmp1
    pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, tmp, m_func);
    instr->InsertBefore(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
IR::Instr* LowererMD::Simd128LowerNegI4(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Neg_I4);
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(instr->GetSrc2() == nullptr);
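    // Two's complement negation per lane: dst = ~src1 + 1, i.e. XOR with all-ones followed by a
    // packed add of 1 to every lane.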
    // MOVAPS dst, src1
    IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
    instr->InsertBefore(pInstr);
    // XORPS dst, dst, 0xfff...f
    pInstr = IR::Instr::New(Js::OpCode::XORPS, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, src1->GetType(), m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PADDD dst, dst, {1,1,1,1}
    pInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_ONES_I4, src1->GetType(), m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
    temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
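    // SSE2 has no packed 32-bit multiply with 32-bit results, so it is emulated:
    // PMULUDQ forms 64-bit products of the even lanes (0 and 2); shifting both sources right by
    // 4 bytes brings the odd lanes (1 and 3) into even positions for a second PMULUDQ; PSHUFD
    // with 0b00001000 packs the low 32 bits of each product into the bottom two lanes, and
    // PUNPCKLDQ interleaves the two packed results back into the original lane order.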
    // temp1 = PMULUDQ src1, src2
    pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp2 = PSRLDQ src1, 4
    pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp3 = PSRLDQ src2, 4
    pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp2 = PMULUDQ temp2, temp3
    pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSHUFD temp1, temp1, 0x8
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
    // PSHUFD temp2, temp2, 0x8
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
    // PUNPCKLDQ dst, temp1, temp2
    pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
IR::Instr* LowererMD::SIMD128LowerReplaceLane(IR::Instr* instr)
{
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0, byteWidth = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();
    IR::Opnd *src2 = args->Pop();
    IR::Opnd *src3 = args->Pop();
    Assert(dst->IsSimd128() && src1->IsSimd128());
    IRType type = dst->GetType();
    lane = src2->AsIntConstOpnd()->AsInt32();
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ReplaceLane_I4:
        byteWidth = TySize[TyInt32];
        break;
    case Js::OpCode::Simd128_ReplaceLane_F4:
        byteWidth = TySize[TyFloat32];
        break;
    default:
        Assert(UNREACHED);
    }
    // MOVAPS dst, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
    if (byteWidth == TySize[TyFloat32])
    {
        if (laneValue->GetType() == TyInt32)
        {
            IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func); // mov intval to xmm
            // MOVD
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
            laneValue = tempReg;
        }
        Assert(laneValue->GetType() == TyFloat32);
        if (lane == 0)
        {
            // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
        }
        else if (lane == 2)
        {
            IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
        }
        else
        {
            Assert(lane == 1 || lane == 3);
            uint8 shufMask = 0xE4; // 11 10 01 00
            shufMask |= lane; // 11 10 01 id
            shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
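            // Example for lane == 1: 0xE4 | 1 = 0xE5, and clearing bits [3:2] gives 0xE1
            // (11 10 00 01), a shuffle that swaps lanes 0 and 1. The MOVSS below then overwrites
            // lane 0 (holding the old lane 1), and the second SHUFPS with the same mask swaps the
            // lanes back, leaving the new value in the requested lane.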
            // SHUFPS dst, dst, shufMask
            instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
            // MOVSS dst, value
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
            // SHUFPS dst, dst, shufMask
            instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
        }
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
/*
4 and 2 lane Swizzle.
*/
IR::Instr* LowererMD::Simd128LowerSwizzle4(IR::Instr* instr)
{
    Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
    Js::OpCode irOpcode = instr->m_opcode;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    IR::Opnd *dst = args->Pop();
    IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
    int i = 0;
    while (!args->Empty() && i < 6)
    {
        srcs[i++] = args->Pop();
    }
    int8 shufMask = 0;
    int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
    IR::Instr *pInstr = instr->m_prev;
    Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
    // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
    Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 || irOpcode == Js::OpCode::Simd128_Swizzle_D2);
    AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
              srcs[2] && srcs[2]->IsIntConstOpnd() &&
              (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[3] && srcs[3]->IsIntConstOpnd())) &&
              (irOpcode == Js::OpCode::Simd128_Swizzle_D2 || (srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
    if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
    {
        lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
        lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
        Assert(lane0 >= 0 && lane0 < 2);
        Assert(lane1 >= 0 && lane1 < 2);
        shufMask = (int8)((lane1 << 1) | lane0);
        shufOpcode = Js::OpCode::SHUFPD;
    }
    else
    {
        if (irOpcode == Js::OpCode::Simd128_Swizzle_I4)
        {
            shufOpcode = Js::OpCode::PSHUFD;
        }
        AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
        lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
        lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
        lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
        lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
        Assert(lane0 >= 0 && lane0 < 4);
        Assert(lane1 >= 0 && lane1 < 4);
        Assert(lane2 >= 0 && lane2 < 4);
        Assert(lane3 >= 0 && lane3 < 4);
        shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
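        // Each 2-bit field of the PSHUFD/SHUFPS control byte selects the source lane for one
        // destination lane; e.g. a swizzle of lanes (1, 0, 3, 2) encodes as 0b10110001 (0xB1).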
    }
    instr->m_opcode = shufOpcode;
    instr->SetDst(dst);
    // MOVAPS dst, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
    // SHUF dst, dst, imm8
    instr->SetSrc1(dst);
    instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
    return pInstr;
}
/*
4 lane shuffle. Handles arbitrary lane values.
*/
IR::Instr* LowererMD::Simd128LowerShuffle4(IR::Instr* instr)
{
    Js::OpCode irOpcode = instr->m_opcode;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    IR::Opnd *dst = args->Pop();
    IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
    int i = 0;
    while (!args->Empty() && i < 6)
    {
        srcs[i++] = args->Pop();
    }
    uint8 lanes[4], lanesSrc[4];
    uint fromSrc1, fromSrc2;
    IR::Instr *pInstr = instr->m_prev;
    Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
    Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
    // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
    AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
              srcs[3] && srcs[3]->IsIntConstOpnd() &&
              srcs[4] && srcs[4]->IsIntConstOpnd() &&
              srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
    lanes[0] = (uint8)srcs[2]->AsIntConstOpnd()->AsInt32();
    lanes[1] = (uint8)srcs[3]->AsIntConstOpnd()->AsInt32();
    lanes[2] = (uint8)srcs[4]->AsIntConstOpnd()->AsInt32();
    lanes[3] = (uint8)srcs[5]->AsIntConstOpnd()->AsInt32();
    Assert(lanes[0] >= 0 && lanes[0] < 8);
    Assert(lanes[1] >= 0 && lanes[1] < 8);
    Assert(lanes[2] >= 0 && lanes[2] < 8);
    Assert(lanes[3] >= 0 && lanes[3] < 8);
    CheckShuffleLanes4(lanes, lanesSrc, &fromSrc1, &fromSrc2);
    Assert(fromSrc1 + fromSrc2 == 4);
    if (fromSrc1 == 4 || fromSrc2 == 4)
    {
        // can be done with a swizzle
        IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1];
        InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr);
    }
    else if (fromSrc1 == 2)
    {
        if (lanes[0] < 4 && lanes[1] < 4)
        {
            // x86 friendly shuffle
            Assert(lanes[2] >= 4 && lanes[3] >= 4);
            InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
        }
        else
        {
            // arbitrary shuffle with 2 lanes from each src
            uint8 ordLanes[4], reArrLanes[4];
            // order lanes based on which src they come from
            // compute re-arrangement mask
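            // Example: lanes = {4, 0, 5, 1} yields ordLanes = {0, 1, 4, 5} (src1 lanes first,
            // then src2) and reArrLanes = {2, 0, 3, 1}: the first SHUFPS gathers the four needed
            // lanes from both sources into temp, the second rearranges temp into the requested
            // order.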
            for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++)
            {
                if (lanesSrc[i] == 1)
                {
                    ordLanes[j1] = lanes[i];
                    reArrLanes[i] = j1;
                    j1++;
                }
                else
                {
                    Assert(lanesSrc[i] == 2);
                    ordLanes[j2] = lanes[i];
                    reArrLanes[i] = j2;
                    j2++;
                }
            }
            IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func);
            InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr);
            InsertShufps(reArrLanes, dst, temp, temp, instr);
        }
    }
    else if (fromSrc1 == 3 || fromSrc2 == 3)
    {
        // shuffle with 3 lanes from one src, one from another
        IR::Instr *newInstr;
        IR::Opnd * majSrc, *minSrc;
        IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
        IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
        IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
        uint8 minorityLane = 0, maxLaneValue;
        majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1];
        minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0];
        Assert(majSrc != minSrc);
        // Algorithm:
        // SHUFPS temp1, majSrc, lanes
        // SHUFPS temp2, minSrc, lanes
        // MOVUPS temp3, [minorityLane mask]
        // ANDPS temp2, temp3 // mask all lanes but minorityLane
        // ANDNPS temp3, temp1 // zero minorityLane
        // ORPS dst, temp2, temp3
        // find minorityLane to mask
        maxLaneValue = minSrc == srcs[0] ? 4 : 8;
        for (uint8 i = 0; i < 4; i++)
        {
            if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue)
            {
                minorityLane = i;
                break;
            }
        }
        IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New((void*)&X86_4LANES_MASKS[minorityLane], dst->GetType(), m_func);
        InsertShufps(lanes, temp1, majSrc, majSrc, instr);
        InsertShufps(lanes, temp2, minSrc, minSrc, instr);
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    instr->Remove();
    return pInstr;
}
IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *mask1, *mask2;
    IR::Instr *insertInstr, *pInstr, *newInstr;
    IR::LabelInstr *doneLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    // CVTTPS2DQ dst, src
    instr->m_opcode = Js::OpCode::CVTTPS2DQ;
    insertInstr = instr->m_next;
    pInstr = instr->m_prev;
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    mask1 = IR::RegOpnd::New(TyInt32, m_func);
    mask2 = IR::RegOpnd::New(TyInt32, m_func);
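    // CVTTPS2DQ writes 0x80000000 (the integer "indefinite" value) for NaN and for inputs
    // outside the int32 range. Since INT32_MIN also legitimately converts to 0x80000000, a lane
    // equal to that value only signals a *potential* out-of-range conversion, so the explicit
    // range check against +/-2^31 below is still required before throwing.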
    // bound checks
    // check if any value is potentially out of range (0x80000000 in output)
    // PCMPEQD tmp, dst, X86_NEG_MASK (0x80000000)
    // MOVMSKPS mask1, tmp
    // CMP mask1, 0
    // JEQ $doneLabel
    tmp = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New((void*)&X86_NEG_MASK_F4, TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // we have potential out of bound. check bounds
    // MOVAPS tmp2, X86_TWO_31_F4 (0x4f000000)
    // CMPLEPS tmp, tmp2, src
    // MOVMSKPS mask1, tmp
    // MOVAPS tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
    // CMPLTPS tmp, src, tmp2
    // MOVMSKPS mask2, tmp
    // OR mask1, mask1, mask2
    // CMP mask1, 0
    // JEQ $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New((void*)&X86_TWO_31_F4, TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New((void*)&X86_NEG_TWO_31_F4, TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // throw range error
    m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    insertInstr->InsertBefore(doneLabel);
    return pInstr;
}
IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
           instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
           instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
           instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
           instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4 ||
           instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
           );
    IR::Instr * instrPrev = instr->m_prev;
    IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
    IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
    IR::Opnd * dst = instr->GetDst();
    IR::Opnd * src1 = instr->GetSrc1();
    IR::Opnd * src2 = instr->GetSrc2();
    ValueType arrType = baseOpnd->GetValueType();
    uint8 dataWidth = instr->dataWidth;
    // Type-specialized.
    Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
    IR::Instr * done;
    if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
    {
        uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
        // bound check and helper
        done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
    }
    else
    {
        // Reaching here means we have a constant index, and either
        // (1) a constant heap, or (2) a variable heap with constant index < 16MB.
        // Case (1) requires a static bound check; case (2) means we are always in bounds.
        // This can happen when globopt props a constant access that was not known at bytecodegen time, or when the heap is non-constant.
        if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
        {
            m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
            instr->Remove();
            return instrPrev;
        }
        done = instr;
    }
    return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
}
IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
{
    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
    Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 || instr->m_opcode == Js::OpCode::Simd128_LdArr_F4);
    IR::Opnd * src = instr->GetSrc1();
    IR::RegOpnd * indexOpnd = src->AsIndirOpnd()->GetIndexOpnd();
    IR::Opnd * dst = instr->GetDst();
    ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
    // If we type-specialized, then array is a definite typed-array.
    Assert(arrType.IsObject() && arrType.IsTypedArray());
    Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr);
    Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr);
    return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
}
IR::Instr *
LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
{
    IR::Instr *newInstr = nullptr;
    IR::Instr * instrPrev = instr->m_prev;
    // Type-specialized.
    Assert(dst && dst->IsSimd128());
    Assert(src->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
        src->AsIndirOpnd()->SetScale(scaleFactor);
    }
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 12:
    {
        IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
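        // A 12-byte (three-lane) load is split: MOVSD brings in lanes 0-1, MOVSS loads the third
        // float at offset +8 into temp's lane 0, PSLLDQ moves it up by 8 bytes into lane 2, and
        // ORPS merges it into dst (lane 3 ends up zero since both loads clear the upper bits).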
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
        instr->InsertBefore(newInstr);
        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
        Legalize(newInstr);
        // PSLLDQ temp, 0x08
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
        // ORPS dst, temp
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    }
    case 8:
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 4:
        // MOVSS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    default:
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
IR::Instr* LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
           instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
           instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
           instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
           instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4 ||
           instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
           );
    IR::Instr * instrPrev = instr->m_prev;
    IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
    IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
    IR::Opnd * dst = instr->GetDst();
    IR::Opnd * src1 = instr->GetSrc1();
    IR::Opnd * src2 = instr->GetSrc2();
    ValueType arrType = baseOpnd->GetValueType();
    uint8 dataWidth = instr->dataWidth;
    // Type-specialized.
    Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
    IR::Instr * done;
    if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
    {
        // CMP indexOpnd, src2(arrSize)
        // JA $helper
        // JMP $store
        // $helper:
        // Throw RangeError
        // JMP $done
        // $store:
        // MOV dst([arrayBuffer + indexOpnd]), src1
        // $done:
        uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
        done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
    }
    else
    {
        // We might have a constant index if globopt propped a constant store; we can check ahead of time whether it is in bounds.
        if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
        {
            m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
            instr->Remove();
            return instrPrev;
        }
        done = instr;
    }
    return Simd128ConvertToStore(dst, src1, dataWidth, instr);
}
IR::Instr* LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
{
    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
    Assert(instr->m_opcode == Js::OpCode::Simd128_StArr_I4 || instr->m_opcode == Js::OpCode::Simd128_StArr_F4);
    IR::Opnd * dst = instr->GetDst();
    IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd();
    IR::Opnd * src1 = instr->GetSrc1();
    uint8 dataWidth = instr->dataWidth;
    ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
    // If we type-specialized, then array is a definite typed-array.
    Assert(arrType.IsObject() && arrType.IsTypedArray());
    Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr);
    Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr);
    return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
}
IR::Instr *
LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
{
    IR::Instr * instrPrev = instr->m_prev;
    Assert(src1 && src1->IsSimd128());
    Assert(dst->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
        dst->AsIndirOpnd()->SetScale(scaleFactor);
    }
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst([arrayBuffer + indexOpnd]), src1
        instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
        break;
    case 12:
    {
        IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
        IR::Instr *movss;
        // MOVAPS temp, src
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
        // MOVSD dst([arrayBuffer + indexOpnd]), temp
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
        // PSRLDQ temp, 0x08
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func));
        // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
        movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
        instr->InsertBefore(movss);
        movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
        break;
    }
    case 8:
        // MOVSD dst([arrayBuffer + indexOpnd]), src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
        break;
    case 4:
        // MOVSS dst([arrayBuffer + indexOpnd]), src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
        break;
    default:
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
void
LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());

    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
    IR::Opnd* headSegmentLengthOpnd;

    if (arrayRegOpnd->EliminatedUpperBoundCheck())
    {
        // Already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
        return;
    }

    if (arrayRegOpnd->HeadSegmentLengthSym())
    {
        headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
    }
    else
    {
        // headSegmentLength = [base + offset(length)]
        int lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
        headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
    }

    IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
    int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
    if (indexOpnd)
    {
        // MOV tmp, elemCount
        // ADD tmp, index
        // CMP tmp, Length   -- upper bound check
        // JBE $skipLabel
        // Throw RuntimeError
        // skipLabel:
        IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
        IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
        m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
        Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
        m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    else
    {
        // CMP Length, (offset + elemCount)
        // JAE $skipLabel
        int32 offset = indirOpnd->GetOffset();
        int32 index = offset + elemCount;
        m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrGe_A, true, skipLabel, skipLabel);
    }
    m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    return;
}
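
// Rebases the indir operand onto the array's head segment: reuse the head segment sym if globOpt
// provided one, otherwise load the head segment pointer from the array object, then swap it in as
// the base operand so the store addresses the segment data rather than the array object.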
void
LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
    // For non-asm.js we check if the headSeg symbol exists, else load it.
    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
    IR::RegOpnd *headSegmentOpnd;

    if (arrayRegOpnd->HeadSegmentSym())
    {
        headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
    }
    else
    {
        // REVIEW: Is this needed? Shouldn't globOpt make sure headSegSym is set and alive?
        // MOV headSegment, [base + offset(head)]
        int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
        IR::IndirOpnd * headSegIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
        headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
        m_lowerer->InsertMove(headSegmentOpnd, headSegIndirOpnd, instr);
    }

    // Change the base to be the head segment instead of the array object.
    indirOpnd->SetBaseOpnd(headSegmentOpnd);
}
// Builds args list <dst, src1, src2, src3 ..>
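// The instruction's src1 is expected to be the head of a single-def ExtendArg_A chain. Each link's
// src1 is copied into the list (and marked live on back edges when it is a register), and the walk
// continues through src2 until the chain ends. The dst is pushed last, so it sits at the head of
// the returned list, matching the <dst, src1, src2, ..> order noted above.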
SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
{
    SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
    IR::Instr *pInstr = instr;
    IR::Opnd *dst, *src1, *src2;

    dst = src1 = src2 = nullptr;

    if (pInstr->GetDst())
    {
        dst = pInstr->UnlinkDst();
    }

    src1 = pInstr->UnlinkSrc1();
    Assert(src1->GetStackSym()->IsSingleDef());
    pInstr = src1->GetStackSym()->GetInstrDef();

    while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
    {
        Assert(pInstr->GetSrc1());
        src1 = pInstr->GetSrc1()->Copy(this->m_func);
        if (src1->IsRegOpnd())
        {
            this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
        }
        args->Push(src1);

        if (pInstr->GetSrc2())
        {
            src2 = pInstr->GetSrc2();
            Assert(src2->GetStackSym()->IsSingleDef());
            pInstr = src2->GetStackSym()->GetInstrDef();
        }
        else
        {
            pInstr = nullptr;
        }
    }
    args->Push(dst);
    Assert(args->Count() > 3);
    return args;
}
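
// Ensures an int32 constant operand is in a register: returns the operand unchanged if it is
// already a register, otherwise emits a MOV into a fresh temp register before the given instruction.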
IR::Opnd* LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd)
{
    if (constOpnd->IsRegOpnd())
    {
        // already a register
        return constOpnd;
    }

    Assert(constOpnd->GetType() == TyInt32);
    IR::RegOpnd *tempReg = IR::RegOpnd::New(TyInt32, m_func);

    // MOV tempReg, constOpnd
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
    return tempReg;
}
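
// Builds the table that maps each Simd128_* IR opcode to the SSE/SSE2 machine opcode used to lower
// it. The table is indexed by the opcode's offset from the start of the SIMD opcode range, so it
// relies on the SIMD opcodes being contiguous (asserted below).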
void LowererMD::Simd128InitOpcodeMap()
{
    m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());

    // All SIMD ops should be contiguous for this mapping to work.
    Assert(Js::OpCode::Simd128_End + (Js::OpCode)1 == Js::OpCode::Simd128_Start_Extend);

    SET_SIMDOPCODE(Simd128_FromFloat64x2_I4     , CVTTPD2DQ);
    SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_Add_I4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_I4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Lt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Gt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Eq_I4                , PCMPEQD);
    SET_SIMDOPCODE(Simd128_And_I4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_I4               , XORPS);
    SET_SIMDOPCODE(Simd128_LdSignMask_I4        , MOVMSKPS);

    SET_SIMDOPCODE(Simd128_FromFloat64x2_F4     , CVTPD2PS);
    SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_F4       , CVTDQ2PS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_Abs_F4               , ANDPS);
    SET_SIMDOPCODE(Simd128_Neg_F4               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_F4               , ADDPS);
    SET_SIMDOPCODE(Simd128_Sub_F4               , SUBPS);
    SET_SIMDOPCODE(Simd128_Mul_F4               , MULPS);
    SET_SIMDOPCODE(Simd128_Div_F4               , DIVPS);
    SET_SIMDOPCODE(Simd128_Min_F4               , MINPS);
    SET_SIMDOPCODE(Simd128_Max_F4               , MAXPS);
    SET_SIMDOPCODE(Simd128_Sqrt_F4              , SQRTPS);
    SET_SIMDOPCODE(Simd128_Lt_F4                , CMPLTPS);
    SET_SIMDOPCODE(Simd128_LtEq_F4              , CMPLEPS);
    SET_SIMDOPCODE(Simd128_Eq_F4                , CMPEQPS);
    SET_SIMDOPCODE(Simd128_Neq_F4               , CMPNEQPS);
    SET_SIMDOPCODE(Simd128_Gt_F4                , CMPLTPS);  // CMPLTPS (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_F4              , CMPLEPS);  // CMPLEPS (swap srcs)
    SET_SIMDOPCODE(Simd128_And_F4               , ANDPS);
    SET_SIMDOPCODE(Simd128_Or_F4                , ORPS);
    SET_SIMDOPCODE(Simd128_Xor_F4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_F4               , XORPS);
    SET_SIMDOPCODE(Simd128_LdSignMask_F4        , MOVMSKPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4_D2     , CVTPS2PD);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_D2       , CVTDQ2PD);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2   , MOVAPS);
    SET_SIMDOPCODE(Simd128_Neg_D2               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_D2               , ADDPD);
    SET_SIMDOPCODE(Simd128_Abs_D2               , ANDPD);
    SET_SIMDOPCODE(Simd128_Sub_D2               , SUBPD);
    SET_SIMDOPCODE(Simd128_Mul_D2               , MULPD);
    SET_SIMDOPCODE(Simd128_Div_D2               , DIVPD);
    SET_SIMDOPCODE(Simd128_Min_D2               , MINPD);
    SET_SIMDOPCODE(Simd128_Max_D2               , MAXPD);
    SET_SIMDOPCODE(Simd128_Sqrt_D2              , SQRTPD);
    SET_SIMDOPCODE(Simd128_Lt_D2                , CMPLTPD);
    SET_SIMDOPCODE(Simd128_LtEq_D2              , CMPLEPD);
    SET_SIMDOPCODE(Simd128_Eq_D2                , CMPEQPD);
    SET_SIMDOPCODE(Simd128_Neq_D2               , CMPNEQPD);
    SET_SIMDOPCODE(Simd128_Gt_D2                , CMPLTPD);  // CMPLTPD (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_D2              , CMPLEPD);  // CMPLEPD (swap srcs)
    SET_SIMDOPCODE(Simd128_LdSignMask_D2        , MOVMSKPD);
}
#undef SET_SIMDOPCODE
#undef GET_SIMDOPCODE

// FromVar
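// Loads a SIMD value out of a boxed JavascriptSIMD object. When the instruction carries bailout
// info, an object test plus a vtable check (Float32x4 or Int32x4) guards the unboxing and the
// FromVar is turned into a bailout call on the helper path; the fast path does an unaligned
// MOVUPS from the object's value field into the SIMD register.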
void LowererMD::GenerateCheckedSimdLoad(IR::Instr * instr)
{
    Assert(instr->m_opcode == Js::OpCode::FromVar);
    Assert(instr->GetSrc1()->GetType() == TyVar);
    Assert(IRType_IsSimd128(instr->GetDst()->GetType()));

    bool checkRequired = instr->HasBailOutInfo();
    IR::LabelInstr * labelHelper = nullptr, * labelDone = nullptr;
    IR::Instr * insertInstr = instr, * newInstr;
    IR::RegOpnd * src = instr->GetSrc1()->AsRegOpnd(), * dst = instr->GetDst()->AsRegOpnd();

    Assert(!checkRequired || instr->GetBailOutKind() == IR::BailOutSimd128F4Only || instr->GetBailOutKind() == IR::BailOutSimd128I4Only);

    if (checkRequired)
    {
        labelHelper = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
        labelDone = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
        instr->InsertBefore(labelHelper);
        instr->InsertAfter(labelDone);
        insertInstr = labelHelper;

        GenerateObjectTest(instr->GetSrc1(), insertInstr, labelHelper);

        newInstr = IR::Instr::New(Js::OpCode::CMP, instr->m_func);
        newInstr->SetSrc1(IR::IndirOpnd::New(instr->GetSrc1()->AsRegOpnd(), 0, TyMachPtr, instr->m_func));
        newInstr->SetSrc2(m_lowerer->LoadVTableValueOpnd(instr, dst->GetType() == TySimd128F4 ? VTableValue::VtableSimd128F4 : VTableValue::VtableSimd128I4));
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, labelHelper, this->m_func));

        instr->UnlinkSrc1();
        instr->UnlinkDst();
        this->m_lowerer->GenerateBailOut(instr);
    }

    size_t valueOffset = dst->GetType() == TySimd128F4 ? Js::JavascriptSIMDFloat32x4::GetOffsetOfValue() : Js::JavascriptSIMDInt32x4::GetOffsetOfValue();
    Assert(valueOffset < INT_MAX);
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::IndirOpnd::New(src, static_cast<int>(valueOffset), dst->GetType(), this->m_func), this->m_func);
    insertInstr->InsertBefore(newInstr);

    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, this->m_func));
    // FromVar is converted to a BailOut call. Don't remove.
}

// ToVar
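// Boxes a SIMD register value: calls the allocation helper for the matching SIMD type
// (Float32x4 or Int32x4), then stores the raw 16 bytes into the new object's value field
// with an unaligned MOVUPS and removes the original instruction.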
void LowererMD::GenerateSimdStore(IR::Instr * instr)
{
    IR::RegOpnd *dst, *src;
    IRType type;
    dst = instr->GetDst()->AsRegOpnd();
    src = instr->GetSrc1()->AsRegOpnd();
    type = src->GetType();

    this->m_lowerer->LoadScriptContext(instr);
    IR::Instr * instrCall = IR::Instr::New(Js::OpCode::CALL, instr->GetDst(),
        IR::HelperCallOpnd::New(type == TySimd128F4 ? IR::HelperAllocUninitializedSimdF4 : IR::HelperAllocUninitializedSimdI4, this->m_func), this->m_func);
    instr->InsertBefore(instrCall);
    this->lowererMDArch.LowerCall(instrCall, 0);

    IR::Opnd * valDst;
    if (type == TySimd128F4)
    {
        valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDFloat32x4::GetOffsetOfValue(), TySimd128F4, this->m_func);
    }
    else
    {
        valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDInt32x4::GetOffsetOfValue(), TySimd128I4, this->m_func);
    }

    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func));
    instr->Remove();
}
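
// Classifies the four shuffle lane indices: lanes 0-3 select from src1 and lanes 4-7 select from
// src2. Writes 1 or 2 into lanesSrc per lane and counts how many lanes come from each source.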
void LowererMD::CheckShuffleLanes4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
{
    Assert(lanes);
    Assert(lanesSrc);
    Assert(fromSrc1 && fromSrc2);
    *fromSrc1 = 0;
    *fromSrc2 = 0;
    for (uint i = 0; i < 4; i++)
    {
        if (lanes[i] >= 0 && lanes[i] < 4)
        {
            (*fromSrc1)++;
            lanesSrc[i] = 1;
        }
        else if (lanes[i] >= 4 && lanes[i] < 8)
        {
            (*fromSrc2)++;
            lanesSrc[i] = 2;
        }
        else
        {
            Assert(UNREACHED);
        }
    }
}
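
// Emits a SHUFPS for the given lane selection. Lane indices are first normalized to the 0-3 range,
// then packed into the 8-bit immediate (two bits per lane, lane 0 in the low bits). dst is primed
// with src1 via MOVAPS since SHUFPS takes its low two result lanes from dst and its high two from src2.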
void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
{
    int8 shufMask;
    uint8 normLanes[4];

    for (uint i = 0; i < 4; i++)
    {
        normLanes[i] = (lanes[i] >= 4) ? (lanes[i] - 4) : lanes[i];
    }
    shufMask = (int8)((normLanes[3] << 6) | (normLanes[2] << 4) | (normLanes[1] << 2) | normLanes[0]);

    // MOVAPS dst, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
    // SHUFPS dst, src2, imm8
    instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
}
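
// Bytes per element of a typed array, derived from the indir scale for its value type.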
BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
{
    return (1 << Lowerer::GetArrayIndirScale(arrType));
}