LowerMDSharedSimd128.cpp 130 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "Backend.h"
  6. #ifdef ENABLE_SIMDJS
  7. #define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]
  8. #define SET_SIMDOPCODE(irOpcode, mdOpcode) \
  9. Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0);\
  10. Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart);\
  11. m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
  12. IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
  13. {
  14. // Currently only handles type-specialized/asm.js opcodes
  15. if (!instr->GetDst())
  16. {
  17. // SIMD ops always have DST in asmjs
  18. Assert(!instr->m_func->GetJITFunctionBody()->IsAsmJsMode());
  19. // unused result. Do nothing.
  20. IR::Instr * pInstr = instr->m_prev;
  21. instr->Remove();
  22. return pInstr;
  23. }
  24. if (Simd128TryLowerMappedInstruction(instr))
  25. {
  26. return instr->m_prev;
  27. }
  28. return Simd128LowerUnMappedInstruction(instr);
  29. }
  30. bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
  31. {
  32. bool legalize = true;
  33. Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
  34. if ((uint32)opcode == 0)
  35. return false;
  36. Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32);
  37. Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
  38. Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
  39. switch (instr->m_opcode)
  40. {
  41. case Js::OpCode::Simd128_Abs_F4:
  42. Assert(opcode == Js::OpCode::ANDPS);
  43. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
  44. break;
  45. #if 0
  46. case Js::OpCode::Simd128_Abs_D2:
  47. Assert(opcode == Js::OpCode::ANDPD);
  48. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
  49. break;
  50. #endif // 0
  51. case Js::OpCode::Simd128_Neg_F4:
  52. Assert(opcode == Js::OpCode::XORPS);
  53. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
  54. break;
  55. #if 0
  56. case Js::OpCode::Simd128_Neg_D2:
  57. Assert(opcode == Js::OpCode::XORPS);
  58. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
  59. break;
  60. #endif // 0
  61. case Js::OpCode::Simd128_Not_I4:
  62. case Js::OpCode::Simd128_Not_I16:
  63. case Js::OpCode::Simd128_Not_I8:
  64. case Js::OpCode::Simd128_Not_U4:
  65. case Js::OpCode::Simd128_Not_U8:
  66. case Js::OpCode::Simd128_Not_U16:
  67. case Js::OpCode::Simd128_Not_B4:
  68. case Js::OpCode::Simd128_Not_B8:
  69. case Js::OpCode::Simd128_Not_B16:
  70. Assert(opcode == Js::OpCode::XORPS);
  71. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), instr->GetSrc1()->GetType(), m_func));
  72. break;
  73. case Js::OpCode::Simd128_Gt_F4:
  74. //case Js::OpCode::Simd128_Gt_D2:
  75. case Js::OpCode::Simd128_GtEq_F4:
  76. //case Js::OpCode::Simd128_GtEq_D2:
  77. case Js::OpCode::Simd128_Lt_I4:
  78. case Js::OpCode::Simd128_Lt_I8:
  79. case Js::OpCode::Simd128_Lt_I16:
  80. {
  81. Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS
  82. || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD || opcode == Js::OpCode::PCMPGTB
  83. || opcode == Js::OpCode::PCMPGTW );
  84. // swap operands
  85. auto *src1 = instr->UnlinkSrc1();
  86. auto *src2 = instr->UnlinkSrc2();
  87. instr->SetSrc1(src2);
  88. instr->SetSrc2(src1);
  89. break;
  90. }
  91. }
  92. instr->m_opcode = opcode;
  93. if (legalize)
  94. {
  95. //MakeDstEquSrc1(instr);
  96. Legalize(instr);
  97. }
  98. return true;
  99. }
///
/// Lowers a SIMD opcode that has no single-machine-op mapping. Pure dispatch:
/// routes each opcode (or opcode family) to its dedicated lowering helper.
/// Each helper returns the instruction from which lowering should resume.
///
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
    switch (instr->m_opcode)
    {
    // SIMD constant load
    case Js::OpCode::Simd128_LdC:
        return Simd128LoadConst(instr);

    // 4-lane constructors (float32x4 / int32x4 / uint32x4 / bool32x4)
    case Js::OpCode::Simd128_FloatsToF4:
    case Js::OpCode::Simd128_IntsToI4:
    case Js::OpCode::Simd128_IntsToU4:
    case Js::OpCode::Simd128_IntsToB4:
        return Simd128LowerConstructor_4(instr);

    // 8-lane constructors
    case Js::OpCode::Simd128_IntsToI8:
    case Js::OpCode::Simd128_IntsToU8:
    case Js::OpCode::Simd128_IntsToB8:
        return Simd128LowerConstructor_8(instr);

    // 16-lane constructors
    case Js::OpCode::Simd128_IntsToI16:
    case Js::OpCode::Simd128_IntsToU16:
    case Js::OpCode::Simd128_IntsToB16:
        return Simd128LowerConstructor_16(instr);
#if 0
    case Js::OpCode::Simd128_DoublesToD2:
        return Simd128LowerConstructor_2(instr);
#endif // 0

    // Lane extraction (all element types)
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B4:
    case Js::OpCode::Simd128_ExtractLane_B8:
    case Js::OpCode::Simd128_ExtractLane_B16:
    case Js::OpCode::Simd128_ExtractLane_F4:
        return Simd128LowerLdLane(instr);

    // Lane replacement, grouped by lane count
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_F4:
    case Js::OpCode::Simd128_ReplaceLane_U4:
    case Js::OpCode::Simd128_ReplaceLane_B4:
        return SIMD128LowerReplaceLane_4(instr);
    case Js::OpCode::Simd128_ReplaceLane_I8:
    case Js::OpCode::Simd128_ReplaceLane_U8:
    case Js::OpCode::Simd128_ReplaceLane_B8:
        return SIMD128LowerReplaceLane_8(instr);
    case Js::OpCode::Simd128_ReplaceLane_I16:
    case Js::OpCode::Simd128_ReplaceLane_U16:
    case Js::OpCode::Simd128_ReplaceLane_B16:
        return SIMD128LowerReplaceLane_16(instr);

    // Splat scalar to all lanes
    case Js::OpCode::Simd128_Splat_F4:
    case Js::OpCode::Simd128_Splat_I4:
    //case Js::OpCode::Simd128_Splat_D2:
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U4:
    case Js::OpCode::Simd128_Splat_U8:
    case Js::OpCode::Simd128_Splat_U16:
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        return Simd128LowerSplat(instr);

    // Float32x4 unary math
    case Js::OpCode::Simd128_Rcp_F4:
    //case Js::OpCode::Simd128_Rcp_D2:
        return Simd128LowerRcp(instr);
    case Js::OpCode::Simd128_Sqrt_F4:
    //case Js::OpCode::Simd128_Sqrt_D2:
        return Simd128LowerSqrt(instr);
    case Js::OpCode::Simd128_RcpSqrt_F4:
    //case Js::OpCode::Simd128_RcpSqrt_D2:
        return Simd128LowerRcpSqrt(instr);

    // Lane-wise select(mask, a, b)
    case Js::OpCode::Simd128_Select_F4:
    case Js::OpCode::Simd128_Select_I4:
    //case Js::OpCode::Simd128_Select_D2:
    case Js::OpCode::Simd128_Select_I8:
    case Js::OpCode::Simd128_Select_I16:
    case Js::OpCode::Simd128_Select_U4:
    case Js::OpCode::Simd128_Select_U8:
    case Js::OpCode::Simd128_Select_U16:
        return Simd128LowerSelect(instr);

    // Integer negation (no single SSE op)
    case Js::OpCode::Simd128_Neg_I4:
    case Js::OpCode::Simd128_Neg_I8:
    case Js::OpCode::Simd128_Neg_I16:
    case Js::OpCode::Simd128_Neg_U4:
    case Js::OpCode::Simd128_Neg_U8:
    case Js::OpCode::Simd128_Neg_U16:
        return Simd128LowerNeg(instr);

    // Integer multiplies that need emulation (no direct 32x4/8x16 multiply in SSE2)
    case Js::OpCode::Simd128_Mul_I4:
    case Js::OpCode::Simd128_Mul_U4:
        return Simd128LowerMulI4(instr);
    case Js::OpCode::Simd128_Mul_I16:
    case Js::OpCode::Simd128_Mul_U16:
        return Simd128LowerMulI16(instr);

    // Shifts by scalar amount
    case Js::OpCode::Simd128_ShRtByScalar_I4:
    case Js::OpCode::Simd128_ShLtByScalar_I4:
    case Js::OpCode::Simd128_ShRtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_U4:
    case Js::OpCode::Simd128_ShLtByScalar_U4:
    case Js::OpCode::Simd128_ShRtByScalar_U8:
    case Js::OpCode::Simd128_ShLtByScalar_U8:
    case Js::OpCode::Simd128_ShRtByScalar_U16:
    case Js::OpCode::Simd128_ShLtByScalar_U16:
        return Simd128LowerShift(instr);

    // Typed-array loads; asm.js keeps its own bound-check lowering
    case Js::OpCode::Simd128_LdArr_I4:
    case Js::OpCode::Simd128_LdArr_I8:
    case Js::OpCode::Simd128_LdArr_I16:
    case Js::OpCode::Simd128_LdArr_U4:
    case Js::OpCode::Simd128_LdArr_U8:
    case Js::OpCode::Simd128_LdArr_U16:
    case Js::OpCode::Simd128_LdArr_F4:
    //case Js::OpCode::Simd128_LdArr_D2:
    case Js::OpCode::Simd128_LdArrConst_I4:
    case Js::OpCode::Simd128_LdArrConst_I8:
    case Js::OpCode::Simd128_LdArrConst_I16:
    case Js::OpCode::Simd128_LdArrConst_U4:
    case Js::OpCode::Simd128_LdArrConst_U8:
    case Js::OpCode::Simd128_LdArrConst_U16:
    case Js::OpCode::Simd128_LdArrConst_F4:
    //case Js::OpCode::Simd128_LdArrConst_D2:
        if (m_func->GetJITFunctionBody()->IsAsmJsMode())
        {
            // with bound checks
            return Simd128AsmJsLowerLoadElem(instr);
        }
        else
        {
            // non-AsmJs, boundChecks are extracted from instr
            return Simd128LowerLoadElem(instr);
        }

    // Typed-array stores; same asm.js / non-asm.js split as loads
    case Js::OpCode::Simd128_StArr_I4:
    case Js::OpCode::Simd128_StArr_I8:
    case Js::OpCode::Simd128_StArr_I16:
    case Js::OpCode::Simd128_StArr_U4:
    case Js::OpCode::Simd128_StArr_U8:
    case Js::OpCode::Simd128_StArr_U16:
    case Js::OpCode::Simd128_StArr_F4:
    //case Js::OpCode::Simd128_StArr_D2:
    case Js::OpCode::Simd128_StArrConst_I4:
    case Js::OpCode::Simd128_StArrConst_I8:
    case Js::OpCode::Simd128_StArrConst_I16:
    case Js::OpCode::Simd128_StArrConst_U4:
    case Js::OpCode::Simd128_StArrConst_U8:
    case Js::OpCode::Simd128_StArrConst_U16:
    case Js::OpCode::Simd128_StArrConst_F4:
    //case Js::OpCode::Simd128_StArrConst_D2:
        if (m_func->GetJITFunctionBody()->IsAsmJsMode())
        {
            return Simd128AsmJsLowerStoreElem(instr);
        }
        else
        {
            return Simd128LowerStoreElem(instr);
        }

    // 4-lane swizzle/shuffle have a dedicated path; narrower lanes share one
    case Js::OpCode::Simd128_Swizzle_U4:
    case Js::OpCode::Simd128_Swizzle_I4:
    case Js::OpCode::Simd128_Swizzle_F4:
    //case Js::OpCode::Simd128_Swizzle_D2:
        return Simd128LowerSwizzle_4(instr);
    case Js::OpCode::Simd128_Shuffle_U4:
    case Js::OpCode::Simd128_Shuffle_I4:
    case Js::OpCode::Simd128_Shuffle_F4:
    //case Js::OpCode::Simd128_Shuffle_D2:
        return Simd128LowerShuffle_4(instr);
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U8:
    case Js::OpCode::Simd128_Swizzle_U16:
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U8:
    case Js::OpCode::Simd128_Shuffle_U16:
        return Simd128LowerShuffle(instr);

    // Conversions between float32x4 and (u)int32x4
    case Js::OpCode::Simd128_FromUint32x4_F4:
        return Simd128LowerFloat32x4FromUint32x4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_I4:
        return Simd128LowerInt32x4FromFloat32x4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_U4:
        return Simd128LowerUint32x4FromFloat32x4(instr);

    // Comparisons lacking a direct SSE form (lowered via equal/greater + fix-ups)
    case Js::OpCode::Simd128_Neq_I4:
    case Js::OpCode::Simd128_Neq_I8:
    case Js::OpCode::Simd128_Neq_I16:
    case Js::OpCode::Simd128_Neq_U4:
    case Js::OpCode::Simd128_Neq_U8:
    case Js::OpCode::Simd128_Neq_U16:
        return Simd128LowerNotEqual(instr);
    case Js::OpCode::Simd128_Lt_U4:
    case Js::OpCode::Simd128_Lt_U8:
    case Js::OpCode::Simd128_Lt_U16:
    case Js::OpCode::Simd128_GtEq_U4:
    case Js::OpCode::Simd128_GtEq_U8:
    case Js::OpCode::Simd128_GtEq_U16:
        return Simd128LowerLessThan(instr);
    case Js::OpCode::Simd128_LtEq_I4:
    case Js::OpCode::Simd128_LtEq_I8:
    case Js::OpCode::Simd128_LtEq_I16:
    case Js::OpCode::Simd128_LtEq_U4:
    case Js::OpCode::Simd128_LtEq_U8:
    case Js::OpCode::Simd128_LtEq_U16:
    case Js::OpCode::Simd128_Gt_U4:
    case Js::OpCode::Simd128_Gt_U8:
    case Js::OpCode::Simd128_Gt_U16:
        return Simd128LowerLessThanOrEqual(instr);
    case Js::OpCode::Simd128_GtEq_I4:
    case Js::OpCode::Simd128_GtEq_I8:
    case Js::OpCode::Simd128_GtEq_I16:
        return Simd128LowerGreaterThanOrEqual(instr);

    // Float min/max (NaN-aware, so not a plain MINPS/MAXPS mapping)
    case Js::OpCode::Simd128_Min_F4:
    case Js::OpCode::Simd128_Max_F4:
        return Simd128LowerMinMax_F4(instr);

    // Boolean-vector reductions
    case Js::OpCode::Simd128_AnyTrue_B4:
    case Js::OpCode::Simd128_AnyTrue_B8:
    case Js::OpCode::Simd128_AnyTrue_B16:
        return Simd128LowerAnyTrue(instr);
    case Js::OpCode::Simd128_AllTrue_B4:
    case Js::OpCode::Simd128_AllTrue_B8:
    case Js::OpCode::Simd128_AllTrue_B16:
        return Simd128LowerAllTrue(instr);
    default:
        AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
    }
    // Only reached via the (asserting) default case.
    return nullptr;
}
///
/// Lowers Simd128_LdC (load of a SIMD immediate). The constant value is copied
/// into the native-code data area and the instruction is rewritten into a
/// plain assign (e.g. MOVUPS) from that memory location.
///
IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
{
    Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
    Assert(instr->GetDst()->IsSimd128());
    Assert(instr->GetSrc1()->IsSimd128());
    Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
    Assert(instr->GetSrc2() == nullptr);
    AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
    // MOVUPS dst, [const]
    // Stash the 16-byte constant in the per-function native code data area.
    void *pValue = NativeCodeDataNewNoFixup(this->m_func->GetNativeCodeDataAllocator(), SIMDType<DataDesc_LowererMD_Simd128LoadConst>, value);
    IR::Opnd * simdRef;
    if (!m_func->IsOOPJIT())
    {
        // In-process JIT: the data address is final, reference it directly.
        simdRef = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
    }
    else
    {
        // OOP JIT: address is not known yet; address the constant as
        // [nativeCodeDataSym + offset] so it relocates with the data block.
        int offset = NativeCodeData::GetDataTotalOffset(pValue);
        simdRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, instr->GetDst()->GetType(),
#if DBG
            NativeCodeData::GetDataDescription(pValue, m_func->m_alloc),
#endif
            m_func);
        // Keep the data-base sym alive across back edges so the indir stays valid.
        GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
    }
    instr->ReplaceSrc1(simdRef);
    // Turn the pseudo-op into the machine move appropriate for the dst type.
    instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
    Legalize(instr);
    return instr->m_prev;
}
  352. IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd)
  353. {
  354. Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 ||
  355. instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
  356. IR::Instr *pInstr;
  357. //dst = cmpOpcode dst, X86_ALL_ZEROS
  358. pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
  359. instr->InsertBefore(pInstr);
  360. Legalize(pInstr);
  361. // dst = PANDN dst, X86_ALL_NEG_ONES
  362. pInstr = IR::Instr::New(Js::OpCode::PANDN, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  363. instr->InsertBefore(pInstr);
  364. Legalize(pInstr);
  365. return instr;
  366. }
  367. IR::Instr* LowererMD::Simd128LowerConstructor_8(IR::Instr *instr)
  368. {
  369. IR::Opnd* dst = nullptr;
  370. IR::Opnd* srcs[8];
  371. //Simd128_IntsToI8/U8/B8
  372. Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToI8 || instr->m_opcode == Js::OpCode::Simd128_IntsToU8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8);
  373. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  374. Assert(args->Count() == 9);
  375. dst = args->Pop();
  376. uint i = 0;
  377. while (!args->Empty() && i < 8)
  378. {
  379. srcs[i] = args->Pop();
  380. // src's might have been constant prop'ed. Enregister them if so.
  381. srcs[i] = EnregisterIntConst(instr, srcs[i], TyInt16);
  382. Assert(srcs[i]->GetType() == TyInt16 && srcs[i]->IsRegOpnd());
  383. // PINSRW dst, srcs[i], i
  384. instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRW, dst, srcs[i], IR::IntConstOpnd::New(i, TyInt8, m_func, true), m_func));
  385. i++;
  386. }
  387. if (instr->m_opcode == Js::OpCode::Simd128_IntsToB8)
  388. {
  389. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
  390. }
  391. IR::Instr* prevInstr;
  392. prevInstr = instr->m_prev;
  393. instr->Remove();
  394. return prevInstr;
  395. }
///
/// Lowers the 16-lane constructors (Simd128_IntsToI16/U16/B16). SSE2 has no
/// byte-granularity insert, so each int8 scalar is spilled to a shared SIMD
/// temp area in memory, the whole 16 bytes are then loaded with MOVUPS, and
/// the B16 form gets its lanes canonicalized to boolean masks. The pseudo-op
/// is removed; returns the instruction preceding it.
///
IR::Instr* LowererMD::Simd128LowerConstructor_16(IR::Instr *instr)
{
    IR::Opnd* dst = nullptr;
    IR::Opnd* srcs[16];
    //Simd128_IntsToI16/U16/B16
    Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToU16 || instr->m_opcode == Js::OpCode::Simd128_IntsToI16 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16);
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    // Scratch SIMD spill slot provided by the thread context.
    intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#if DBG
    // using only one SIMD temp
    intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue);
#endif
    intptr_t address;
    IR::Instr * newInstr;
    // dst + 16 scalar sources from the extended-arg chain.
    Assert(args->Count() == 17);
    dst = args->Pop();
    uint i = 0;
    while (!args->Empty() && i < 16)
    {
        srcs[i] = args->Pop();
        // src's might have been constant prop'ed. Enregister them if so.
        srcs[i] = EnregisterIntConst(instr, srcs[i], TyInt8);
        Assert(srcs[i]->GetType() == TyInt8 && srcs[i]->IsRegOpnd());
        address = tempSIMD + i;
        // check for buffer overrun
        Assert((intptr_t)address < endAddrSIMD);
        // MOV [temp + i], src[i] (TyInt8)
        newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(tempSIMD + i, TyInt8, m_func), srcs[i], m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        i++;
    }
    // All 16 bytes written: pull them into the destination register.
    // MOVUPS dst, [temp]
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(tempSIMD, TySimd128U16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    if (instr->m_opcode == Js::OpCode::Simd128_IntsToB16)
    {
        // Bool vector: force every lane to all-ones/all-zeros.
        instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
    }
    IR::Instr* prevInstr;
    prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  441. IR::Instr* LowererMD::Simd128LowerConstructor_4(IR::Instr *instr)
  442. {
  443. IR::Opnd* dst = nullptr;
  444. IR::Opnd* src1 = nullptr;
  445. IR::Opnd* src2 = nullptr;
  446. IR::Opnd* src3 = nullptr;
  447. IR::Opnd* src4 = nullptr;
  448. IR::Instr* newInstr = nullptr;
  449. Assert(instr->m_opcode == Js::OpCode::Simd128_FloatsToF4 ||
  450. instr->m_opcode == Js::OpCode::Simd128_IntsToB4 ||
  451. instr->m_opcode == Js::OpCode::Simd128_IntsToI4 ||
  452. instr->m_opcode == Js::OpCode::Simd128_IntsToU4);
  453. // use MOVSS for both int32x4 and float32x4. MOVD zeroes upper bits.
  454. Js::OpCode movOpcode = Js::OpCode::MOVSS;
  455. Js::OpCode shiftOpcode = Js::OpCode::PSLLDQ;
  456. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  457. // The number of src opnds should be exact. If opnds are missing, they should be filled in by globopt during type-spec.
  458. Assert(args->Count() == 5);
  459. dst = args->Pop();
  460. src1 = args->Pop();
  461. src2 = args->Pop();
  462. src3 = args->Pop();
  463. src4 = args->Pop();
  464. if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4)
  465. {
  466. // We don't have f32 type-spec, so we type-spec to f64 and convert to f32 before use.
  467. if (src1->IsFloat64())
  468. {
  469. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  470. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  471. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func);
  472. instr->InsertBefore(newInstr);
  473. src1 = regOpnd32;
  474. }
  475. if (src2->IsFloat64())
  476. {
  477. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  478. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  479. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src2, this->m_func);
  480. instr->InsertBefore(newInstr);
  481. src2 = regOpnd32;
  482. }
  483. if (src3->IsFloat64())
  484. {
  485. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  486. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  487. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src3, this->m_func);
  488. instr->InsertBefore(newInstr);
  489. src3 = regOpnd32;
  490. }
  491. if (src4->IsFloat64())
  492. {
  493. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  494. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  495. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src4, this->m_func);
  496. instr->InsertBefore(newInstr);
  497. src4 = regOpnd32;
  498. }
  499. Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat32);
  500. Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat32);
  501. Assert(src3->IsRegOpnd() && src3->GetType() == TyFloat32);
  502. Assert(src4->IsRegOpnd() && src4->GetType() == TyFloat32);
  503. // MOVSS dst, src4
  504. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src4, m_func));
  505. // PSLLDQ dst, dst, 4
  506. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  507. // MOVSS dst, src3
  508. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src3, m_func));
  509. // PSLLDQ dst, 4
  510. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  511. // MOVSS dst, src2
  512. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src2, m_func));
  513. // PSLLDQ dst, 4
  514. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  515. // MOVSS dst, src1
  516. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src1, m_func));
  517. }
  518. else
  519. {
  520. //Simd128_IntsToI4/U4
  521. IR::RegOpnd *temp = IR::RegOpnd::New(TyFloat32, m_func);
  522. // src's might have been constant prop'ed. Enregister them if so.
  523. src4 = EnregisterIntConst(instr, src4);
  524. src3 = EnregisterIntConst(instr, src3);
  525. src2 = EnregisterIntConst(instr, src2);
  526. src1 = EnregisterIntConst(instr, src1);
  527. Assert(src1->GetType() == TyInt32 && src1->IsRegOpnd());
  528. Assert(src2->GetType() == TyInt32 && src2->IsRegOpnd());
  529. Assert(src3->GetType() == TyInt32 && src3->IsRegOpnd());
  530. Assert(src4->GetType() == TyInt32 && src4->IsRegOpnd());
  531. // MOVD t(TyFloat32), src4(TyInt32)
  532. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src4, m_func));
  533. // MOVSS dst, t
  534. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  535. // PSLLDQ dst, dst, 4
  536. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  537. // MOVD t(TyFloat32), sr34(TyInt32)
  538. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src3, m_func));
  539. // MOVSS dst, t
  540. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  541. // PSLLDQ dst, dst, 4
  542. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  543. // MOVD t(TyFloat32), src2(TyInt32)
  544. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src2, m_func));
  545. // MOVSS dst, t
  546. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  547. // PSLLDQ dst, dst, 4
  548. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  549. // MOVD t(TyFloat32), src1(TyInt32)
  550. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src1, m_func));
  551. // MOVSS dst, t
  552. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  553. if (instr->m_opcode == Js::OpCode::Simd128_IntsToB4)
  554. {
  555. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
  556. }
  557. }
  558. IR::Instr* prevInstr;
  559. prevInstr = instr->m_prev;
  560. instr->Remove();
  561. return prevInstr;
  562. }
#if 0
// Disabled lowering for the float64x2 constructor (Simd128_DoublesToD2).
// Not compiled; kept for reference. Builds the vector as:
//   MOVSD dst, src2 ; PSLLDQ dst, 8 ; MOVSD dst, src1
IR::Instr *LowererMD::Simd128LowerConstructor_2(IR::Instr *instr)
{
    IR::Opnd* dst = nullptr;
    IR::Opnd* src1 = nullptr;
    IR::Opnd* src2 = nullptr;
    Assert(instr->m_opcode == Js::OpCode::Simd128_DoublesToD2);
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    src2 = instr->GetSrc2();
    Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat64);
    Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat64);
    // MOVSD dst, src2
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src2, m_func));
    // PSLLDQ dst, dst, 8
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, dst, dst, IR::IntConstOpnd::New(TySize[TyFloat64], TyInt8, m_func, true), m_func));
    // MOVSD dst, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func));
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    IR::Instr* prevInstr;
    prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
#endif
// Lowers Simd128_ExtractLane_* (F4/I4/U4/B4/I8/U8/B8/I16/U16/B16).
// Strategy: shift the wanted 4-byte chunk of src1 down to byte 0 with PSRLDQ,
// move it to dst with MOVSS/MOVD, then for the 8- and 16-lane types isolate
// the 2- or 1-byte sublane inside that chunk with SHR plus sign extension
// (signed lanes) or an AND mask (unsigned/bool lanes). Bool lanes are finally
// normalized to 1/0 by comparing the lane against -1.
IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
{
    IR::Opnd* dst, *src1, *src2;
    Js::OpCode movOpcode = Js::OpCode::MOVSS;
    uint laneWidth = 0, laneIndex = 0, shamt = 0, mask = 0;
    IRType laneType = TyInt32;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    src2 = instr->GetSrc2();
    Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64));
    Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2 && src2->IsIntConstOpnd());
    // src2 is the constant lane index
    laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
    // Lanes are addressed in 4-byte (dword) units; for sub-dword lanes,
    // laneIndex is rescaled below and shamt/mask pick out the sublane.
    laneWidth = 4;
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ExtractLane_F4:
        movOpcode = Js::OpCode::MOVSS;
        Assert(laneIndex < 4);
        break;
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_B8:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 8);
        // two 16-bit lanes per dword: bit offset of the lane within the dword
        shamt = (laneIndex % 2) * 16;
        // dword index that contains the lane
        laneIndex = laneIndex / 2;
        laneType = TyInt16;
        mask = 0x0000ffff;
        break;
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B16:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 16);
        // four 8-bit lanes per dword
        shamt = (laneIndex % 4) * 8;
        laneIndex = laneIndex / 4;
        laneType = TyInt8;
        mask = 0x000000ff;
        break;
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_B4:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 4);
        break;
    default:
        Assert(UNREACHED);
    }
    {
        IR::Opnd* tmp = src1;
        if (laneIndex != 0)
        {
            // tmp = PSRLDQ src1, shamt -- shift the containing dword down to byte 0
            tmp = IR::RegOpnd::New(src1->GetType(), m_func);
            IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneWidth * laneIndex, TyInt8, m_func, true), m_func);
            instr->InsertBefore(shiftInstr);
            Legalize(shiftInstr);
        }
        // MOVSS/MOVSD/MOVD dst, tmp
        instr->InsertBefore(IR::Instr::New(movOpcode, movOpcode == Js::OpCode::MOVD ? dst : dst->UseWithNewType(tmp->GetType(), m_func), tmp, m_func));
    }
    // dst has the 4-byte lane
    if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16|| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16|| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
    {
        // extract the 1/2 bytes sublane
        IR::Instr *newInstr = nullptr;
        if (shamt != 0)
        {
            // SHR dst, dst, shamt -- bring the sublane to bit 0
            newInstr = IR::Instr::New(Js::OpCode::SHR, dst, dst, IR::IntConstOpnd::New((IntConstType)shamt, TyInt8, m_func), m_func);
            instr->InsertBefore(newInstr);
            Legalize(newInstr);
        }
        Assert(laneType == TyInt8 || laneType == TyInt16);
        // zero or sign-extend upper bits: signed lanes sign-extend, unsigned
        // and bool lanes mask the upper bits off
        if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16)
        {
            if (laneType == TyInt8)
            {
                // copy through an 8-bit temp so MOVSX sees a byte source
                IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
                newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
                instr->InsertBefore(newInstr);
                Legalize(newInstr);
                newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func);
            }
            else
            {
                newInstr = IR::Instr::New(Js::OpCode::MOVSXW, dst, dst->UseWithNewType(laneType, m_func), m_func);
            }
        }
        else
        {
            newInstr = IR::Instr::New(Js::OpCode::AND, dst, dst, IR::IntConstOpnd::New(mask, TyInt32, m_func), m_func);
        }
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
    {
        // Normalize a bool lane to 1/0: dst = (lane == -1) ? 1 : 0
        IR::Instr* pInstr = nullptr;
        IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
        // cmp dst, -1
        pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func));
        pInstr->SetSrc2(IR::IntConstOpnd::New(-1, laneType, m_func, true));
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // mov tmp(TyInt8), dst
        pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // sete tmp(TyInt8)
        pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // movsx dst, tmp(TyInt8)
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
// Lowers Simd128_Splat_* : replicates a scalar into every lane of dst.
// F4/I4/U4 use a scalar move into lane 0 followed by a shuffle with control 0;
// I8/U8 and I16/U16 truncate the scalar then widen it across lanes with
// PUNPCK + PSHUFD; bool splats branch on the scalar and materialize either
// all-ones (true) or all-zeros (false).
IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
{
    Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
    IR::Opnd *dst, *src1;
    IR::Instr *pInstr = nullptr;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 ||
        src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 ||
        src1->GetType() == TyUint8 || src1->GetType() == TyUint32));
    Assert(!instr->GetSrc2());
    IR::Opnd* tempTruncate = nullptr;
    // bSkip == true means the case emitted its whole sequence, so the generic
    // mov + shuffle tail at the bottom must not run.
    bool bSkip = false;
    IR::LabelInstr *labelZero = IR::LabelInstr::New(Js::OpCode::Label, m_func);
    IR::LabelInstr *labelDone = IR::LabelInstr::New(Js::OpCode::Label, m_func);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_Splat_F4:
        shufOpCode = Js::OpCode::SHUFPS;
        movOpCode = Js::OpCode::MOVSS;
        break;
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_U4:
        shufOpCode = Js::OpCode::PSHUFD;
        movOpCode = Js::OpCode::MOVD;
        break;
#if 0
    case Js::OpCode::Simd128_Splat_D2:
        shufOpCode = Js::OpCode::SHUFPD;
        movOpCode = Js::OpCode::MOVSD;
        break;
#endif // 0
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_U8:
        // MOV tempTruncate(bx), src1: truncate the value to 16bit int
        // MOVD dst, tempTruncate(bx)
        // PUNPCKLWD dst, dst      -- duplicate the word into the low dword
        // PSHUFD dst, dst, 0      -- replicate that dword into all four lanes
        tempTruncate = EnregisterIntConst(instr, src1, TyInt16);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U16:
        // MOV tempTruncate(bx), src1: truncate the value to 8bit int
        // MOVD dst, tempTruncate(bx)
        // PUNPCKLBW dst, dst      -- byte -> word
        // PUNPCKLWD dst, dst      -- word -> dword
        // PSHUFD dst, dst, 0      -- replicate dword into all lanes
        tempTruncate = EnregisterIntConst(instr, src1, TyInt8);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLBW, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        // CMP src1, 0
        // JEQ $labelZero
        // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
        // JMP $labelDone
        // $labelZero:
        // XORPS dst, dst
        // $labelDone:
        //pInstr = IR::Instr::New(Js::OpCode::CMP, src1, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func);
        //instr->InsertBefore(pInstr);
        //Legalize(pInstr);
        // cmp src1, 0000h
        pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        pInstr->SetSrc1(src1);
        pInstr->SetSrc2(IR::IntConstOpnd::New(0x0000, TyInt32, m_func, true));
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        //JEQ $labelZero
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, labelZero, m_func));
        // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES] -- truthy scalar: all lanes true
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // JMP $labelDone
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, m_func));
        // $labelZero:
        instr->InsertBefore(labelZero);
        // XORPS dst, dst
        instr->InsertBefore(IR::Instr::New(Js::OpCode::XORPS, dst, dst, dst, m_func)); // make dst to be 0
        // $labelDone:
        instr->InsertBefore(labelDone);
        bSkip = true;
        break;
    default:
        Assert(UNREACHED);
    }
    // F4 splat may receive an f64-typed scalar (no f32 type-spec); convert first.
    if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
    {
        IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
        // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
        instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
        src1 = regOpnd32;
    }
    if (!bSkip)
    {
        // Generic 4-lane splat: move scalar into lane 0, then broadcast with
        // shuffle control 0 (all lanes select lane 0).
        instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
        instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  826. IR::Instr* LowererMD::Simd128LowerRcp(IR::Instr *instr, bool removeInstr)
  827. {
  828. Js::OpCode opcode = Js::OpCode::DIVPS;
  829. IR::Opnd *dst, *src1;
  830. dst = instr->GetDst();
  831. src1 = instr->GetSrc1();
  832. Assert(dst && dst->IsRegOpnd());
  833. Assert(src1 && src1->IsRegOpnd());
  834. Assert(instr->GetSrc2() == nullptr);
  835. Assert(src1->IsSimd128F4() || src1->IsSimd128I4());
  836. opcode = Js::OpCode::DIVPS;
  837. #if 0
  838. {
  839. Assert(instr->m_opcode == Js::OpCode::Simd128_Rcp_D2 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
  840. Assert(src1->IsSimd128D2());
  841. opcode = Js::OpCode::DIVPD;
  842. x86_allones_mask = (void*)(&X86_ALL_ONES_D2);
  843. }
  844. #endif // 0
  845. IR::RegOpnd* tmp = IR::RegOpnd::New(src1->GetType(), m_func);
  846. IR::Instr* movInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllOnesF4Addr(), src1->GetType(), m_func), m_func);
  847. instr->InsertBefore(movInstr);
  848. Legalize(movInstr);
  849. instr->InsertBefore(IR::Instr::New(opcode, tmp, tmp, src1, m_func));
  850. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, tmp, m_func));
  851. if (removeInstr)
  852. {
  853. IR::Instr* prevInstr = instr->m_prev;
  854. instr->Remove();
  855. return prevInstr;
  856. }
  857. return instr;
  858. }
  859. IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
  860. {
  861. Js::OpCode opcode = Js::OpCode::SQRTPS;
  862. IR::Opnd *dst, *src1;
  863. dst = instr->GetDst();
  864. src1 = instr->GetSrc1();
  865. Assert(dst && dst->IsRegOpnd());
  866. Assert(src1 && src1->IsRegOpnd());
  867. Assert(instr->GetSrc2() == nullptr);
  868. opcode = Js::OpCode::SQRTPS;
  869. #if 0
  870. {
  871. Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
  872. opcode = Js::OpCode::SQRTPD;
  873. }
  874. #endif // 0
  875. instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func));
  876. IR::Instr* prevInstr = instr->m_prev;
  877. instr->Remove();
  878. return prevInstr;
  879. }
  880. IR::Instr* LowererMD::Simd128LowerRcpSqrt(IR::Instr *instr)
  881. {
  882. Js::OpCode opcode = Js::OpCode::SQRTPS;
  883. Simd128LowerRcp(instr, false);
  884. opcode = Js::OpCode::SQRTPS;
  885. #if 0
  886. else
  887. {
  888. Assert(instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
  889. opcode = Js::OpCode::SQRTPD;
  890. }
  891. #endif // 0
  892. instr->InsertBefore(IR::Instr::New(opcode, instr->GetDst(), instr->GetDst(), m_func));
  893. IR::Instr* prevInstr = instr->m_prev;
  894. instr->Remove();
  895. return prevInstr;
  896. }
// Lowers Simd128_Select_* : bitwise select dst = (mask & trueValue) | (~mask & falseValue).
IR::Instr* LowererMD::Simd128LowerSelect(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Select_F4 || instr->m_opcode == Js::OpCode::Simd128_Select_I4 /*|| instr->m_opcode == Js::OpCode::Simd128_Select_D2 */||
        instr->m_opcode == Js::OpCode::Simd128_Select_I8 || instr->m_opcode == Js::OpCode::Simd128_Select_I16 || instr->m_opcode == Js::OpCode::Simd128_Select_U4 ||
        instr->m_opcode == Js::OpCode::Simd128_Select_U8 || instr->m_opcode == Js::OpCode::Simd128_Select_U16 );
    IR::Opnd* dst = nullptr;
    IR::Opnd* src1 = nullptr;
    IR::Opnd* src2 = nullptr;
    IR::Opnd* src3 = nullptr;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    // The number of src opnds should be exact. Missing opnds means type-error, and we should generate an exception throw instead (or globopt does).
    Assert(args->Count() == 4);
    dst = args->Pop();
    src1 = args->Pop(); // mask
    src2 = args->Pop(); // trueValue
    src3 = args->Pop(); // falseValue
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    Assert(src3->IsRegOpnd() && src3->IsSimd128());
    IR::RegOpnd *tmp = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::Instr *pInstr = nullptr;
    // ANDPS tmp, mask, tvalue -- tmp = mask & trueValue
    pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, src1, src2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // ANDNPS dst, mask, fvalue -- dst = ~mask & falseValue
    // (comment previously said ANDPS; the emitted op is ANDNPS)
    pInstr = IR::Instr::New(Js::OpCode::ANDNPS, dst, src1, src3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // ORPS dst, dst, tmp -- combine the two halves
    pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, tmp, m_func);
    instr->InsertBefore(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers Simd128_Neg_* using two's complement: per lane, dst = (~src1) + 1.
// The bitwise NOT is done with PANDN against an all-bits-set constant, and
// the +1 is a per-lane add (PADDD/PADDW/PADDB by lane width) of a constant
// table (X86_ALL_ONES_I4/I8/I16 -- presumably a 1 in every lane; confirm
// against the constant definitions).
IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr)
{
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    Js::OpCode addOpcode = Js::OpCode::PADDD;
    void * allOnes = (void*)&X86_ALL_ONES_I4;
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(instr->GetSrc2() == nullptr);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_Neg_I4:
    case Js::OpCode::Simd128_Neg_U4:
        // defaults above (PADDD / X86_ALL_ONES_I4) apply
        break;
    case Js::OpCode::Simd128_Neg_I8:
    case Js::OpCode::Simd128_Neg_U8:
        addOpcode = Js::OpCode::PADDW;
        allOnes = (void*)&X86_ALL_ONES_I8;
        break;
    case Js::OpCode::Simd128_Neg_I16:
    case Js::OpCode::Simd128_Neg_U16:
        addOpcode = Js::OpCode::PADDB;
        allOnes = (void*)&X86_ALL_ONES_I16;
        break;
    default:
        Assert(UNREACHED);
    }
    // MOVAPS dst, src1
    IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
    instr->InsertBefore(pInstr);
    // PANDN dst, dst, 0xfff...f -- dst = ~dst (AND-NOT against all ones)
    pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), src1->GetType(), m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // addOpCode dst, dst, {allOnes} -- per-lane +1 completes the negation
    pInstr = IR::Instr::New(addOpcode, dst, dst, IR::MemRefOpnd::New(allOnes, src1->GetType(), m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  976. IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
  977. {
  978. Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4 || instr->m_opcode == Js::OpCode::Simd128_Mul_U4);
  979. IR::Instr *pInstr;
  980. IR::Opnd* dst = instr->GetDst();
  981. IR::Opnd* src1 = instr->GetSrc1();
  982. IR::Opnd* src2 = instr->GetSrc2();
  983. IR::Opnd* temp1, *temp2, *temp3;
  984. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  985. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  986. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  987. temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  988. temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
  989. temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
  990. // temp1 = PMULUDQ src1, src2
  991. pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
  992. instr->InsertBefore(pInstr);
  993. //MakeDstEquSrc1(pInstr);
  994. Legalize(pInstr);
  995. // temp2 = PSLRD src1, 0x4
  996. pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
  997. instr->InsertBefore(pInstr);
  998. //MakeDstEquSrc1(pInstr);
  999. Legalize(pInstr);
  1000. // temp3 = PSLRD src2, 0x4
  1001. pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
  1002. instr->InsertBefore(pInstr);
  1003. //MakeDstEquSrc1(pInstr);
  1004. Legalize(pInstr);
  1005. // temp2 = PMULUDQ temp2, temp3
  1006. pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
  1007. instr->InsertBefore(pInstr);
  1008. Legalize(pInstr);
  1009. //PSHUFD temp1, temp1, 0x8
  1010. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func));
  1011. //PSHUFD temp2, temp2, 0x8
  1012. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
  1013. // PUNPCKLDQ dst, temp1, temp2
  1014. pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
  1015. instr->InsertBefore(pInstr);
  1016. Legalize(pInstr);
  1017. pInstr = instr->m_prev;
  1018. instr->Remove();
  1019. return pInstr;
  1020. }
// Lowers 8-bit-lane SIMD multiply (Simd128_Mul_I16/U16). There is no packed
// byte multiply here, so 16-bit PMULLW is used twice: once on the full words
// (keeping only the even-byte products via a mask), and once on the high
// bytes of each word (shifted down, multiplied, shifted back up), then the
// two halves are OR'ed into dst.
IR::Instr* LowererMD::Simd128LowerMulI16(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I16 || instr->m_opcode == Js::OpCode::Simd128_Mul_U16);
    IR::Instr *pInstr = nullptr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    IRType simdType, laneType;
    if (instr->m_opcode == Js::OpCode::Simd128_Mul_I16)
    {
        simdType = TySimd128I16;
        laneType = TyInt8;
    }
    else
    {
        simdType = TySimd128U16;
        laneType = TyUint8;
    }
    Assert(dst->IsRegOpnd() && dst->GetType() == simdType);
    Assert(src1->IsRegOpnd() && src1->GetType() == simdType);
    Assert(src2->IsRegOpnd() && src2->GetType() == simdType);
    temp1 = IR::RegOpnd::New(simdType, m_func);
    temp2 = IR::RegOpnd::New(simdType, m_func);
    temp3 = IR::RegOpnd::New(simdType, m_func);
    // MOVAPS temp1, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp1, src1, m_func));
    // PMULLW temp1, temp1, src2 -- 16-bit word products of src1 * src2
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp1, temp1, src2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PAND temp1, [low-bytes mask 0x00ff00ff...] -- zero bytes 1,3,5...,
    // keeping the even-byte products
    pInstr = IR::Instr::New(Js::OpCode::PAND, temp1, temp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), simdType, m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp2, src2, 8 -- upper byte of each word of src2
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp2, src2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp3, src1, 8 -- upper byte of each word of src1
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp3, src1, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PMULLW temp2, temp2, temp3 -- products of the odd bytes
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSLLW temp2, temp2, 8 -- place odd-byte products back into bytes 1,3,5...
    pInstr = IR::Instr::New(Js::OpCode::PSLLW, temp2, temp2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // POR dst, temp1, temp2 -- combine even- and odd-byte results
    pInstr = IR::Instr::New(Js::OpCode::POR, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  1080. IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
  1081. {
  1082. IR::Opnd* dst = instr->GetDst();
  1083. IR::Opnd* src1 = instr->GetSrc1();
  1084. IR::Opnd* src2 = instr->GetSrc2();
  1085. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  1086. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1087. Assert(src2->IsInt32());
  1088. Js::OpCode opcode = Js::OpCode::PSLLD;
  1089. int elementSizeInBytes = 0;
  1090. switch (instr->m_opcode)
  1091. {
  1092. case Js::OpCode::Simd128_ShLtByScalar_I4:
  1093. case Js::OpCode::Simd128_ShLtByScalar_U4: // same as int32x4.ShiftLeftScalar
  1094. opcode = Js::OpCode::PSLLD;
  1095. elementSizeInBytes = 4;
  1096. break;
  1097. case Js::OpCode::Simd128_ShRtByScalar_I4:
  1098. opcode = Js::OpCode::PSRAD;
  1099. elementSizeInBytes = 4;
  1100. break;
  1101. case Js::OpCode::Simd128_ShLtByScalar_I8:
  1102. case Js::OpCode::Simd128_ShLtByScalar_U8: // same as int16x8.ShiftLeftScalar
  1103. opcode = Js::OpCode::PSLLW;
  1104. elementSizeInBytes = 2;
  1105. break;
  1106. case Js::OpCode::Simd128_ShRtByScalar_I8:
  1107. opcode = Js::OpCode::PSRAW;
  1108. elementSizeInBytes = 2;
  1109. break;
  1110. case Js::OpCode::Simd128_ShRtByScalar_U4:
  1111. opcode = Js::OpCode::PSRLD;
  1112. elementSizeInBytes = 4;
  1113. break;
  1114. case Js::OpCode::Simd128_ShRtByScalar_U8:
  1115. opcode = Js::OpCode::PSRLW;
  1116. elementSizeInBytes = 2;
  1117. break;
  1118. case Js::OpCode::Simd128_ShLtByScalar_I16: // composite, int8x16.ShiftLeftScalar
  1119. case Js::OpCode::Simd128_ShRtByScalar_I16: // composite, int8x16.ShiftRightScalar
  1120. case Js::OpCode::Simd128_ShLtByScalar_U16: // same as int8x16.ShiftLeftScalar
  1121. case Js::OpCode::Simd128_ShRtByScalar_U16: // composite, uint8x16.ShiftRightScalar
  1122. elementSizeInBytes = 1;
  1123. break;
  1124. default:
  1125. Assert(UNREACHED);
  1126. }
  1127. IR::Instr *pInstr = nullptr;
  1128. IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, m_func);
  1129. IR::RegOpnd *reg2 = IR::RegOpnd::New(TyInt32, m_func);
  1130. IR::RegOpnd *tmp0 = IR::RegOpnd::New(src1->GetType(), m_func);
  1131. IR::RegOpnd *tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  1132. IR::RegOpnd *tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
  1133. //Shift amount: The shift amout is masked by [ElementSize] * 8
  1134. //The masked Shift amount is moved to xmm register
  1135. //AND shamt, shmask, shamt
  1136. //MOVD tmp0, shamt
  1137. IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func);
  1138. // en-register
  1139. IR::Opnd *origShamt = EnregisterIntConst(instr, src2); //unnormalized shift amount
  1140. pInstr = IR::Instr::New(Js::OpCode::AND, shamt, origShamt, IR::IntConstOpnd::New(Js::SIMDUtils::SIMDGetShiftAmountMask(elementSizeInBytes), TyInt32, m_func), m_func); // normalizing by elm width (i.e. shamt % elm_width)
  1141. instr->InsertBefore(pInstr);
  1142. Legalize(pInstr);
  1143. pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp0, shamt, m_func);
  1144. instr->InsertBefore(pInstr);
  1145. if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 ||
  1146. instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 ||
  1147. instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 ||
  1148. instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8)
  1149. {
  1150. // shiftOpCode dst, src1, tmp0
  1151. pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func);
  1152. instr->InsertBefore(pInstr);
  1153. Legalize(pInstr);
  1154. }
  1155. else if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I16 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U16)
  1156. {
  1157. // MOVAPS tmp1, src1
  1158. pInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func);
  1159. instr->InsertBefore(pInstr);
  1160. // MOVAPS dst, src1
  1161. pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  1162. instr->InsertBefore(pInstr);
  1163. // PAND tmp1, [X86_HIGHBYTES_MASK]
  1164. pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
  1165. instr->InsertBefore(pInstr);
  1166. Legalize(pInstr);
  1167. // PSLLW tmp1, tmp0
  1168. pInstr = IR::Instr::New(Js::OpCode::PSLLW, tmp1, tmp1, tmp0, m_func);
  1169. instr->InsertBefore(pInstr);
  1170. Legalize(pInstr);
  1171. // PSLLW dst, tmp0
  1172. pInstr = IR::Instr::New(Js::OpCode::PSLLW, dst, dst, tmp0, m_func);
  1173. instr->InsertBefore(pInstr);
  1174. Legalize(pInstr);
  1175. // PAND dst, [X86_LOWBYTES_MASK]
  1176. pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
  1177. instr->InsertBefore(pInstr);
  1178. Legalize(pInstr);
  1179. // POR dst, tmp1
  1180. pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func);
  1181. instr->InsertBefore(pInstr);
  1182. }
  1183. else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I16)
  1184. {
  1185. // MOVAPS tmp1, src1
  1186. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func));
  1187. // MOVAPS dst, src1
  1188. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  1189. // PSLLW dst, 8
  1190. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLW, dst, dst, IR::IntConstOpnd::New(8, TyInt8, m_func), m_func));
  1191. // LEA reg, [shamt + 8]
  1192. IR::IndirOpnd *indirOpnd = IR::IndirOpnd::New(shamt->AsRegOpnd(), +8, TyInt32, m_func);
  1193. instr->InsertBefore(IR::Instr::New(Js::OpCode::LEA, reg, indirOpnd, m_func));
  1194. // MOVD tmp0, reg
  1195. pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp2, reg, m_func);
  1196. instr->InsertBefore(pInstr);
  1197. // PSRAW dst, tmp0
  1198. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, dst, dst, tmp2, m_func));
  1199. // PAND dst, [X86_LOWBYTES_MASK]
  1200. pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
  1201. instr->InsertBefore(pInstr);
  1202. Legalize(pInstr);
  1203. // PSRAW tmp1, tmp0
  1204. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, tmp1, tmp1, tmp0, m_func));
  1205. // PAND tmp1, [X86_HIGHBYTES_MASK]
  1206. pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
  1207. instr->InsertBefore(pInstr);
  1208. Legalize(pInstr);
  1209. // POR dst, tmp1
  1210. instr->InsertBefore(IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func));
  1211. }
  1212. else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U16)
  1213. {
  1214. IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
  1215. shamtReg->SetReg(LowererMDArch::GetRegShiftCount());
  1216. IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
  1217. // MOVAPS dst, src1
  1218. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  1219. // MOV reg2, 0FFh
  1220. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, reg2, IR::IntConstOpnd::New(0xFF, TyInt32, m_func), m_func));
  1221. // MOV shamtReg, shamt
  1222. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, shamtReg, shamt, m_func));
  1223. // SHR reg2, shamtReg (lower 8 bit)
  1224. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));
  1225. // MOV tmp, reg2
  1226. // MOVSX reg2, tmp(TyInt8)
  1227. pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func);
  1228. instr->InsertBefore(pInstr);
  1229. Legalize(pInstr);
  1230. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
  1231. IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
  1232. // PSRLW dst, mask
  1233. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
  1234. // splat (0xFF >> shamt) into mask
  1235. // MOVD mask, reg2
  1236. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, mask, reg2, m_func));
  1237. // PUNPCKLBW mask, mask
  1238. pInstr = IR::Instr::New(Js::OpCode::PUNPCKLBW, mask, mask, mask, m_func);
  1239. instr->InsertBefore(pInstr);
  1240. Legalize(pInstr);
  1241. // PUNPCKLWD mask, mask
  1242. pInstr = IR::Instr::New(Js::OpCode::PUNPCKLWD, mask, mask, mask, m_func);
  1243. instr->InsertBefore(pInstr);
  1244. Legalize(pInstr);
  1245. // PSHUFD mask, mask, 0
  1246. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, mask, mask, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
  1247. // PAND dst, mask
  1248. instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
  1249. }
  1250. else
  1251. {
  1252. Assert(UNREACHED);
  1253. }
  1254. pInstr = instr->m_prev;
  1255. instr->Remove();
  1256. return pInstr;
  1257. }
  1258. IR::Instr* LowererMD::SIMD128LowerReplaceLane_8(IR::Instr* instr)
  1259. {
  1260. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1261. int lane = 0;
  1262. IR::Opnd *dst = args->Pop();
  1263. IR::Opnd *src1 = args->Pop();
  1264. IR::Opnd *src2 = args->Pop();
  1265. IR::Opnd *src3 = args->Pop();
  1266. IR::Instr * newInstr = nullptr;
  1267. Assert(dst->IsSimd128() && src1->IsSimd128());
  1268. lane = src2->AsIntConstOpnd()->AsInt32();
  1269. IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt16);
  1270. Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8);
  1271. // MOVAPS dst, src1
  1272. newInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  1273. instr->InsertBefore(newInstr);
  1274. Legalize(newInstr);
  1275. // PINSRW dst, value, index
  1276. newInstr = IR::Instr::New(Js::OpCode::PINSRW, dst, laneValue, IR::IntConstOpnd::New(lane, TyInt8, m_func), m_func);
  1277. instr->InsertBefore(newInstr);
  1278. Legalize(newInstr);
  1279. if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8) //canonicalizing lanes
  1280. {
  1281. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
  1282. }
  1283. IR::Instr* prevInstr = instr->m_prev;
  1284. instr->Remove();
  1285. return prevInstr;
  1286. }
IR::Instr* LowererMD::SIMD128LowerReplaceLane_16(IR::Instr* instr)
{
    // Lowers Simd128_ReplaceLane_{I16,U16,B16} (16 x 8-bit lanes).
    // SSE2 has no byte-granularity insert (PINSRB is SSE4.1), so the vector is
    // spilled to a per-thread SIMD temp area in memory, the target byte is
    // patched there with a scalar MOV, and the vector is reloaded.
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();  // source vector
    IR::Opnd *src2 = args->Pop();  // constant lane index
    IR::Opnd *src3 = args->Pop();  // replacement lane value
    IR::Instr * newInstr = nullptr;
    Assert(dst->IsSimd128() && src1->IsSimd128());
    lane = src2->AsIntConstOpnd()->AsInt32();
    Assert(lane >= 0 && lane < 16);
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt8);
    intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#if DBG
    // using only one SIMD temp
    intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue);
#endif
    Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
    // MOVUPS [temp], src1
    intptr_t address = tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New(address, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // MOV [temp+offset], laneValue -- patch the single byte in memory
    address = tempSIMD + lane;
    // check for buffer overrun
    Assert((intptr_t)address < endAddrSIMD);
    newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, TyInt8, m_func), laneValue, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // MOVUPS dst, [temp] -- reload the patched vector
    address = tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(address, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16) //canonicalizing lanes.
    {
        instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  1331. IR::Instr* LowererMD::SIMD128LowerReplaceLane_4(IR::Instr* instr)
  1332. {
  1333. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1334. int lane = 0, byteWidth = 0;
  1335. IR::Opnd *dst = args->Pop();
  1336. IR::Opnd *src1 = args->Pop();
  1337. IR::Opnd *src2 = args->Pop();
  1338. IR::Opnd *src3 = args->Pop();
  1339. Assert(dst->IsSimd128() && src1->IsSimd128());
  1340. IRType type = dst->GetType();
  1341. lane = src2->AsIntConstOpnd()->AsInt32();
  1342. IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
  1343. switch (instr->m_opcode)
  1344. {
  1345. case Js::OpCode::Simd128_ReplaceLane_I4:
  1346. case Js::OpCode::Simd128_ReplaceLane_U4:
  1347. case Js::OpCode::Simd128_ReplaceLane_B4:
  1348. byteWidth = TySize[TyInt32];
  1349. break;
  1350. case Js::OpCode::Simd128_ReplaceLane_F4:
  1351. byteWidth = TySize[TyFloat32];
  1352. break;
  1353. default:
  1354. Assert(UNREACHED);
  1355. }
  1356. // MOVAPS dst, src1
  1357. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  1358. if (laneValue->GetType() == TyInt32 || laneValue->GetType() == TyUint32)
  1359. {
  1360. IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func);//mov intval to xmm
  1361. //MOVD
  1362. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
  1363. laneValue = tempReg;
  1364. }
  1365. Assert(laneValue->GetType() == TyFloat32);
  1366. if (lane == 0)
  1367. {
  1368. // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
  1369. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
  1370. }
  1371. else if (lane == 2)
  1372. {
  1373. IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
  1374. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
  1375. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
  1376. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
  1377. }
  1378. else
  1379. {
  1380. Assert(lane == 1 || lane == 3);
  1381. uint8 shufMask = 0xE4; // 11 10 01 00
  1382. shufMask |= lane; // 11 10 01 id
  1383. shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
  1384. // SHUFPS dst, dst, shufMask
  1385. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
  1386. // MOVSS dst, value
  1387. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
  1388. // SHUFPS dst, dst, shufMask
  1389. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
  1390. }
  1391. if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4) //Canonicalizing lanes
  1392. {
  1393. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
  1394. }
  1395. IR::Instr* prevInstr = instr->m_prev;
  1396. instr->Remove();
  1397. return prevInstr;
  1398. }
  1399. /*
  1400. 4 and 2 lane Swizzle.
  1401. */
  1402. IR::Instr* LowererMD::Simd128LowerSwizzle_4(IR::Instr* instr)
  1403. {
  1404. Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
  1405. Js::OpCode irOpcode = instr->m_opcode;
  1406. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1407. IR::Opnd *dst = args->Pop();
  1408. IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
  1409. int i = 0;
  1410. while (!args->Empty() && i < 6)
  1411. {
  1412. srcs[i++] = args->Pop();
  1413. }
  1414. int8 shufMask = 0;
  1415. int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
  1416. IR::Instr *pInstr = instr->m_prev;
  1417. Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
  1418. // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
  1419. Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 /*|| irOpcode == Js::OpCode::Simd128_Swizzle_D2*/);
  1420. AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
  1421. srcs[2] && srcs[2]->IsIntConstOpnd() &&
  1422. (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[3] && srcs[3]->IsIntConstOpnd())) &&
  1423. (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
  1424. #if 0
  1425. if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
  1426. {
  1427. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  1428. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  1429. Assert(lane0 >= 0 && lane0 < 2);
  1430. Assert(lane1 >= 0 && lane1 < 2);
  1431. shufMask = (int8)((lane1 << 1) | lane0);
  1432. shufOpcode = Js::OpCode::SHUFPD;
  1433. }
  1434. #endif // 0
  1435. if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4)
  1436. {
  1437. shufOpcode = Js::OpCode::PSHUFD;
  1438. }
  1439. AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
  1440. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  1441. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  1442. lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
  1443. lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
  1444. Assert(lane1 >= 0 && lane1 < 4);
  1445. Assert(lane2 >= 0 && lane2 < 4);
  1446. Assert(lane2 >= 0 && lane2 < 4);
  1447. Assert(lane3 >= 0 && lane3 < 4);
  1448. shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
  1449. instr->m_opcode = shufOpcode;
  1450. instr->SetDst(dst);
  1451. // MOVAPS dst, src1
  1452. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
  1453. // SHUF dst, dst, imm8
  1454. instr->SetSrc1(dst);
  1455. instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
  1456. return pInstr;
  1457. }
  1458. /*
  1459. 4 lane shuffle. Handles arbitrary lane values.
  1460. */
IR::Instr* LowererMD::Simd128LowerShuffle_4(IR::Instr* instr)
{
    // Lowers Simd128_Shuffle_{I4,U4,F4} with constant lane indices.
    // Lane indices 0..3 select from src1, 4..7 from src2. Depending on how many
    // lanes come from each source, the shuffle is emitted as:
    //  - a single swizzle of one source (all 4 lanes from the same source),
    //  - one SHUFPS (2+2 split in x86-friendly order) or two SHUFPS
    //    (arbitrary 2+2 split, ordered then re-arranged), or
    //  - a masked blend of two swizzles (3 lanes from one source, 1 from the other).
    Js::OpCode irOpcode = instr->m_opcode;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    IR::Opnd *dst = args->Pop();
    IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
    int j = 0;
    while (!args->Empty() && j < 6)
    {
        srcs[j++] = args->Pop();
    }
    uint8 lanes[4], lanesSrc[4];  // lanesSrc[i]: 1 if lanes[i] reads src1, 2 if src2 (set by CheckShuffleLanes_4)
    uint fromSrc1, fromSrc2;      // how many of the 4 result lanes come from each source
    IR::Instr *pInstr = instr->m_prev;
    Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
    Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_U4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
    // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
    AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
              srcs[3] && srcs[3]->IsIntConstOpnd() &&
              srcs[4] && srcs[4]->IsIntConstOpnd() &&
              srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
    lanes[0] = (uint8) srcs[2]->AsIntConstOpnd()->AsInt32();
    lanes[1] = (uint8) srcs[3]->AsIntConstOpnd()->AsInt32();
    lanes[2] = (uint8) srcs[4]->AsIntConstOpnd()->AsInt32();
    lanes[3] = (uint8) srcs[5]->AsIntConstOpnd()->AsInt32();
    Assert(lanes[0] >= 0 && lanes[0] < 8);
    Assert(lanes[1] >= 0 && lanes[1] < 8);
    Assert(lanes[2] >= 0 && lanes[2] < 8);
    Assert(lanes[3] >= 0 && lanes[3] < 8);
    CheckShuffleLanes_4(lanes, lanesSrc, &fromSrc1, &fromSrc2);
    Assert(fromSrc1 + fromSrc2 == 4);
    if (fromSrc1 == 4 || fromSrc2 == 4)
    {
        // can be done with a swizzle
        IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1];
        InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr);
    }
    else if (fromSrc1 == 2)
    {
        if (lanes[0] < 4 && lanes[1] < 4)
        {
            // x86 friendly shuffle: SHUFPS takes its low two result lanes from
            // operand 1 and its high two from operand 2, so this is one instruction.
            Assert(lanes[2] >= 4 && lanes[3] >= 4);
            InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
        }
        else
        {
            // arbitrary shuffle with 2 lanes from each src
            uint8 ordLanes[4], reArrLanes[4];
            // order lanes based on which src they come from
            // compute re-arrangement mask
            for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++)
            {
                if (lanesSrc[i] == 1 && j1 < 4)
                {
                    // src1 lanes go to the low half of the intermediate
                    ordLanes[j1] = lanes[i];
                    reArrLanes[i] = j1;
                    j1++;
                }
                else if(j2 < 4)
                {
                    // src2 lanes go to the high half of the intermediate
                    Assert(lanesSrc[i] == 2);
                    ordLanes[j2] = lanes[i];
                    reArrLanes[i] = j2;
                    j2++;
                }
            }
            // First shuffle gathers lanes in source order; second re-arranges
            // them into the requested order.
            IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func);
            InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr);
            InsertShufps(reArrLanes, dst, temp, temp, instr);
        }
    }
    else if (fromSrc1 == 3 || fromSrc2 == 3)
    {
        // shuffle with 3 lanes from one src, one from another
        IR::Instr *newInstr;
        IR::Opnd * majSrc, *minSrc;  // majority (3-lane) and minority (1-lane) sources
        IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
        IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
        IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
        uint8 minorityLane = 0, maxLaneValue;
        majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1];
        minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0];
        Assert(majSrc != minSrc);
        // Algorithm:
        // SHUFPS temp1, majSrc, lanes
        // SHUFPS temp2, minSrc, lanes
        // MOVUPS temp3, [minorityLane mask]
        // ANDPS temp2, temp3 // mask all lanes but minorityLane
        // ANDNPS temp3, temp1 // zero minorityLane
        // ORPS dst, temp2, temp3
        // find minorityLane to mask
        maxLaneValue = minSrc == srcs[0] ? 4 : 8;
        for (uint8 i = 0; i < 4; i++)
        {
            if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue)
            {
                minorityLane = i;
                break;
            }
        }
        IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86FourLanesMaskAddr(minorityLane), dst->GetType(), m_func);
        InsertShufps(lanes, temp1, majSrc, majSrc, instr);
        InsertShufps(lanes, temp2, minSrc, minSrc, instr);
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    instr->Remove();
    return pInstr;
}
  1581. // 8 and 16 lane shuffle with memory temps
IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
{
    // Lowers 8- and 16-lane swizzles and shuffles by going through memory:
    // both sources are spilled to adjacent per-thread SIMD temp slots, the
    // result is gathered lane-by-lane with scalar MOVs into a third slot, and
    // then loaded into dst. For shuffles, lane indices >= laneCount select
    // from src2, which sits contiguously after src1 (temp2SIMD = temp1SIMD + 16),
    // so a single base address + laneValue*scale covers both sources.
    Js::OpCode irOpcode = instr->m_opcode;
    IR::Instr *pInstr = instr->m_prev, *newInstr = nullptr;
    SList<IR::Opnd*> *args = nullptr;
    IR::Opnd *dst = nullptr;
    IR::Opnd *src1 = nullptr, *src2 = nullptr;
    uint8 lanes[16], laneCount = 0, scale = 1;  // scale = lane size in bytes
    bool isShuffle = false;                     // shuffle has two sources; swizzle has one
    IRType laneType = TyInt16;
    intptr_t temp1SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
    intptr_t temp2SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(1);
    intptr_t dstSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(2);
#if DBG
    intptr_t endAddrSIMD = (intptr_t)(temp1SIMD + sizeof(X86SIMDValue) * SIMD_TEMP_SIZE);
#endif
    void *address = nullptr;
    args = Simd128GetExtendedArgs(instr);
    switch (irOpcode)
    {
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_U8:
        Assert(args->Count() == 10);
        laneCount = 8;
        laneType = TyInt16;
        isShuffle = false;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U16:
        Assert(args->Count() == 18);
        laneCount = 16;
        laneType = TyInt8;
        isShuffle = false;
        scale = 1;
        break;
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_U8:
        Assert(args->Count() == 11);
        laneCount = 8;
        isShuffle = true;
        laneType = TyUint16;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U16:
        Assert(args->Count() == 19);
        laneCount = 16;
        isShuffle = true;
        laneType = TyUint8;
        scale = 1;
        break;
    default:
        Assert(UNREACHED);
    }
    dst = args->Pop();
    src1 = args->Pop();
    if (isShuffle)
    {
        src2 = args->Pop();
    }
    Assert(dst->IsSimd128() && src1 && src1->IsSimd128() && (!isShuffle|| src2->IsSimd128()));
    // Collect the constant lane indices.
    for (uint i = 0; i < laneCount; i++)
    {
        IR::Opnd * laneOpnd = args->Pop();
        Assert(laneOpnd->IsIntConstOpnd());
        lanes[i] = (uint8)laneOpnd->AsIntConstOpnd()->AsInt32();
    }
    // MOVUPS [temp], src1
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)temp1SIMD, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    if (isShuffle)
    {
        // MOVUPS [temp+16], src2
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)(temp2SIMD), TySimd128I16, m_func), src2, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    // Gather each result lane with a scalar load/store pair.
    for (uint i = 0; i < laneCount; i++)
    {
        //. MOV tmp, [temp1SIMD + laneValue*scale]
        IR::RegOpnd *tmp = IR::RegOpnd::New(laneType, m_func);
        address = (void*)(temp1SIMD + lanes[i] * scale);
        // Reads must stay within the two source slots (i.e. below dstSIMD).
        Assert((intptr_t)address + (intptr_t)scale <= (intptr_t)dstSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, IR::MemRefOpnd::New(address, laneType, m_func), m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        //. MOV [dstSIMD + i*scale], tmp
        address = (void*)(dstSIMD + i * scale);
        // Writes must stay within the result slot (i.e. below the area end).
        Assert((intptr_t)address + (intptr_t) scale <= endAddrSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV,IR::MemRefOpnd::New(address, laneType, m_func), tmp, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    // MOVUPS dst, [dstSIMD]
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)dstSIMD, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->Remove();
    return pInstr;
}
  1684. IR::Instr* LowererMD::Simd128LowerNotEqual(IR::Instr* instr)
  1685. {
  1686. Assert(instr->m_opcode == Js::OpCode::Simd128_Neq_I4 || instr->m_opcode == Js::OpCode::Simd128_Neq_I8 ||
  1687. instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U4 ||
  1688. instr->m_opcode == Js::OpCode::Simd128_Neq_U8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16);
  1689. IR::Instr *pInstr;
  1690. IR::Opnd* dst = instr->GetDst();
  1691. IR::Opnd* src1 = instr->GetSrc1();
  1692. IR::Opnd* src2 = instr->GetSrc2();
  1693. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1694. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1695. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1696. Js::OpCode cmpOpcode = Js::OpCode::PCMPEQD;
  1697. if (instr->m_opcode == Js::OpCode::Simd128_Neq_I8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U8)
  1698. {
  1699. cmpOpcode = Js::OpCode::PCMPEQW;
  1700. }
  1701. else if (instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16)
  1702. {
  1703. cmpOpcode = Js::OpCode::PCMPEQB;
  1704. }
  1705. // dst = PCMPEQD src1, src2
  1706. pInstr = IR::Instr::New(cmpOpcode, dst, src1, src2, m_func);
  1707. instr->InsertBefore(pInstr);
  1708. //MakeDstEquSrc1(pInstr);
  1709. Legalize(pInstr);
  1710. // dst = PANDN dst, X86_ALL_NEG_ONES
  1711. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1712. instr->InsertBefore(pInstr);
  1713. //MakeDstEquSrc1(pInstr);
  1714. Legalize(pInstr);
  1715. pInstr = instr->m_prev;
  1716. instr->Remove();
  1717. return pInstr;
  1718. }
  1719. IR::Instr* LowererMD::Simd128LowerLessThan(IR::Instr* instr)
  1720. {
  1721. Assert(instr->m_opcode == Js::OpCode::Simd128_Lt_U4 || instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_Lt_U16 ||
  1722. instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16);
  1723. IR::Instr *pInstr;
  1724. IR::Opnd* dst = instr->GetDst();
  1725. IR::Opnd* src1 = instr->GetSrc1();
  1726. IR::Opnd* src2 = instr->GetSrc2();
  1727. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1728. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1729. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1730. IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
  1731. IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
  1732. IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
  1733. IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
  1734. Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
  1735. if (instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8)
  1736. {
  1737. cmpOpcode = Js::OpCode::PCMPGTW;
  1738. signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
  1739. }
  1740. else if (instr->m_opcode == Js::OpCode::Simd128_Lt_U16 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
  1741. {
  1742. cmpOpcode = Js::OpCode::PCMPGTB;
  1743. signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
  1744. }
  1745. // MOVUPS mask, [signBits]
  1746. pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
  1747. instr->InsertBefore(pInstr);
  1748. Legalize(pInstr);
  1749. // tmpa = PXOR src1, signBits
  1750. pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
  1751. instr->InsertBefore(pInstr);
  1752. Legalize(pInstr);
  1753. // tmpb = PXOR src2, signBits
  1754. pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
  1755. instr->InsertBefore(pInstr);
  1756. Legalize(pInstr);
  1757. // dst = cmpOpCode tmpb, tmpa (Less than, swapped opnds)
  1758. pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
  1759. instr->InsertBefore(pInstr);
  1760. Legalize(pInstr);
  1761. if (instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
  1762. {
  1763. // for SIMD unsigned int, greaterThanOrEqual == lessThan + Not
  1764. // dst = PANDN dst, X86_ALL_NEG_ONES
  1765. // MOVUPS mask, [allNegOnes]
  1766. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1767. instr->InsertBefore(pInstr);
  1768. Legalize(pInstr);
  1769. }
  1770. pInstr = instr->m_prev;
  1771. instr->Remove();
  1772. return pInstr;
  1773. }
  1774. IR::Instr* LowererMD::Simd128LowerLessThanOrEqual(IR::Instr* instr)
  1775. {
  1776. Assert(instr->m_opcode == Js::OpCode::Simd128_LtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 ||
  1777. instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
  1778. instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16);
  1779. IR::Instr *pInstr;
  1780. IR::Opnd* dst = instr->GetDst();
  1781. IR::Opnd* src1 = instr->GetSrc1();
  1782. IR::Opnd* src2 = instr->GetSrc2();
  1783. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1784. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1785. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1786. IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
  1787. IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
  1788. Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
  1789. Js::OpCode eqpOpcode = Js::OpCode::PCMPEQD;
  1790. if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
  1791. {
  1792. cmpOpcode = Js::OpCode::PCMPGTW;
  1793. eqpOpcode = Js::OpCode::PCMPEQW;
  1794. }
  1795. else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
  1796. {
  1797. cmpOpcode = Js::OpCode::PCMPGTB;
  1798. eqpOpcode = Js::OpCode::PCMPEQB;
  1799. }
  1800. if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I4)
  1801. {
  1802. // dst = pcmpgtd src1, src2
  1803. pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src1, src2, m_func);
  1804. instr->InsertBefore(pInstr);
  1805. Legalize(pInstr);
  1806. // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
  1807. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1808. instr->InsertBefore(pInstr);
  1809. Legalize(pInstr);
  1810. }
  1811. else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16)
  1812. {
  1813. // tmpa = pcmpgtw src2, src1 (src1 < src2?) [pcmpgtb]
  1814. pInstr = IR::Instr::New(cmpOpcode, tmpa, src2, src1, m_func);
  1815. instr->InsertBefore(pInstr);
  1816. Legalize(pInstr);
  1817. // tmpb = pcmpeqw src1, src2 [pcmpeqb]
  1818. pInstr = IR::Instr::New(eqpOpcode, tmpb, src1, src2, m_func);
  1819. instr->InsertBefore(pInstr);
  1820. Legalize(pInstr);
  1821. // dst = por tmpa, tmpb
  1822. pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmpa, tmpb, m_func);
  1823. instr->InsertBefore(pInstr);
  1824. Legalize(pInstr);
  1825. }
  1826. else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
  1827. instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
  1828. {
  1829. IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
  1830. IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
  1831. if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
  1832. {
  1833. signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
  1834. }
  1835. else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
  1836. {
  1837. signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
  1838. }
  1839. // MOVUPS mask, [signBits]
  1840. pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
  1841. instr->InsertBefore(pInstr);
  1842. Legalize(pInstr);
  1843. // tmpa = PXOR src1, mask
  1844. pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
  1845. instr->InsertBefore(pInstr);
  1846. Legalize(pInstr);
  1847. // tmpb = PXOR src2, signBits
  1848. pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
  1849. instr->InsertBefore(pInstr);
  1850. Legalize(pInstr);
  1851. // dst = cmpOpCode tmpb, tmpa
  1852. pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
  1853. instr->InsertBefore(pInstr);
  1854. Legalize(pInstr);
  1855. // tmpa = pcmpeqd tmpa, tmpb
  1856. pInstr = IR::Instr::New(eqpOpcode, tmpa, tmpa, tmpb, m_func);
  1857. instr->InsertBefore(pInstr);
  1858. Legalize(pInstr);
  1859. // dst = por dst, tmpa
  1860. pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmpa, m_func);
  1861. instr->InsertBefore(pInstr);
  1862. Legalize(pInstr);
  1863. if (instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
  1864. { // for SIMD unsigned int, greaterThan == lessThanOrEqual + Not
  1865. // dst = PANDN dst, X86_ALL_NEG_ONES
  1866. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1867. instr->InsertBefore(pInstr);
  1868. Legalize(pInstr);
  1869. }
  1870. }
  1871. pInstr = instr->m_prev;
  1872. instr->Remove();
  1873. return pInstr;
  1874. }
  1875. IR::Instr* LowererMD::Simd128LowerGreaterThanOrEqual(IR::Instr* instr)
  1876. {
  1877. Assert(instr->m_opcode == Js::OpCode::Simd128_GtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16);
  1878. IR::Instr *pInstr;
  1879. IR::Opnd* dst = instr->GetDst();
  1880. IR::Opnd* src1 = instr->GetSrc1();
  1881. IR::Opnd* src2 = instr->GetSrc2();
  1882. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1883. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1884. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1885. if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I4)
  1886. {
  1887. // dst = pcmpgtd src2, src1
  1888. pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src2, src1, m_func);
  1889. instr->InsertBefore(pInstr);
  1890. Legalize(pInstr);
  1891. // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
  1892. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1893. instr->InsertBefore(pInstr);
  1894. Legalize(pInstr);
  1895. }
  1896. else if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
  1897. {
  1898. IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  1899. IR::RegOpnd* tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
  1900. Js::OpCode cmpOpcode = Js::OpCode::PCMPGTW;
  1901. Js::OpCode eqpOpcode = Js::OpCode::PCMPEQW;
  1902. if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
  1903. {
  1904. cmpOpcode = Js::OpCode::PCMPGTB;
  1905. eqpOpcode = Js::OpCode::PCMPEQB;
  1906. }
  1907. // tmp1 = pcmpgtw src1, src2 [pcmpgtb]
  1908. pInstr = IR::Instr::New(cmpOpcode, tmp1, src1, src2, m_func);
  1909. instr->InsertBefore(pInstr);
  1910. Legalize(pInstr);
  1911. // tmp2 = pcmpeqw src1, src2 [pcmpeqw]
  1912. pInstr = IR::Instr::New(eqpOpcode, tmp2, src1, src2, m_func);
  1913. instr->InsertBefore(pInstr);
  1914. Legalize(pInstr);
  1915. // dst = por tmp1, tmp2
  1916. pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmp1, tmp2, m_func);
  1917. instr->InsertBefore(pInstr);
  1918. Legalize(pInstr);
  1919. }
  1920. pInstr = instr->m_prev;
  1921. instr->Remove();
  1922. return pInstr;
  1923. }
// Lowers Simd128_Min_F4 / Simd128_Max_F4 to SSE sequences.
// NOTE(review): MINPS/MAXPS are asymmetric — for NaN inputs (and ±0 pairs)
// the result is taken from the second source operand. Each sequence below
// evaluates the operation in both operand orders and combines the results so
// the outcome is symmetric in its operands; the exact instruction order is
// load-bearing, do not reorder.
IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr)
{
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    Assert(instr->m_opcode == Js::OpCode::Simd128_Min_F4 || instr->m_opcode == Js::OpCode::Simd128_Max_F4);
    IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd* tmp2 = IR::RegOpnd::New(src2->GetType(), m_func);
    if (instr->m_opcode == Js::OpCode::Simd128_Min_F4)
    {
        // tmp1 = minps src1, src2
        pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp1, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = minps src2, src1 (other operand order)
        pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp2, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = orps tmp1, tmp2 — OR of both orders makes the result symmetric
        pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else
    {
        //This sequence closely mirrors SIMDFloat32x4Operation::OpMax except for
        //the fact that tmp2 (tmpbValue) is reused to reduce the number of registers
        //needed for this sequence.
        // tmp1 = maxps src1, src2
        pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = maxps src2, src1 (other operand order)
        pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp2, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp1 = andps tmp1, tmp2 — AND of both orders makes the result symmetric
        pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp1, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = cmpunordps src1, src2 — all-ones for lanes where either input is NaN
        pInstr = IR::Instr::New(Js::OpCode::CMPUNORDPS, tmp2, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = orps tmp1, tmp2 — force NaN lanes to all-ones (a NaN pattern)
        pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    // Resume lowering from the last emitted instruction.
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  1979. IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr)
  1980. {
  1981. Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 ||
  1982. instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16);
  1983. IR::Instr *pInstr;
  1984. IR::Opnd* dst = instr->GetDst();
  1985. IR::Opnd* src1 = instr->GetSrc1();
  1986. Assert(dst->IsRegOpnd() && dst->IsInt32());
  1987. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1988. // pmovmskb dst, src1
  1989. // neg dst
  1990. // sbb dst, dst
  1991. // neg dst
  1992. // pmovmskb dst, src1
  1993. pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
  1994. instr->InsertBefore(pInstr);
  1995. Legalize(pInstr);
  1996. // neg dst
  1997. pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
  1998. instr->InsertBefore(pInstr);
  1999. Legalize(pInstr);
  2000. // sbb dst, dst
  2001. pInstr = IR::Instr::New(Js::OpCode::SBB, dst, dst, dst, m_func);
  2002. instr->InsertBefore(pInstr);
  2003. Legalize(pInstr);
  2004. // neg dst
  2005. pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
  2006. instr->InsertBefore(pInstr);
  2007. Legalize(pInstr);
  2008. pInstr = instr->m_prev;
  2009. instr->Remove();
  2010. return pInstr;
  2011. }
// Lowers Simd128_AllTrue_B{4,8,16}: dst = 1 if PMOVMSKB yields 0xFFFF, i.e.
// every byte lane of src1 has its sign bit set; else 0.
IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    Assert(dst->IsRegOpnd() && dst->IsInt32());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    // 8-bit temp to receive the SETE result before widening it back into dst.
    IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
    // pmovmskb dst, src1
    pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
    instr->InsertBefore(pInstr);
    // cmp dst, 0FFFFh — equal iff all 16 sign bits are set
    pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    pInstr->SetSrc1(dst);
    pInstr->SetSrc2(IR::IntConstOpnd::New(0x0FFFF, TyInt32, m_func, true));
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // mov tmp(TyInt8), dst
    pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // sete tmp(TyInt8) — tmp = (ZF set) ? 1 : 0
    pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // movsx dst, tmp(TyInt8) — sign-extend the byte result into the int32 dst
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
    // Resume lowering from the last emitted instruction.
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers float32x4 -> int32x4 conversion.
// CVTTPS2DQ writes the integer-indefinite value 0x80000000 into any lane
// whose source is out of int32 range (or NaN), so the conversion is done
// first and range checks run afterwards: if no lane holds 0x80000000 we are
// done; otherwise we test the source lanes against ±2^31 and throw a range
// error only if a lane really was out of bounds.
IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *mask1, *mask2;
    IR::Instr *insertInstr, *pInstr, *newInstr;
    IR::LabelInstr *doneLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    // CVTTPS2DQ dst, src — the SIMD instruction itself is rewritten in place
    instr->m_opcode = Js::OpCode::CVTTPS2DQ;
    insertInstr = instr->m_next; // the checks below go after the conversion
    pInstr = instr->m_prev;      // resume point returned to the caller
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    mask1 = IR::RegOpnd::New(TyInt32, m_func);
    mask2 = IR::RegOpnd::New(TyInt32, m_func);
    // bound checks
    // check if any value is potentially out of range (0x80000000 in output)
    // PCMPEQD tmp, dst, X86_NEG_MASK (0x80000000)
    // MOVMSKPS mask1, tmp
    // CMP mask1, 0
    // JEQ $doneLabel   (no lane produced the indefinite value)
    tmp = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // we have potential out of bound. check bounds
    // MOVAPS tmp2, X86_TWO_31_F4 (0x4f000000)
    // CMPLEPS tmp, tmp2, src       (2^31 <= src, i.e. src too large)
    // MOVMSKPS mask1, tmp
    // MOVAPS tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
    // CMPLTPS tmp, src, tmp2       (src < -2^31, i.e. src too small)
    // MOVMSKPS mask2, tmp
    // OR mask1, mask1, mask2
    // CMP mask1, 0
    // JEQ $doneLabel   (lane was NaN-free and in range: the 0x80000000 was a real value)
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegTwoPower31F4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // throw range error
    m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    insertInstr->InsertBefore(doneLabel);
    return pInstr;
}
// Lowers float32x4 -> uint32x4 conversion.
// CVTTPS2DQ only converts the signed range [-2^31, 2^31-1], so lanes >= 2^31
// have 2^31 subtracted before the conversion and i32(2^31) added back after.
// Any lane <= -1.0 or >= 2^32 raises a range error.
IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *two_31_f4_mask, *two_31_i4_mask, *mask;
    IR::Instr *pInstr, *newInstr;
    IR::LabelInstr *doneLabel, *throwLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    throwLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
    pInstr = instr->m_prev; // resume point returned to the caller
    mask = IR::RegOpnd::New(TyInt32, m_func);
    two_31_f4_mask = IR::RegOpnd::New(TySimd128F4, m_func);
    two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp = IR::RegOpnd::New(TySimd128F4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128F4, m_func);
    // any lanes <= -1.0 ?
    // CMPLEPS tmp, src, [X86_ALL_FLOAT32_NEG_ONES]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, src, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesF4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
    // To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
    // MOVAPS two_31_f4_mask, [X86_TWO_31]
    // CMPLEPS tmp2, two_31_f4_mask, src
    // ANDPS two_31_f4_mask, tmp2 // two_31_f4_mask has f32(2^31) for lanes >= 2^31, 0 otherwise
    // SUBPS tmp2, src, two_31_f4_mask // subtract 2^31 from lanes >= 2^31, unchanged otherwise.
    // CVTTPS2DQ dst, tmp2
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_f4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128F4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp2, two_31_f4_mask, src, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_f4_mask, two_31_f4_mask, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::SUBPS, tmp2, src, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CVTTPS2DQ, dst, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
    // PCMPEQD tmp, dst, [X86_NEG_MASK]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    // we pass range checks
    // add i4(2^31) values back to adjusted values.
    // Use first bit from the 2^31 float mask (0x4f000...0 << 1)
    // and AND with 2^31 int mask (0x8000..0) setting first bit to zero if lane hasn't been adjusted
    // MOVAPS two_31_i4_mask, [X86_TWO_31_I4]
    // PSLLD two_31_f4_mask, 1
    // ANDPS two_31_i4_mask, two_31_f4_mask
    // PADDD dst, dst, two_31_i4_mask
    // JMP $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_i4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31I4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PSLLD, two_31_f4_mask, two_31_f4_mask, IR::IntConstOpnd::New(1, TyInt8, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_i4_mask, two_31_i4_mask, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, two_31_i4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, doneLabel, m_func));
    // throwLabel:
    // Throw Range Error
    instr->InsertBefore(throwLabel);
    m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    // doneLabel:
    instr->InsertBefore(doneLabel);
    instr->Remove();
    return pInstr;
}
  2218. IR::Instr* LowererMD::Simd128LowerFloat32x4FromUint32x4(IR::Instr *instr)
  2219. {
  2220. IR::Opnd *dst, *src, *tmp, *zero;
  2221. IR::Instr *pInstr, *newInstr;
  2222. dst = instr->GetDst();
  2223. src = instr->GetSrc1();
  2224. Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
  2225. pInstr = instr->m_prev;
  2226. zero = IR::RegOpnd::New(TySimd128I4, m_func);
  2227. tmp = IR::RegOpnd::New(TySimd128I4, m_func);
  2228. // find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
  2229. // MOVAPS zero, [X86_ALL_ZEROS]
  2230. newInstr = IR::Instr::New(Js::OpCode::MOVAPS, zero, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
  2231. instr->InsertBefore(newInstr);
  2232. Legalize(newInstr);
  2233. // tmp = PCMPGTD zero, src
  2234. newInstr = IR::Instr::New(Js::OpCode::PCMPGTD, tmp, zero, src, m_func);
  2235. instr->InsertBefore(newInstr);
  2236. Legalize(newInstr);
  2237. // temp1 has f32(2^32) for unsigned values above 2^31, 0 otherwise
  2238. // ANDPS tmp, tmp, [X86_TWO_32_F4]
  2239. newInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower32F4Addr(), TySimd128F4, m_func), m_func);
  2240. instr->InsertBefore(newInstr);
  2241. Legalize(newInstr);
  2242. // convert
  2243. // dst = CVTDQ2PS src
  2244. newInstr = IR::Instr::New(Js::OpCode::CVTDQ2PS, dst, src, m_func);
  2245. instr->InsertBefore(newInstr);
  2246. Legalize(newInstr);
  2247. // Add f32(2^32) to negative values
  2248. // ADDPS dst, dst, tmp
  2249. newInstr = IR::Instr::New(Js::OpCode::ADDPS, dst, dst, tmp, m_func);
  2250. instr->InsertBefore(newInstr);
  2251. Legalize(newInstr);
  2252. instr->Remove();
  2253. return pInstr;
  2254. }
  2255. IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
  2256. {
  2257. Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
  2258. instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
  2259. instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
  2260. instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
  2261. instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
  2262. instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
  2263. instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
  2264. //instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
  2265. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
  2266. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I8 ||
  2267. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I16 ||
  2268. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U4 ||
  2269. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U8 ||
  2270. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U16 ||
  2271. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4
  2272. //instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
  2273. );
  2274. IR::Instr * instrPrev = instr->m_prev;
  2275. IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
  2276. IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
  2277. IR::Opnd * dst = instr->GetDst();
  2278. IR::Opnd * src1 = instr->GetSrc1();
  2279. IR::Opnd * src2 = instr->GetSrc2();
  2280. ValueType arrType = baseOpnd->GetValueType();
  2281. uint8 dataWidth = instr->dataWidth;
  2282. // Type-specialized.
  2283. Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
  2284. IR::Instr * done;
  2285. if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
  2286. {
  2287. uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
  2288. // bound check and helper
  2289. done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
  2290. }
  2291. else
  2292. {
  2293. // Reaching here means:
  2294. // We have a constant index, and either
  2295. // (1) constant heap or (2) variable heap with constant index < 16MB.
  2296. // Case (1) requires static bound check. Case (2) means we are always in bound.
  2297. // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
  2298. if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
  2299. {
  2300. m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
  2301. instr->Remove();
  2302. return instrPrev;
  2303. }
  2304. done = instr;
  2305. }
  2306. return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
  2307. }
  2308. IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
  2309. {
  2310. Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
  2311. Assert(
  2312. instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
  2313. instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
  2314. instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
  2315. instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
  2316. instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
  2317. instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
  2318. instr->m_opcode == Js::OpCode::Simd128_LdArr_F4
  2319. );
  2320. IR::Opnd * src = instr->GetSrc1();
  2321. IR::RegOpnd * indexOpnd =src->AsIndirOpnd()->GetIndexOpnd();
  2322. IR::Opnd * dst = instr->GetDst();
  2323. ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
  2324. // If we type-specialized, then array is a definite typed-array.
  2325. Assert(arrType.IsObject() && arrType.IsTypedArray());
  2326. Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr);
  2327. Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr);
  2328. return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
  2329. }
// Emits the machine loads for a SIMD element load of `dataWidth` bytes
// (16, 12, 8 or 4) from the indirect operand `src` into `dst`, replacing
// `instr`. `scaleFactor`, when nonzero, is applied to the index operand
// (non-asm.js typed arrays only). Returns the instruction preceding the
// removed `instr`, i.e. the caller's resume point.
IR::Instr *
LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
{
    IR::Instr *newInstr = nullptr;
    IR::Instr * instrPrev = instr->m_prev;
    // Type-specialized.
    Assert(dst && dst->IsSimd128());
    Assert(src->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
        src->AsIndirOpnd()->SetScale(scaleFactor);
    }
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 12:
    {
        // 12-byte load: low 8 bytes + next 4 bytes, merged. The order of the
        // instructions below is significant (the OR expects temp's value in
        // the upper half).
        IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
        // MOVSD dst, src1([arrayBuffer + indexOpnd]) — low 8 bytes
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8]) — next 4 bytes into low lane
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
        instr->InsertBefore(newInstr);
        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
        Legalize(newInstr);
        // PSLLDQ temp, 0x08 — shift those 4 bytes up into bytes 8..11
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
        // ORPS dst, temp — combine the two halves
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    }
    case 8:
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 4:
        // MOVSS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    default:
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
  2390. IR::Instr*
  2391. LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
  2392. {
  2393. Assert(
  2394. instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
  2395. instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
  2396. instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
  2397. instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
  2398. instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
  2399. instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
  2400. instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
  2401. //instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
  2402. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
  2403. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I8 ||
  2404. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I16 ||
  2405. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
  2406. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U8 ||
  2407. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U16 ||
  2408. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
  2409. instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4
  2410. //instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
  2411. );
  2412. IR::Instr * instrPrev = instr->m_prev;
  2413. IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
  2414. IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
  2415. IR::Opnd * dst = instr->GetDst();
  2416. IR::Opnd * src1 = instr->GetSrc1();
  2417. IR::Opnd * src2 = instr->GetSrc2();
  2418. ValueType arrType = baseOpnd->GetValueType();
  2419. uint8 dataWidth = instr->dataWidth;
  2420. // Type-specialized.
  2421. Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
  2422. IR::Instr * done;
  2423. if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
  2424. {
  2425. // CMP indexOpnd, src2(arrSize)
  2426. // JA $helper
  2427. // JMP $store
  2428. // $helper:
  2429. // Throw RangeError
  2430. // JMP $done
  2431. // $store:
  2432. // MOV dst([arrayBuffer + indexOpnd]), src1
  2433. // $done:
  2434. uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
  2435. done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
  2436. }
  2437. else
  2438. {
  2439. // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds
  2440. if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
  2441. {
  2442. m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
  2443. instr->Remove();
  2444. return instrPrev;
  2445. }
  2446. done = instr;
  2447. }
  2448. return Simd128ConvertToStore(dst, src1, dataWidth, instr);
  2449. }
  2450. IR::Instr*
  2451. LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
  2452. {
  2453. Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
  2454. Assert(
  2455. instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
  2456. instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
  2457. instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
  2458. instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
  2459. instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
  2460. instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
  2461. instr->m_opcode == Js::OpCode::Simd128_StArr_F4
  2462. );
  2463. IR::Opnd * dst = instr->GetDst();
  2464. IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd();
  2465. IR::Opnd * src1 = instr->GetSrc1();
  2466. uint8 dataWidth = instr->dataWidth;
  2467. ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
  2468. // If we type-specialized, then array is a definite type-array.
  2469. Assert(arrType.IsObject() && arrType.IsTypedArray());
  2470. Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr);
  2471. Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr);
  2472. return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /*scale factor*/);
  2473. }
  2474. IR::Instr *
  2475. LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
  2476. {
  2477. IR::Instr * instrPrev = instr->m_prev;
  2478. Assert(src1 && src1->IsSimd128());
  2479. Assert(dst->IsIndirOpnd());
  2480. if (scaleFactor > 0)
  2481. {
  2482. // needed only for non-Asmjs code
  2483. Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
  2484. dst->AsIndirOpnd()->SetScale(scaleFactor);
  2485. }
  2486. switch (dataWidth)
  2487. {
  2488. case 16:
  2489. // MOVUPS dst([arrayBuffer + indexOpnd]), src1
  2490. instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
  2491. break;
  2492. case 12:
  2493. {
  2494. IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
  2495. IR::Instr *movss;
  2496. // MOVAPS temp, src
  2497. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
  2498. // MOVSD dst([arrayBuffer + indexOpnd]), temp
  2499. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
  2500. // PSRLDQ temp, 0x08
  2501. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func));
  2502. // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
  2503. movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
  2504. instr->InsertBefore(movss);
  2505. movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
  2506. break;
  2507. }
  2508. case 8:
  2509. // MOVSD dst([arrayBuffer + indexOpnd]), src1
  2510. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
  2511. break;
  2512. case 4:
  2513. // MOVSS dst([arrayBuffer + indexOpnd]), src1
  2514. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
  2515. break;
  2516. default:;
  2517. Assume(UNREACHED);
  2518. }
  2519. instr->Remove();
  2520. return instrPrev;
  2521. }
void
LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
    // Emits an upper-bound check for a SIMD typed-array access of
    // instr->dataWidth bytes: the in-bounds path branches to a skip label,
    // while the out-of-bounds path falls through into a RangeError throw
    // inserted just before that label. No-op if globopt already eliminated
    // the check.
    Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
    IR::Opnd* headSegmentLengthOpnd;
    if (arrayRegOpnd->EliminatedUpperBoundCheck())
    {
        // already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
        return;
    }
    if (arrayRegOpnd->HeadSegmentLengthSym())
    {
        // Length was already extracted into a sym by globopt; reuse it.
        headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
    }
    else
    {
        // (headSegmentLength = [base + offset(length)])
        int lengthOffset;
        lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
        headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
    }
    // skipLabel is inserted before instr; all check code below is inserted
    // before skipLabel, i.e. ahead of the actual load/store.
    IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
    // Access width expressed in array elements (not bytes).
    int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
    if (indexOpnd)
    {
        // MOV tmp, elemCount
        // ADD tmp, index
        // CMP tmp, Length -- upper bound check
        // JBE $storeLabel
        // Throw RuntimeError
        // skipLabel:
        IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
        IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
        m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
        Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
        // Unsigned compare/branch (BrLe_A with unsigned=true): skip the throw
        // when (elemCount + index) <= length.
        m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    else
    {
        // Constant index: fold (offset + elemCount) at JIT time and compare
        // against the length directly (operands swapped vs. the reg-index case).
        // CMP Length, (offset + elemCount)
        // JA $storeLabel
        int32 offset = indirOpnd->GetOffset();
        int32 index = offset + elemCount;
        m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    // Fall-through (out of bounds) path: throw JSERR_ArgumentOutOfRange.
    m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    return;
}
  2571. void
  2572. LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
  2573. {
  2574. // For non-asm.js we check if headSeg symbol exists, else load it.
  2575. IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
  2576. IR::RegOpnd *headSegmentOpnd;
  2577. if (arrayRegOpnd->HeadSegmentSym())
  2578. {
  2579. headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
  2580. }
  2581. else
  2582. {
  2583. // MOV headSegment, [base + offset(head)]
  2584. int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
  2585. IR::IndirOpnd * newIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
  2586. headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
  2587. m_lowerer->InsertMove(headSegmentOpnd, newIndirOpnd, instr);
  2588. }
  2589. // change base to be the head segment instead of the array object
  2590. indirOpnd->SetBaseOpnd(headSegmentOpnd);
  2591. }
// Builds args list <dst, src1, src2, src3 ..>
SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
{
    // Walks the chain of ExtendArg_A defs feeding instr: src1 of instr is the
    // last ExtendArg, and each ExtendArg's src2 (when present) links to the
    // previous one. Each ExtendArg's src1 (the actual argument) is pushed as
    // we walk backwards, and instr's dst is pushed last, so popping the
    // returned stack yields <dst, src1, src2, ...>.
    SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
    IR::Instr *pInstr = instr;
    IR::Opnd *dst, *src1, *src2;
    dst = src1 = src2 = nullptr;
    if (pInstr->GetDst())
    {
        dst = pInstr->UnlinkDst();
    }
    // The chain head must be a single-def sym so we can follow its def.
    src1 = pInstr->UnlinkSrc1();
    Assert(src1->GetStackSym()->IsSingleDef());
    pInstr = src1->GetStackSym()->GetInstrDef();
    while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
    {
        Assert(pInstr->GetSrc1());
        // Copy the arg operand; the original stays attached to the ExtendArg.
        src1 = pInstr->GetSrc1()->Copy(this->m_func);
        if (src1->IsRegOpnd())
        {
            // Keep the sym alive across loop back-edges; the ExtendArg chain
            // may be hoisted relative to its use.
            this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
        }
        args->Push(src1);
        if (pInstr->GetSrc2())
        {
            // Follow the link to the previous ExtendArg in the chain.
            src2 = pInstr->GetSrc2();
            Assert(src2->GetStackSym()->IsSingleDef());
            pInstr = src2->GetStackSym()->GetInstrDef();
        }
        else
        {
            // First ExtendArg in the chain; stop walking.
            pInstr = nullptr;
        }
    }
    // dst goes on top (may be nullptr if instr had no dst).
    args->Push(dst);
    // Expect at least dst + 3 operands for any extended-arg SIMD op.
    Assert(args->Count() > 3);
    return args;
}
  2630. IR::Opnd*
  2631. LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/)
  2632. {
  2633. IRType constType = constOpnd->GetType();
  2634. if (!IRType_IsNativeInt(constType))
  2635. {
  2636. // not int opnd, nothing to do
  2637. return constOpnd;
  2638. }
  2639. Assert(type == TyInt32 || type == TyInt16 || type == TyInt8);
  2640. Assert(constType == TyInt32 || constType == TyInt16 || constType == TyInt8);
  2641. if (constOpnd->IsRegOpnd())
  2642. {
  2643. // already a register, just cast
  2644. constOpnd->SetType(type);
  2645. return constOpnd;
  2646. }
  2647. // en-register
  2648. IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
  2649. // MOV tempReg, constOpnd
  2650. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
  2651. return tempReg;
  2652. }
// Populates the table mapping SIMD IR opcodes to their single x86/x64 machine
// opcode, for ops that lower 1:1. Ops needing multi-instruction sequences are
// handled elsewhere and do not appear here. Bitwise reinterpret-casts
// (FromXxxBits_*) all lower to MOVAPS.
void LowererMD::Simd128InitOpcodeMap()
{
    m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());
    // All simd ops should be contiguous for this mapping to work
    Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);

    // --- Int32x4 ---
    //SET_SIMDOPCODE(Simd128_FromFloat64x2_I4     , CVTTPD2DQ);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Add_I4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_I4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Lt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Gt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Eq_I4                , PCMPEQD);
    SET_SIMDOPCODE(Simd128_And_I4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I4               , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I4               , XORPS);

    // --- Int16x8 bit-casts ---
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I8  , MOVAPS);

    // --- Int8x16 ---
    SET_SIMDOPCODE(Simd128_Or_I16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_I16              , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I16              , XORPS);
    SET_SIMDOPCODE(Simd128_And_I16              , PAND);
    SET_SIMDOPCODE(Simd128_Add_I16              , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_I16              , PSUBB);
    SET_SIMDOPCODE(Simd128_Lt_I16               , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Gt_I16               , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Eq_I16               , PCMPEQB);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I16, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I16 , MOVAPS);

    // --- Uint32x4 bit-casts ---
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U4  , MOVAPS);

    // --- Uint16x8 bit-casts ---
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U8  , MOVAPS);

    // --- Uint8x16 bit-casts ---
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U16  , MOVAPS);

    // --- Float32x4 ---
    //SET_SIMDOPCODE(Simd128_FromFloat64x2_F4      , CVTPD2PS);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_F4        , CVTDQ2PS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_Abs_F4                , ANDPS);
    SET_SIMDOPCODE(Simd128_Neg_F4                , XORPS);
    SET_SIMDOPCODE(Simd128_Add_F4                , ADDPS);
    SET_SIMDOPCODE(Simd128_Sub_F4                , SUBPS);
    SET_SIMDOPCODE(Simd128_Mul_F4                , MULPS);
    SET_SIMDOPCODE(Simd128_Div_F4                , DIVPS);
    SET_SIMDOPCODE(Simd128_Sqrt_F4               , SQRTPS);
    SET_SIMDOPCODE(Simd128_Lt_F4                 , CMPLTPS); // CMPLTPS
    SET_SIMDOPCODE(Simd128_LtEq_F4               , CMPLEPS); // CMPLEPS
    SET_SIMDOPCODE(Simd128_Eq_F4                 , CMPEQPS); // CMPEQPS
    SET_SIMDOPCODE(Simd128_Neq_F4                , CMPNEQPS); // CMPNEQPS
    SET_SIMDOPCODE(Simd128_Gt_F4                 , CMPLTPS); // CMPLTPS (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_F4               , CMPLEPS); // CMPLEPS (swap srcs)

    // --- Float64x2 (disabled) ---
#if 0
    SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS);
    SET_SIMDOPCODE(Simd128_Neg_D2, XORPS);
    SET_SIMDOPCODE(Simd128_Add_D2, ADDPD);
    SET_SIMDOPCODE(Simd128_Abs_D2, ANDPD);
    SET_SIMDOPCODE(Simd128_Sub_D2, SUBPD);
    SET_SIMDOPCODE(Simd128_Mul_D2, MULPD);
    SET_SIMDOPCODE(Simd128_Div_D2, DIVPD);
    SET_SIMDOPCODE(Simd128_Min_D2, MINPD);
    SET_SIMDOPCODE(Simd128_Max_D2, MAXPD);
    SET_SIMDOPCODE(Simd128_Sqrt_D2, SQRTPD);
    SET_SIMDOPCODE(Simd128_Lt_D2, CMPLTPD); // CMPLTPD
    SET_SIMDOPCODE(Simd128_LtEq_D2, CMPLEPD); // CMPLEPD
    SET_SIMDOPCODE(Simd128_Eq_D2, CMPEQPD); // CMPEQPD
    SET_SIMDOPCODE(Simd128_Neq_D2, CMPNEQPD); // CMPNEQPD
    SET_SIMDOPCODE(Simd128_Gt_D2, CMPLTPD); // CMPLTPD (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_D2, CMPLEPD); // CMPLEPD (swap srcs)
#endif // 0

    // --- Int16x8 ---
    SET_SIMDOPCODE(Simd128_And_I8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_I8               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_I8               , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_I8               , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_I8               , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_I8                , PCMPEQW);
    SET_SIMDOPCODE(Simd128_Lt_I8                , PCMPGTW); // (swap srcs)
    SET_SIMDOPCODE(Simd128_Gt_I8                , PCMPGTW);
    SET_SIMDOPCODE(Simd128_AddSaturate_I8       , PADDSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_I8       , PSUBSW);
    SET_SIMDOPCODE(Simd128_AddSaturate_I16      , PADDSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_I16      , PSUBSB);

    // --- Uint32x4 ---
    SET_SIMDOPCODE(Simd128_And_U4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_U4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_U4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U4               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_U4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Eq_U4                , PCMPEQD); // same as int32x4.equal

    // --- Uint16x8 ---
    SET_SIMDOPCODE(Simd128_And_U8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_U8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_U8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U8               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U8               , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_U8               , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_U8               , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_U8                , PCMPEQW); // same as int16X8.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U8       , PADDUSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_U8       , PSUBUSW);

    // --- Uint8x16 ---
    SET_SIMDOPCODE(Simd128_And_U16              , PAND);
    SET_SIMDOPCODE(Simd128_Or_U16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_U16              , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U16              , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U16              , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_U16              , PSUBB);
    SET_SIMDOPCODE(Simd128_Eq_U16               , PCMPEQB); // same as int8x16.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U16      , PADDUSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_U16      , PSUBUSB);

    // --- Bool vectors (bitwise only) ---
    SET_SIMDOPCODE(Simd128_And_B4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_B4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_B4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B4               , XORPS);
    SET_SIMDOPCODE(Simd128_And_B8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_B8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_B8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B8               , XORPS);
    SET_SIMDOPCODE(Simd128_And_B16              , PAND);
    SET_SIMDOPCODE(Simd128_Or_B16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_B16              , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B16              , XORPS);
}
  2810. #undef SIMD_SETOPCODE
  2811. #undef SIMD_GETOPCODE
// FromVar
void
LowererMD::GenerateCheckedSimdLoad(IR::Instr * instr)
{
    // Lowers FromVar (Var -> SIMD value). When the instr carries bailout info,
    // first emit an object test plus a vtable check against the expected SIMD
    // type, bailing out on mismatch; then unbox by copying the 16-byte value
    // out of the boxed object with MOVUPS.
    Assert(instr->m_opcode == Js::OpCode::FromVar);
    Assert(instr->GetSrc1()->GetType() == TyVar);
    Assert(IRType_IsSimd128(instr->GetDst()->GetType()));
    bool checkRequired = instr->HasBailOutInfo();
    IR::LabelInstr * labelHelper = nullptr, * labelDone = nullptr;
    IR::Instr * insertInstr = instr, * newInstr;
    IR::RegOpnd * src = instr->GetSrc1()->AsRegOpnd(), * dst = instr->GetDst()->AsRegOpnd();
    // Only the SIMD-only bailout kinds are expected here.
    Assert(!checkRequired || instr->GetBailOutKind() == IR::BailOutSimd128F4Only || instr->GetBailOutKind() == IR::BailOutSimd128I4Only);
    if (checkRequired)
    {
        // Layout: [type checks] -> MOVUPS + JMP done -> labelHelper: bailout -> labelDone.
        // The unbox code below is inserted before labelHelper.
        labelHelper = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
        labelDone = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
        instr->InsertBefore(labelHelper);
        instr->InsertAfter(labelDone);
        insertInstr = labelHelper;
        // Tagged values can't be SIMD objects; jump to the bailout helper.
        GenerateObjectTest(instr->GetSrc1(), insertInstr, labelHelper);
        // CMP [src], vtable-of-expected-SIMD-type
        newInstr = IR::Instr::New(Js::OpCode::CMP, instr->m_func);
        newInstr->SetSrc1(IR::IndirOpnd::New(instr->GetSrc1()->AsRegOpnd(), 0, TyMachPtr, instr->m_func));
        newInstr->SetSrc2(m_lowerer->LoadVTableValueOpnd(instr, dst->GetType() == TySimd128F4 ? VTableValue::VtableSimd128F4 : VTableValue::VtableSimd128I4));
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, labelHelper, this->m_func));
        // Detach operands; instr itself becomes the bailout call.
        instr->UnlinkSrc1();
        instr->UnlinkDst();
        this->m_lowerer->GenerateBailOut(instr);
    }
    size_t valueOffset = dst->GetType() == TySimd128F4 ? Js::JavascriptSIMDFloat32x4::GetOffsetOfValue() : Js::JavascriptSIMDInt32x4::GetOffsetOfValue();
    Assert(valueOffset < INT_MAX);
    // MOVUPS dst, [src + offset(value)]  -- the actual unboxing load.
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::IndirOpnd::New(src, static_cast<int>(valueOffset), dst->GetType(), this->m_func), this->m_func);
    insertInstr->InsertBefore(newInstr);
    // NOTE(review): when checkRequired is false, labelDone is nullptr here, so
    // this JMP would target a null label and the original FromVar is never
    // removed — presumably callers always reach this with bailout info; verify.
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, this->m_func));
    // FromVar is converted to BailOut call. Don't remove.
}
  2849. // ToVar
  2850. void LowererMD::GenerateSimdStore(IR::Instr * instr)
  2851. {
  2852. IR::RegOpnd *dst, *src;
  2853. IRType type;
  2854. dst = instr->GetDst()->AsRegOpnd();
  2855. src = instr->GetSrc1()->AsRegOpnd();
  2856. type = src->GetType();
  2857. this->m_lowerer->LoadScriptContext(instr);
  2858. IR::Instr * instrCall = IR::Instr::New(Js::OpCode::CALL, instr->GetDst(),
  2859. IR::HelperCallOpnd::New(type == TySimd128F4 ? IR::HelperAllocUninitializedSimdF4 : IR::HelperAllocUninitializedSimdI4, this->m_func), this->m_func);
  2860. instr->InsertBefore(instrCall);
  2861. this->lowererMDArch.LowerCall(instrCall, 0);
  2862. IR::Opnd * valDst;
  2863. if (type == TySimd128F4)
  2864. {
  2865. valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDFloat32x4::GetOffsetOfValue(), TySimd128F4, this->m_func);
  2866. }
  2867. else
  2868. {
  2869. valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDInt32x4::GetOffsetOfValue(), TySimd128I4, this->m_func);
  2870. }
  2871. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func));
  2872. instr->Remove();
  2873. }
  2874. void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
  2875. {
  2876. Assert(lanes);
  2877. Assert(lanesSrc);
  2878. Assert(fromSrc1 && fromSrc2);
  2879. *fromSrc1 = 0;
  2880. *fromSrc2 = 0;
  2881. for (uint i = 0; i < 4; i++)
  2882. {
  2883. if (lanes[i] >= 0 && lanes[i] < 4)
  2884. {
  2885. (*fromSrc1)++;
  2886. lanesSrc[i] = 1;
  2887. }
  2888. else if (lanes[i] >= 4 && lanes[i] < 8)
  2889. {
  2890. (*fromSrc2)++;
  2891. lanesSrc[i] = 2;
  2892. }
  2893. else
  2894. {
  2895. Assert(UNREACHED);
  2896. }
  2897. }
  2898. }
  2899. void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
  2900. {
  2901. int8 shufMask;
  2902. uint8 normLanes[4];
  2903. IR::RegOpnd * tmp = IR::RegOpnd::New(TySimd128I4, m_func);
  2904. for (uint i = 0; i < 4; i++)
  2905. {
  2906. normLanes[i] = (lanes[i] >= 4) ? (lanes[i] - 4) : lanes[i];
  2907. }
  2908. shufMask = (int8)((normLanes[3] << 6) | (normLanes[2] << 4) | (normLanes[1] << 2) | normLanes[0]);
  2909. // ToDo: Move this to legalization code
  2910. if (dst->IsEqual(src1))
  2911. {
  2912. // instruction already legal
  2913. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2914. }
  2915. else if (dst->IsEqual(src2))
  2916. {
  2917. // MOVAPS tmp, dst
  2918. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp, dst, m_func));
  2919. // MOVAPS dst, src1
  2920. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  2921. // SHUF dst, tmp, imm8
  2922. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, tmp, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2923. }
  2924. else
  2925. {
  2926. // MOVAPS dst, src1
  2927. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  2928. // SHUF dst, src2, imm8
  2929. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2930. }
  2931. }
  2932. BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
  2933. {
  2934. return (1 << Lowerer::GetArrayIndirScale(arrType));
  2935. }
  2936. #endif