// LowerMDSharedSimd128.cpp
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "Backend.h"
  6. static IR::Instr* removeInstr(IR::Instr* instr);
  7. #ifdef ENABLE_WASM_SIMD
  8. static IR::Instr* removeInstr(IR::Instr* instr)
  9. {
  10. IR::Instr* prevInstr;
  11. prevInstr = instr->m_prev;
  12. instr->Remove();
  13. return prevInstr;
  14. }
// Looks up the machine-dependent opcode mapped to the given Simd128 IR opcode
// in m_simd128OpCodesMap (indexed relative to Simd128_Start). A zero entry
// means "no 1:1 mapping registered".
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]
// Registers a Simd128 -> machine-dependent opcode mapping. Asserts the table
// slot is still unset (no double registration) and that the MD opcode really
// is machine-dependent (past MDStart).
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0);\
Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart);\
m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
  20. IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
  21. {
  22. // Currently only handles type-specialized/asm.js opcodes
  23. if (!instr->GetDst())
  24. {
  25. // SIMD ops always have DST in asmjs
  26. Assert(!instr->m_func->GetJITFunctionBody()->IsAsmJsMode());
  27. // unused result. Do nothing.
  28. IR::Instr * pInstr = instr->m_prev;
  29. instr->Remove();
  30. return pInstr;
  31. }
  32. if (Simd128TryLowerMappedInstruction(instr))
  33. {
  34. return instr->m_prev;
  35. }
  36. return Simd128LowerUnMappedInstruction(instr);
  37. }
  38. bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
  39. {
  40. bool legalize = true;
  41. Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
  42. if ((uint32)opcode == 0)
  43. return false;
  44. Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32);
  45. Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
  46. Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
  47. switch (instr->m_opcode)
  48. {
  49. case Js::OpCode::Simd128_Abs_F4:
  50. Assert(opcode == Js::OpCode::ANDPS);
  51. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
  52. break;
  53. case Js::OpCode::Simd128_Abs_D2:
  54. Assert(opcode == Js::OpCode::ANDPD);
  55. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
  56. break;
  57. case Js::OpCode::Simd128_Neg_F4:
  58. Assert(opcode == Js::OpCode::XORPS);
  59. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
  60. break;
  61. case Js::OpCode::Simd128_Neg_D2:
  62. Assert(opcode == Js::OpCode::XORPS);
  63. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
  64. break;
  65. case Js::OpCode::Simd128_Not_I4:
  66. case Js::OpCode::Simd128_Not_I16:
  67. case Js::OpCode::Simd128_Not_I8:
  68. case Js::OpCode::Simd128_Not_U4:
  69. case Js::OpCode::Simd128_Not_U8:
  70. case Js::OpCode::Simd128_Not_U16:
  71. case Js::OpCode::Simd128_Not_B4:
  72. case Js::OpCode::Simd128_Not_B8:
  73. case Js::OpCode::Simd128_Not_B16:
  74. Assert(opcode == Js::OpCode::XORPS);
  75. instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), instr->GetSrc1()->GetType(), m_func));
  76. break;
  77. case Js::OpCode::Simd128_Gt_F4:
  78. case Js::OpCode::Simd128_Gt_D2:
  79. case Js::OpCode::Simd128_GtEq_F4:
  80. case Js::OpCode::Simd128_GtEq_D2:
  81. case Js::OpCode::Simd128_Lt_I4:
  82. case Js::OpCode::Simd128_Lt_I8:
  83. case Js::OpCode::Simd128_Lt_I16:
  84. {
  85. Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS
  86. || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD || opcode == Js::OpCode::PCMPGTB
  87. || opcode == Js::OpCode::PCMPGTW );
  88. // swap operands
  89. auto *src1 = instr->UnlinkSrc1();
  90. auto *src2 = instr->UnlinkSrc2();
  91. instr->SetSrc1(src2);
  92. instr->SetSrc2(src1);
  93. break;
  94. }
  95. }
  96. instr->m_opcode = opcode;
  97. if (legalize)
  98. {
  99. //MakeDstEquSrc1(instr);
  100. Legalize(instr);
  101. }
  102. return true;
  103. }
  104. IR::MemRefOpnd *
  105. LowererMD::LoadSimdHelperArgument(IR::Instr * instr, uint8 index)
  106. {
  107. //the most reliable way to pass a simd value on x86/x64 win/lnx across calls
  108. //is to pass a pointer to a SIMD value in the simd temporary area.
  109. //otherwise we have to use __m128 and msvc intrinsics which may or may not be the same across
  110. //MSVC and Clang
  111. IR::MemRefOpnd* srcMemRef = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), TySimd128F4, m_func);
  112. IR::AddrOpnd* argAddress = IR::AddrOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), IR::AddrOpndKindDynamicMisc, m_func, true /* doesn't come from a user */);
  113. LoadHelperArgument(instr, argAddress);
  114. return srcMemRef;
  115. }
// Lowers Simd128 opcodes that have no 1:1 machine-dependent mapping (see
// Simd128TryLowerMappedInstruction). Pure dispatch: each opcode family is
// handed to its dedicated lowering routine, which returns the instruction to
// resume lowering from. Falls through to nullptr only after the
// unreachable-default assert.
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
    switch (instr->m_opcode)
    {
    // Constant load
    case Js::OpCode::Simd128_LdC:
        return Simd128LoadConst(instr);
#ifdef ENABLE_SIMD
    // Vector constructors from scalar lanes, grouped by lane count.
    case Js::OpCode::Simd128_FloatsToF4:
    case Js::OpCode::Simd128_IntsToI4:
    case Js::OpCode::Simd128_IntsToU4:
    case Js::OpCode::Simd128_IntsToB4:
        return Simd128LowerConstructor_4(instr);
    case Js::OpCode::Simd128_IntsToI8:
    case Js::OpCode::Simd128_IntsToU8:
    case Js::OpCode::Simd128_IntsToB8:
        return Simd128LowerConstructor_8(instr);
    case Js::OpCode::Simd128_IntsToI16:
    case Js::OpCode::Simd128_IntsToU16:
    case Js::OpCode::Simd128_IntsToB16:
        return Simd128LowerConstructor_16(instr);
    // Approximate reciprocal / reciprocal-sqrt
    case Js::OpCode::Simd128_Rcp_F4:
    //case Js::OpCode::Simd128_Rcp_D2:
        return Simd128LowerRcp(instr);
    //SQRT
    case Js::OpCode::Simd128_RcpSqrt_F4:
    //case Js::OpCode::Simd128_RcpSqrt_D2:
        return Simd128LowerRcpSqrt(instr);
    // Lane-wise select
    case Js::OpCode::Simd128_Select_F4:
    case Js::OpCode::Simd128_Select_I4:
    //case Js::OpCode::Simd128_Select_D2:
    case Js::OpCode::Simd128_Select_I8:
    case Js::OpCode::Simd128_Select_I16:
    case Js::OpCode::Simd128_Select_U4:
    case Js::OpCode::Simd128_Select_U8:
    case Js::OpCode::Simd128_Select_U16:
        return Simd128LowerSelect(instr);
#endif
#if 0
    case Js::OpCode::Simd128_DoublesToD2:
        return Simd128LowerConstructor_2(instr);
#endif // 0
    // Lane extraction
    case Js::OpCode::Simd128_ExtractLane_I2:
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B4:
    case Js::OpCode::Simd128_ExtractLane_B8:
    case Js::OpCode::Simd128_ExtractLane_B16:
    case Js::OpCode::Simd128_ExtractLane_F4:
        return Simd128LowerLdLane(instr);
    // Lane replacement, grouped by lane count.
    case Js::OpCode::Simd128_ReplaceLane_I2:
    case Js::OpCode::Simd128_ReplaceLane_D2:
        return SIMD128LowerReplaceLane_2(instr);
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_F4:
    case Js::OpCode::Simd128_ReplaceLane_U4:
    case Js::OpCode::Simd128_ReplaceLane_B4:
        return SIMD128LowerReplaceLane_4(instr);
    case Js::OpCode::Simd128_ReplaceLane_I8:
    case Js::OpCode::Simd128_ReplaceLane_U8:
    case Js::OpCode::Simd128_ReplaceLane_B8:
        return SIMD128LowerReplaceLane_8(instr);
    case Js::OpCode::Simd128_ReplaceLane_I16:
    case Js::OpCode::Simd128_ReplaceLane_U16:
    case Js::OpCode::Simd128_ReplaceLane_B16:
        return SIMD128LowerReplaceLane_16(instr);
    // Scalar broadcast
    case Js::OpCode::Simd128_Splat_F4:
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_I2:
    case Js::OpCode::Simd128_Splat_D2:
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U4:
    case Js::OpCode::Simd128_Splat_U8:
    case Js::OpCode::Simd128_Splat_U16:
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        return Simd128LowerSplat(instr);
    case Js::OpCode::Simd128_Sqrt_F4:
    //case Js::OpCode::Simd128_Sqrt_D2:
        return Simd128LowerSqrt(instr);
    // Integer negation (no single SSE instruction; needs 0 - x sequence).
    case Js::OpCode::Simd128_Neg_I4:
    case Js::OpCode::Simd128_Neg_I8:
    case Js::OpCode::Simd128_Neg_I16:
    case Js::OpCode::Simd128_Neg_U4:
    case Js::OpCode::Simd128_Neg_U8:
    case Js::OpCode::Simd128_Neg_U16:
        return Simd128LowerNeg(instr);
    // Integer multiply, per element width.
    case Js::OpCode::Simd128_Mul_I4:
    case Js::OpCode::Simd128_Mul_U4:
        return Simd128LowerMulI4(instr);
    case Js::OpCode::Simd128_Mul_I16:
    case Js::OpCode::Simd128_Mul_U16:
        return Simd128LowerMulI16(instr);
    // Shifts by a scalar amount.
    case Js::OpCode::Simd128_ShRtByScalar_I4:
    case Js::OpCode::Simd128_ShLtByScalar_I4:
    case Js::OpCode::Simd128_ShRtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_U4:
    case Js::OpCode::Simd128_ShLtByScalar_U4:
    case Js::OpCode::Simd128_ShRtByScalar_U8:
    case Js::OpCode::Simd128_ShLtByScalar_U8:
    case Js::OpCode::Simd128_ShRtByScalar_U16:
    case Js::OpCode::Simd128_ShLtByScalar_U16:
    case Js::OpCode::Simd128_ShLtByScalar_I2:
    case Js::OpCode::Simd128_ShRtByScalar_U2:
    case Js::OpCode::Simd128_ShRtByScalar_I2:
        return Simd128LowerShift(instr);
    // Typed-array loads: asm.js keeps bound checks in the lowering; the
    // non-asm.js path extracts them from the instruction.
    case Js::OpCode::Simd128_LdArr_I4:
    case Js::OpCode::Simd128_LdArr_I8:
    case Js::OpCode::Simd128_LdArr_I16:
    case Js::OpCode::Simd128_LdArr_U4:
    case Js::OpCode::Simd128_LdArr_U8:
    case Js::OpCode::Simd128_LdArr_U16:
    case Js::OpCode::Simd128_LdArr_F4:
    //case Js::OpCode::Simd128_LdArr_D2:
    case Js::OpCode::Simd128_LdArrConst_I4:
    case Js::OpCode::Simd128_LdArrConst_I8:
    case Js::OpCode::Simd128_LdArrConst_I16:
    case Js::OpCode::Simd128_LdArrConst_U4:
    case Js::OpCode::Simd128_LdArrConst_U8:
    case Js::OpCode::Simd128_LdArrConst_U16:
    case Js::OpCode::Simd128_LdArrConst_F4:
    //case Js::OpCode::Simd128_LdArrConst_D2:
        if (m_func->GetJITFunctionBody()->IsAsmJsMode())
        {
            // with bound checks
            return Simd128AsmJsLowerLoadElem(instr);
        }
        else
        {
            // non-AsmJs, boundChecks are extracted from instr
            return Simd128LowerLoadElem(instr);
        }
    // Typed-array stores, same asm.js split as loads.
    case Js::OpCode::Simd128_StArr_I4:
    case Js::OpCode::Simd128_StArr_I8:
    case Js::OpCode::Simd128_StArr_I16:
    case Js::OpCode::Simd128_StArr_U4:
    case Js::OpCode::Simd128_StArr_U8:
    case Js::OpCode::Simd128_StArr_U16:
    case Js::OpCode::Simd128_StArr_F4:
    //case Js::OpCode::Simd128_StArr_D2:
    case Js::OpCode::Simd128_StArrConst_I4:
    case Js::OpCode::Simd128_StArrConst_I8:
    case Js::OpCode::Simd128_StArrConst_I16:
    case Js::OpCode::Simd128_StArrConst_U4:
    case Js::OpCode::Simd128_StArrConst_U8:
    case Js::OpCode::Simd128_StArrConst_U16:
    case Js::OpCode::Simd128_StArrConst_F4:
    //case Js::OpCode::Simd128_StArrConst_D2:
        if (m_func->GetJITFunctionBody()->IsAsmJsMode())
        {
            return Simd128AsmJsLowerStoreElem(instr);
        }
        else
        {
            return Simd128LowerStoreElem(instr);
        }
    // Swizzle/shuffle: 4-lane forms have dedicated lowerings; narrower lanes
    // share the generic shuffle path.
    case Js::OpCode::Simd128_Swizzle_U4:
    case Js::OpCode::Simd128_Swizzle_I4:
    case Js::OpCode::Simd128_Swizzle_F4:
    //case Js::OpCode::Simd128_Swizzle_D2:
        return Simd128LowerSwizzle_4(instr);
    case Js::OpCode::Simd128_Shuffle_U4:
    case Js::OpCode::Simd128_Shuffle_I4:
    case Js::OpCode::Simd128_Shuffle_F4:
    //case Js::OpCode::Simd128_Shuffle_D2:
        return Simd128LowerShuffle_4(instr);
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U8:
    case Js::OpCode::Simd128_Swizzle_U16:
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U8:
    case Js::OpCode::Simd128_Shuffle_U16:
        return Simd128LowerShuffle(instr);
    // Float <-> int conversions. The 64x2 forms go through helper calls.
    case Js::OpCode::Simd128_FromUint32x4_F4:
        return Simd128LowerFloat32x4FromUint32x4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_I4:
        return Simd128LowerInt32x4FromFloat32x4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_U4:
        return Simd128LowerUint32x4FromFloat32x4(instr);
    case Js::OpCode::Simd128_FromInt64x2_D2:
        return EmitSimdConversion(instr, IR::HelperSimd128ConvertSD2);
    case Js::OpCode::Simd128_FromUint64x2_D2:
        return EmitSimdConversion(instr, IR::HelperSimd128ConvertUD2);
    case Js::OpCode::Simd128_FromFloat64x2_I2:
        return EmitSimdConversion(instr, IR::HelperSimd128TruncateI2);
    case Js::OpCode::Simd128_FromFloat64x2_U2:
        return EmitSimdConversion(instr, IR::HelperSimd128TruncateU2);
    // Integer comparisons without a direct SSE instruction: lowered by
    // negating / operand-swapping the available ones.
    case Js::OpCode::Simd128_Neq_I4:
    case Js::OpCode::Simd128_Neq_I8:
    case Js::OpCode::Simd128_Neq_I16:
    case Js::OpCode::Simd128_Neq_U4:
    case Js::OpCode::Simd128_Neq_U8:
    case Js::OpCode::Simd128_Neq_U16:
        return Simd128LowerNotEqual(instr);
    case Js::OpCode::Simd128_Lt_U4:
    case Js::OpCode::Simd128_Lt_U8:
    case Js::OpCode::Simd128_Lt_U16:
    case Js::OpCode::Simd128_GtEq_U4:
    case Js::OpCode::Simd128_GtEq_U8:
    case Js::OpCode::Simd128_GtEq_U16:
        return Simd128LowerLessThan(instr);
    case Js::OpCode::Simd128_LtEq_I4:
    case Js::OpCode::Simd128_LtEq_I8:
    case Js::OpCode::Simd128_LtEq_I16:
    case Js::OpCode::Simd128_LtEq_U4:
    case Js::OpCode::Simd128_LtEq_U8:
    case Js::OpCode::Simd128_LtEq_U16:
    case Js::OpCode::Simd128_Gt_U4:
    case Js::OpCode::Simd128_Gt_U8:
    case Js::OpCode::Simd128_Gt_U16:
        return Simd128LowerLessThanOrEqual(instr);
    case Js::OpCode::Simd128_GtEq_I4:
    case Js::OpCode::Simd128_GtEq_I8:
    case Js::OpCode::Simd128_GtEq_I16:
        return Simd128LowerGreaterThanOrEqual(instr);
    case Js::OpCode::Simd128_Min_F4:
    case Js::OpCode::Simd128_Max_F4:
        return Simd128LowerMinMax_F4(instr);
    // Boolean reductions.
    case Js::OpCode::Simd128_AnyTrue_B2:
    case Js::OpCode::Simd128_AnyTrue_B4:
    case Js::OpCode::Simd128_AnyTrue_B8:
    case Js::OpCode::Simd128_AnyTrue_B16:
        return Simd128LowerAnyTrue(instr);
    case Js::OpCode::Simd128_AllTrue_B2:
    case Js::OpCode::Simd128_AllTrue_B4:
    case Js::OpCode::Simd128_AllTrue_B8:
    case Js::OpCode::Simd128_AllTrue_B16:
        return Simd128LowerAllTrue(instr);
    case Js::OpCode::Simd128_BitSelect_I4:
        return LowerSimd128BitSelect(instr);
    default:
        AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
    }
    return nullptr;
}
  361. IR::Instr* LowererMD::LowerSimd128BitSelect(IR::Instr* instr)
  362. {
  363. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  364. IR::Opnd *dst = args->Pop();
  365. IR::Opnd *src1 = args->Pop();
  366. IR::Opnd *src2 = args->Pop();
  367. IR::Opnd *mask = args->Pop();
  368. IR::Instr* pInstr = IR::Instr::New(Js::OpCode::PXOR, dst, src1, src2, m_func);
  369. instr->InsertBefore(pInstr);
  370. Legalize(pInstr);
  371. instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
  372. instr->InsertBefore(IR::Instr::New(Js::OpCode::PXOR, dst, dst, src2, m_func));
  373. return removeInstr(instr);
  374. }
// Lowers Simd128_LdC: materializes the 16-byte constant into native code
// data and rewrites the instruction into a plain assign (e.g. MOVUPS) from
// that memory.
IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
{
    Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
    Assert(instr->GetDst()->IsSimd128());
    Assert(instr->GetSrc1()->IsSimd128());
    Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
    Assert(instr->GetSrc2() == nullptr);
    AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
    // MOVUPS dst, [const]
    // Copy the constant into the per-function native code data area.
    void *pValue = NativeCodeDataNewNoFixup(this->m_func->GetNativeCodeDataAllocator(), SIMDType<DataDesc_LowererMD_Simd128LoadConst>, value);
    IR::Opnd * simdRef;
    if (!m_func->IsOOPJIT())
    {
        // In-proc JIT: the data address is directly addressable.
        simdRef = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
    }
    else
    {
        // OOP JIT: address via [nativeCodeDataSym + offset], since the data
        // lives in the target process at an address not known here.
        int offset = NativeCodeData::GetDataTotalOffset(pValue);
        simdRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, instr->GetDst()->GetType(),
#if DBG
            NativeCodeData::GetDataDescription(pValue, m_func->m_alloc),
#endif
            m_func, true);
        // Keep the base sym alive across back edges so the indir stays valid.
        GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
    }
    instr->ReplaceSrc1(simdRef);
    instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
    Legalize(instr);
    return instr->m_prev;
}
// Canonicalizes each lane of dstOpnd to an all-ones/all-zeros boolean mask:
// first compares dst against zero with the caller-supplied cmpOpcode
// (presumably a lane-wise equality compare such as PCMPEQD — lanes equal to
// zero become all-ones), then inverts via PANDN with all-ones
// (dst = ~dst & ~0 == ~dst), so nonzero input lanes end up all-ones.
// Instructions are emitted before 'instr'; 'instr' itself is returned
// unchanged.
IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd)
{
    // Only the bool-producing opcodes are expected here.
    Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 ||
        instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16 ||
        instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 ||
        instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16
        );
    IR::Instr *pInstr;
    //dst = cmpOpcode dst, X86_ALL_ZEROS
    pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // dst = PANDN dst, X86_ALL_NEG_ONES
    pInstr = IR::Instr::New(Js::OpCode::PANDN, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    return instr;
}
  423. IR::Instr* LowererMD::EmitSimdConversion(IR::Instr *instr, IR::JnHelperMethod helper)
  424. {
  425. IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
  426. IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
  427. m_lowerer->InsertMove(srcMemRef, instr->UnlinkSrc1(), instr);
  428. IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
  429. instr->InsertBefore(helperCall);
  430. this->ChangeToHelperCall(helperCall, helper);
  431. m_lowerer->InsertMove(instr->UnlinkDst(), dstMemRef, instr);
  432. return removeInstr(instr);
  433. }
  434. void LowererMD::EmitShiftByScalarI2(IR::Instr *instr, IR::JnHelperMethod helper)
  435. {
  436. IR::Opnd* src2 = instr->GetSrc2();
  437. IR::Opnd* dst = instr->GetDst();
  438. LoadHelperArgument(instr, src2);
  439. IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
  440. m_lowerer->InsertMove(srcMemRef, instr->GetSrc1(), instr);
  441. IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
  442. IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
  443. instr->InsertBefore(helperCall);
  444. this->ChangeToHelperCall(helperCall, helper);
  445. m_lowerer->InsertMove(dst, dstMemRef, instr);
  446. }
// Lowers 2-lane ReplaceLane (float64x2 and int64x2): dst = src1 with lane
// [src2] replaced by scalar src3. Uses SHUFPD/MOVSD for doubles, PINSR via
// EmitInsertInt64 when SSE4.1 is available, otherwise a helper call.
IR::Instr * LowererMD::SIMD128LowerReplaceLane_2(IR::Instr *instr)
{
    // Operands arrive through the ExtendArg chain: dst, vector, lane index, scalar.
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();
    IR::Opnd *src2 = args->Pop();   // lane index (int const)
    IR::Opnd *src3 = args->Pop();   // replacement scalar
    int lane = src2->AsIntConstOpnd()->AsInt32();
    Assert(dst->IsSimd128() && src1->IsSimd128());
    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_D2)
    {
        AssertMsg(AutoSystemInfo::Data.SSE2Available(), "SSE2 not supported");
        Assert(src3->IsFloat64());
        // dst = src1, then overwrite the requested lane.
        m_lowerer->InsertMove(dst, src1, instr);
        if (lane)
        {
            // Upper lane: SHUFPD dst, src3, 0 picks dst[0] and src3[0] -> dst = {dst[0], src3[0]}.
            instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPD, dst, src3, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        }
        else
        {
            // Lower lane: MOVSD replaces only the low double.
            instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src3, m_func));
        }
        return removeInstr(instr);
    }
    // int64x2 path.
    Assert(src3->IsInt64());
    if (AutoSystemInfo::Data.SSE4_1Available())
    {
        m_lowerer->InsertMove(dst, src1, instr);
        // EmitInsertInt64 reads instr->GetDst(), so (re)attach dst here.
        instr->SetDst(dst);
        EmitInsertInt64(src3, lane, instr);
    }
    else
    {
        // Helper call: lane index and int64 scalar as arguments; the vector
        // goes through SIMD temp slot 0 and the result returns via slot 1.
        LoadHelperArgument(instr, src2);
        LoadInt64HelperArgument(instr, src3);
        IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
        m_lowerer->InsertMove(srcMemRef, src1, instr);
        IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
        IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
        instr->InsertBefore(helperCall);
        this->ChangeToHelperCall(helperCall, IR::HelperSimd128ReplaceLaneI2);
        m_lowerer->InsertMove(dst, dstMemRef, instr);
    }
    return removeInstr(instr);
}
// Inserts the int64 value 'src' into 64-bit lane 'index' (0 or 1) of
// instr->GetDst() (a SIMD128 register). Emits instructions before 'instr';
// does not remove it. On x86 the int64 lives as a register pair and is
// inserted as two 32-bit halves.
void LowererMD::EmitInsertInt64(IR::Opnd* src, uint index, IR::Instr *instr)
{
    IR::Opnd* dst = instr->GetDst();
    Assert(dst->IsSimd128() && src->IsInt64());
    if (AutoSystemInfo::Data.SSE4_1Available())
    {
#ifdef _M_IX86
        // One 64-bit lane == two 32-bit PINSRD slots.
        index *= 2;
        Int64RegPair srcPair = m_func->FindOrCreateInt64Pair(src);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.low, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.high, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
#else
        // x64: single 64-bit insert.
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
#endif
    }
    else
    {
        // Pre-SSE4.1 fallback: spill the int64 to the thread context's SIMD
        // temp area, then load it into the low/high qword of dst with
        // MOVLPD/MOVHPD (which load 64 bits regardless of the float type).
        intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#ifdef _M_IX86
        // Store the two 32-bit halves separately.
        Int64RegPair src1Pair = m_func->FindOrCreateInt64Pair(src);
        IR::Opnd* lower = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
        m_lowerer->InsertMove(lower, src1Pair.low, instr);
        IR::Opnd* higher = IR::MemRefOpnd::New(tempSIMD + 4, TyMachPtr, m_func);
        m_lowerer->InsertMove(higher, src1Pair.high, instr);
#else
        IR::Opnd* mem = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
        m_lowerer->InsertMove(mem, src, instr);
#endif
        // MOVHPD writes the upper qword (lane 1), MOVLPD the lower (lane 0).
        IR::MemRefOpnd* tmp = IR::MemRefOpnd::New(tempSIMD, TyFloat64, m_func);
        Js::OpCode opcode = (index) ? Js::OpCode::MOVHPD : Js::OpCode::MOVLPD;
        IR::Instr* newInstr = IR::Instr::New(opcode, dst, tmp, m_func);
        instr->InsertBefore(newInstr);
        // MemRef address must be materialized in a register for MOVL/HPD.
        newInstr->HoistMemRefAddress(tmp, Js::OpCode::MOV);
        Legalize(newInstr);
    }
}
// Extracts 64-bit lane 'index' (0 or 1) of SIMD128 'src' into int64 'dst'.
// Emits instructions before 'instr'; does not remove it. On x86 the int64
// destination is a register pair filled by two 32-bit extracts.
void LowererMD::EmitExtractInt64(IR::Opnd* dst, IR::Opnd* src, uint index, IR::Instr *instr)
{
    Assert(index == 0 || index == 1);
    Assert(dst->IsInt64() && src->IsSimd128());
    if (AutoSystemInfo::Data.SSE4_1Available())
    {
#ifdef _M_IX86
        // One 64-bit lane == two 32-bit PEXTRD slots.
        index *= 2;
        Int64RegPair dstPair = m_func->FindOrCreateInt64Pair(dst);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.low, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.high, src, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
#else
        // x64: single 64-bit extract.
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
#endif
    }
    else
    {
        // Pre-SSE4.1 fallback: if the upper lane is wanted, first shuffle it
        // down to the low qword (PSHUFD selector picks dwords 2 and 3).
        IR::Opnd* tmp = src;
        if (index)
        {
            tmp = IR::RegOpnd::New(TySimd128F4, m_func);
            instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, tmp, src, IR::IntConstOpnd::New(2 | 3 << 2, TyInt8, m_func, true), m_func));
        }
        //kludg-ish; we need a new instruction for LowerReinterpretPrimitive to transform
        //and dummy one for a caller to remove
        // Reinterpret the low 64 bits (viewed as float64) as an int64.
        IR::Instr* tmpInstr = IR::Instr::New(Js::OpCode::Simd128_ExtractLane_I2, dst, tmp->UseWithNewType(TyFloat64, m_func), m_func);
        instr->InsertBefore(tmpInstr);
        m_lowerer->LowerReinterpretPrimitive(tmpInstr);
    }
}
// Lowers all Simd128_ExtractLane_* opcodes: dst = src1[src2], where src2 is a
// constant lane index. Strategy: shift the wanted lane into the low 4/8 bytes
// (PSRLDQ), move it out (MOVSS/MOVD, or EmitExtractInt64 for 64-bit lanes),
// then for sub-dword lanes shift/mask within the 32-bit result. Boolean lanes
// are additionally normalized to 0/1.
IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
{
    IR::Opnd* dst, *src1, *src2;
    Js::OpCode movOpcode = Js::OpCode::MOVSS;
    // laneWidth: bytes per lane group moved out; shamt/mask: bit shift and
    // mask applied to the 32-bit result for 8/16-bit lanes.
    uint laneWidth = 0, laneIndex = 0, shamt = 0, mask = 0;
    IRType laneType = TyInt32;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    src2 = instr->GetSrc2();
    Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64 || dst->IsInt64()));
    Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2 && src2->IsIntConstOpnd());
    laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
    laneWidth = 4;
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ExtractLane_I2:
        // 64-bit lanes are handled entirely by EmitExtractInt64 below.
        laneWidth = 8;
        break;
    case Js::OpCode::Simd128_ExtractLane_F4:
        movOpcode = Js::OpCode::MOVSS;
        Assert(laneIndex < 4);
        break;
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_B8:
        // 16-bit lanes: extract the containing dword (lane/2), then shift by
        // 0 or 16 bits and mask to the low 16 bits.
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 8);
        shamt = (laneIndex % 2) * 16;
        laneIndex = laneIndex / 2;
        laneType = TyInt16;
        mask = 0x0000ffff;
        break;
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B16:
        // 8-bit lanes: extract the containing dword (lane/4), then shift by
        // (lane%4)*8 bits and mask to the low byte.
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 16);
        shamt = (laneIndex % 4) * 8;
        laneIndex = laneIndex / 4;
        laneType = TyInt8;
        mask = 0x000000ff;
        break;
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_B4:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 4);
        break;
    default:
        Assert(UNREACHED);
    }
    if (laneWidth == 8) //Simd128_ExtractLane_I2
    {
        EmitExtractInt64(dst, instr->GetSrc1(), laneIndex, instr);
    }
    else
    {
        // Bring the wanted dword down to position 0 if needed, then move it out.
        IR::Opnd* tmp = src1;
        if (laneIndex != 0)
        {
            // tmp = PSRLDQ src1, laneWidth * laneIndex  (byte-wise right shift)
            tmp = IR::RegOpnd::New(src1->GetType(), m_func);
            IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneWidth * laneIndex, TyInt8, m_func, true), m_func);
            instr->InsertBefore(shiftInstr);
            Legalize(shiftInstr);
        }
        // MOVSS/MOVD dst, tmp -- for MOVSS the dst is retyped to the vector's
        // element type so the float move is well-formed.
        instr->InsertBefore(IR::Instr::New(movOpcode, movOpcode == Js::OpCode::MOVD ? dst : dst->UseWithNewType(tmp->GetType(), m_func), tmp, m_func));
    }
    // dst has the 4-byte lane
    if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
    {
        // extract the 1/2 bytes sublane
        IR::Instr *newInstr = nullptr;
        if (shamt != 0)
        {
            // SHR dst, dst, shamt -- move the sublane to bit 0
            newInstr = IR::Instr::New(Js::OpCode::SHR, dst, dst, IR::IntConstOpnd::New((IntConstType)shamt, TyInt8, m_func), m_func);
            instr->InsertBefore(newInstr);
            Legalize(newInstr);
        }
        Assert(laneType == TyInt8 || laneType == TyInt16);
        // Signed lanes sign-extend; unsigned (and bool) lanes zero out the upper bits.
        if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16)
        {
            if (laneType == TyInt8)
            {
                // Copy to an 8-bit temp first so MOVSX has a byte source.
                IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
                newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
                instr->InsertBefore(newInstr);
                Legalize(newInstr);
                newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func);
            }
            else
            {
                // Sign-extend the low word in place.
                newInstr = IR::Instr::New(Js::OpCode::MOVSXW, dst, dst->UseWithNewType(laneType, m_func), m_func);
            }
        }
        else
        {
            // AND dst, mask -- zero-extend the sublane.
            newInstr = IR::Instr::New(Js::OpCode::AND, dst, dst, IR::IntConstOpnd::New(mask, TyInt32, m_func), m_func);
        }
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    // Boolean lanes: normalize any non-zero value to 1, zero stays 0.
    if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
    {
        IR::Instr* pInstr = nullptr;
        IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
        // cmp dst, 0
        pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func));
        pInstr->SetSrc2(IR::IntConstOpnd::New(0, laneType, m_func, true));
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // mov tmp(TyInt8), dst
        pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // setne tmp(TyInt8) -- tmp = (dst != 0) ? 1 : 0
        pInstr = IR::Instr::New(Js::OpCode::SETNE, tmp, tmp, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // movsx dst, tmp(TyInt8)
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
// Lowers all Simd128_Splat_* opcodes: broadcast scalar src1 into every lane
// of dst. 32/64-bit lanes use a move + shuffle; 8/16-bit lanes build up via
// PUNPCK unpacks; bool splats branch on src1 and load all-ones or all-zeros;
// int64x2 goes through EmitInsertInt64 + PSHUFD.
IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
{
    Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
    IR::Opnd *dst, *src1;
    IR::Instr *pInstr = nullptr;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 ||
        src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 ||
        src1->GetType() == TyUint8 || src1->GetType() == TyUint32 || src1->IsInt64()));
    Assert(!instr->GetSrc2());
    IR::Opnd* tempTruncate = nullptr;
    // bSkip == true means the case emitted its full sequence itself, so the
    // generic mov+shuffle pair at the end must not run.
    bool bSkip = false;
    IR::LabelInstr *labelZero = IR::LabelInstr::New(Js::OpCode::Label, m_func);
    IR::LabelInstr *labelDone = IR::LabelInstr::New(Js::OpCode::Label, m_func);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_Splat_F4:
        shufOpCode = Js::OpCode::SHUFPS;
        movOpCode = Js::OpCode::MOVSS;
        break;
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_U4:
        shufOpCode = Js::OpCode::PSHUFD;
        movOpCode = Js::OpCode::MOVD;
        break;
    case Js::OpCode::Simd128_Splat_D2:
        shufOpCode = Js::OpCode::SHUFPD;
        movOpCode = Js::OpCode::MOVSD;
        break;
    case Js::OpCode::Simd128_Splat_I2:
    {
        // Insert the int64 into lane 0, then PSHUFD 68 (0b01'00'01'00)
        // duplicates the low qword into both 64-bit lanes.
        EmitInsertInt64(src1, 0, instr);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(68, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    }
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_U8:
        // MOV tempTruncate(bx), src1: truncate the value to 16bit int
        // MOVD dst, tempTruncate(bx)
        // PUNPCKLWD dst, dst        -- word -> dword duplication
        // PSHUFD dst, dst, 0        -- broadcast dword 0 to all lanes
        tempTruncate = EnregisterIntConst(instr, src1, TyInt16);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U16:
        // MOV tempTruncate(bx), src1: truncate the value to 8bit int
        // MOVD dst, tempTruncate(bx)
        // PUNPCKLBW dst, dst        -- byte -> word duplication
        // PUNPCKLWD dst, dst        -- word -> dword duplication
        // PSHUFD dst, dst, 0        -- broadcast dword 0 to all lanes
        tempTruncate = EnregisterIntConst(instr, src1, TyInt8);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLBW, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        // Bool splat: all-ones vector if src1 != 0, else all-zeros.
        // CMP src1, 0
        // JEQ $labelZero
        // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
        // JMP $labelDone
        // $labelZero:
        // XORPS dst, dst
        // $labelDone:
        //pInstr = IR::Instr::New(Js::OpCode::CMP, src1, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func);
        //instr->InsertBefore(pInstr);
        //Legalize(pInstr);
        // cmp src1, 0000h
        pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        pInstr->SetSrc1(src1);
        pInstr->SetSrc2(IR::IntConstOpnd::New(0x0000, TyInt32, m_func, true));
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        //JEQ $labelZero
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, labelZero, m_func));
        // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // JMP $labelDone
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, m_func));
        // $labelZero:
        instr->InsertBefore(labelZero);
        // XORPS dst, dst
        instr->InsertBefore(IR::Instr::New(Js::OpCode::XORPS, dst, dst, dst, m_func)); // make dst to be 0
        // $labelDone:
        instr->InsertBefore(labelDone);
        bSkip = true;
        break;
    default:
        Assert(UNREACHED);
    }
    // Float32x4 splat may receive a float64 source; narrow it first.
    if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
    {
        IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
        // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
        instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
        src1 = regOpnd32;
    }
    // Generic path for 32/64-bit lanes: move scalar into lane 0, then
    // broadcast with shuffle selector 0.
    if (!bSkip)
    {
        instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
        instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  809. IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
  810. {
  811. Js::OpCode opcode = Js::OpCode::SQRTPS;
  812. IR::Opnd *dst, *src1;
  813. dst = instr->GetDst();
  814. src1 = instr->GetSrc1();
  815. Assert(dst && dst->IsRegOpnd());
  816. Assert(src1 && src1->IsRegOpnd());
  817. Assert(instr->GetSrc2() == nullptr);
  818. opcode = Js::OpCode::SQRTPS;
  819. #if 0
  820. {
  821. Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
  822. opcode = Js::OpCode::SQRTPD;
  823. }
  824. #endif // 0
  825. instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func));
  826. IR::Instr* prevInstr = instr->m_prev;
  827. instr->Remove();
  828. return prevInstr;
  829. }
  830. IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr)
  831. {
  832. IR::Opnd* dst = instr->GetDst();
  833. IR::Opnd* src1 = instr->GetSrc1();
  834. Js::OpCode addOpcode = Js::OpCode::PADDD;
  835. void * allOnes = (void*)&X86_ALL_ONES_I4;
  836. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  837. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  838. Assert(instr->GetSrc2() == nullptr);
  839. switch (instr->m_opcode)
  840. {
  841. case Js::OpCode::Simd128_Neg_I4:
  842. case Js::OpCode::Simd128_Neg_U4:
  843. break;
  844. case Js::OpCode::Simd128_Neg_I8:
  845. case Js::OpCode::Simd128_Neg_U8:
  846. addOpcode = Js::OpCode::PADDW;
  847. allOnes = (void*)&X86_ALL_ONES_I8;
  848. break;
  849. case Js::OpCode::Simd128_Neg_I16:
  850. case Js::OpCode::Simd128_Neg_U16:
  851. addOpcode = Js::OpCode::PADDB;
  852. allOnes = (void*)&X86_ALL_ONES_I16;
  853. break;
  854. default:
  855. Assert(UNREACHED);
  856. }
  857. // MOVAPS dst, src1
  858. IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  859. instr->InsertBefore(pInstr);
  860. // PANDN dst, dst, 0xfff...f
  861. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), src1->GetType(), m_func), m_func);
  862. instr->InsertBefore(pInstr);
  863. Legalize(pInstr);
  864. // addOpCode dst, dst, {allOnes}
  865. pInstr = IR::Instr::New(addOpcode, dst, dst, IR::MemRefOpnd::New(allOnes, src1->GetType(), m_func), m_func);
  866. instr->InsertBefore(pInstr);
  867. Legalize(pInstr);
  868. pInstr = instr->m_prev;
  869. instr->Remove();
  870. return pInstr;
  871. }
// Lowers int32x4 multiply without SSE4.1's PMULLD: PMULUDQ multiplies the
// even dword lanes (0,2) of each operand into 64-bit products; shifting both
// operands right by 4 bytes exposes the odd lanes (1,3) to a second PMULUDQ.
// PSHUFD 0x8 compacts the low 32 bits of each product pair, and PUNPCKLDQ
// interleaves them back into lane order.
IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4 || instr->m_opcode == Js::OpCode::Simd128_Mul_U4);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
    temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
    // temp1 = PMULUDQ src1, src2  -- 64-bit products of lanes 0 and 2
    pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp2 = PSRLDQ src1, 4 (bytes)  -- bring odd lanes into even positions
    pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp3 = PSRLDQ src2, 4 (bytes)
    pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp2 = PMULUDQ temp2, temp3  -- 64-bit products of lanes 1 and 3
    pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSHUFD temp1, temp1, 0x8 (0b00001000) -- pack low dwords of products into lanes 0,1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func));
    // PSHUFD temp2, temp2, 0x8
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
    // PUNPCKLDQ dst, temp1, temp2  -- interleave even/odd results back into lane order
    pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers int8x16 multiply (no native SSE instruction): multiply 16-bit words
// with PMULLW twice -- once keeping the even bytes (masked), once on the odd
// bytes brought down with PSRLW -- then recombine with POR. The result keeps
// the low 8 bits of each byte-wise product, which is the same for signed and
// unsigned inputs.
IR::Instr* LowererMD::Simd128LowerMulI16(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I16 || instr->m_opcode == Js::OpCode::Simd128_Mul_U16);
    IR::Instr *pInstr = nullptr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    IRType simdType, laneType;
    if (instr->m_opcode == Js::OpCode::Simd128_Mul_I16)
    {
        simdType = TySimd128I16;
        laneType = TyInt8;
    }
    else
    {
        simdType = TySimd128U16;
        laneType = TyUint8;
    }
    Assert(dst->IsRegOpnd() && dst->GetType() == simdType);
    Assert(src1->IsRegOpnd() && src1->GetType() == simdType);
    Assert(src2->IsRegOpnd() && src2->GetType() == simdType);
    temp1 = IR::RegOpnd::New(simdType, m_func);
    temp2 = IR::RegOpnd::New(simdType, m_func);
    temp3 = IR::RegOpnd::New(simdType, m_func);
    // MOVAPS temp1, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp1, src1, m_func));
    // PMULLW temp1, src2  -- word-wise products; low bytes are the even-byte results
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp1, temp1, src2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PAND temp1, {0x00ff00ff...}  -- zero out bytes 1,3,5... keeping even-byte products
    pInstr = IR::Instr::New(Js::OpCode::PAND, temp1, temp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), simdType, m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp2, src2, 8  -- odd bytes of src2 into low byte of each word
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp2, src2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp3, src1, 8  -- odd bytes of src1 into low byte of each word
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp3, src1, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PMULLW temp2, temp3  -- odd-byte products in the low byte of each word
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSLLW temp2, 8  -- move odd-byte results back to bytes 1,3,5...
    pInstr = IR::Instr::New(Js::OpCode::PSLLW, temp2, temp2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // POR dst, temp1, temp2  -- combine even and odd byte results
    pInstr = IR::Instr::New(Js::OpCode::POR, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers SIMD shift-by-scalar opcodes. The shift amount is masked to the lane
// width (shamt % (elementSizeInBytes * 8)) and moved into an XMM register for
// the PSLL/PSRL/PSRA family. 8-bit lanes have no native shift and are
// composed from 16-bit word shifts plus byte masks. Int64x2 arithmetic right
// shift has no SSE encoding and goes through a helper.
IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
{
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsInt32());
    Js::OpCode opcode = Js::OpCode::PSLLD;
    int elementSizeInBytes = 0;
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ShRtByScalar_I2:
        // No PSRAQ in SSE -- lower through a helper call.
        EmitShiftByScalarI2(instr, IR::HelperSimd128ShRtByScalarI2);
        return removeInstr(instr);
    case Js::OpCode::Simd128_ShLtByScalar_I2:
        opcode = Js::OpCode::PSLLQ;
        elementSizeInBytes = 8;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_U2:
        opcode = Js::OpCode::PSRLQ;
        elementSizeInBytes = 8;
        break;
    case Js::OpCode::Simd128_ShLtByScalar_I4:
    case Js::OpCode::Simd128_ShLtByScalar_U4: // same as int32x4.ShiftLeftScalar
        opcode = Js::OpCode::PSLLD;
        elementSizeInBytes = 4;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_I4:
        opcode = Js::OpCode::PSRAD;
        elementSizeInBytes = 4;
        break;
    case Js::OpCode::Simd128_ShLtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_U8: // same as int16x8.ShiftLeftScalar
        opcode = Js::OpCode::PSLLW;
        elementSizeInBytes = 2;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_I8:
        opcode = Js::OpCode::PSRAW;
        elementSizeInBytes = 2;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_U4:
        opcode = Js::OpCode::PSRLD;
        elementSizeInBytes = 4;
        break;
    case Js::OpCode::Simd128_ShRtByScalar_U8:
        opcode = Js::OpCode::PSRLW;
        elementSizeInBytes = 2;
        break;
    case Js::OpCode::Simd128_ShLtByScalar_I16: // composite, int8x16.ShiftLeftScalar
    case Js::OpCode::Simd128_ShRtByScalar_I16: // composite, int8x16.ShiftRightScalar
    case Js::OpCode::Simd128_ShLtByScalar_U16: // same as int8x16.ShiftLeftScalar
    case Js::OpCode::Simd128_ShRtByScalar_U16: // composite, uint8x16.ShiftRightScalar
        elementSizeInBytes = 1;
        break;
    default:
        Assert(UNREACHED);
    }
    IR::Instr *pInstr = nullptr;
    IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, m_func);
    IR::RegOpnd *reg2 = IR::RegOpnd::New(TyInt32, m_func);
    IR::RegOpnd *tmp0 = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd *tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd *tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
    //Shift amount: The shift amount is masked by [ElementSize] * 8
    //The masked Shift amount is moved to xmm register
    //AND shamt, shmask, shamt
    //MOVD tmp0, shamt
    IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func);
    // en-register
    IR::Opnd *origShamt = EnregisterIntConst(instr, src2); //unnormalized shift amount
    pInstr = IR::Instr::New(Js::OpCode::AND, shamt, origShamt, IR::IntConstOpnd::New(Js::SIMDUtils::SIMDGetShiftAmountMask(elementSizeInBytes), TyInt32, m_func), m_func); // normalizing by elm width (i.e. shamt % elm_width)
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp0, shamt, m_func);
    instr->InsertBefore(pInstr);
    if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8 ||
        instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I2 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U2)
    {
        // Native 16/32/64-bit lane shift:
        // shiftOpCode dst, src1, tmp0
        pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I16 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U16)
    {
        // 8-bit left shift composed from word shifts: shift high and low bytes
        // of each word separately (masked), then recombine.
        // MOVAPS tmp1, src1
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func);
        instr->InsertBefore(pInstr);
        // MOVAPS dst, src1
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
        instr->InsertBefore(pInstr);
        // PAND tmp1, [X86_HIGHBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSLLW tmp1, tmp0
        pInstr = IR::Instr::New(Js::OpCode::PSLLW, tmp1, tmp1, tmp0, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSLLW dst, tmp0
        pInstr = IR::Instr::New(Js::OpCode::PSLLW, dst, dst, tmp0, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PAND dst, [X86_LOWBYTES_MASK] -- drop bits shifted across byte boundaries
        pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // POR dst, tmp1
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func);
        instr->InsertBefore(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I16)
    {
        // 8-bit arithmetic right shift: low bytes are pre-shifted left by 8 so
        // PSRAW by (shamt + 8) sign-extends them correctly; high bytes use a
        // plain word PSRAW by shamt; results are masked and recombined.
        // MOVAPS tmp1, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func));
        // MOVAPS dst, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
        // PSLLW dst, 8
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLW, dst, dst, IR::IntConstOpnd::New(8, TyInt8, m_func), m_func));
        // LEA reg, [shamt + 8]
        IR::IndirOpnd *indirOpnd = IR::IndirOpnd::New(shamt->AsRegOpnd(), +8, TyInt32, m_func);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::LEA, reg, indirOpnd, m_func));
        // MOVD tmp2, reg
        pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp2, reg, m_func);
        instr->InsertBefore(pInstr);
        // PSRAW dst, tmp2
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, dst, dst, tmp2, m_func));
        // PAND dst, [X86_LOWBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSRAW tmp1, tmp0
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, tmp1, tmp1, tmp0, m_func));
        // PAND tmp1, [X86_HIGHBYTES_MASK]
        pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // POR dst, tmp1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func));
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U16)
    {
        // 8-bit logical right shift: word-wise PSRLW, then AND each byte with
        // (0xFF >> shamt) to clear bits pulled in from the neighboring byte.
        IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
        shamtReg->SetReg(LowererMDArch::GetRegShiftCount());   // variable SHR needs the CL register
        IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
        // MOVAPS dst, src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
        // MOV reg2, 0FFh
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, reg2, IR::IntConstOpnd::New(0xFF, TyInt32, m_func), m_func));
        // MOV shamtReg, shamt
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, shamtReg, shamt, m_func));
        // SHR reg2, shamtReg (lower 8 bit) -- reg2 = 0xFF >> shamt
        instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));
        // MOV tmp, reg2
        // MOVSX reg2, tmp(TyInt8)
        pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
        IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
        // PSRLW dst, tmp0 -- word-wise logical shift by shamt
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
        // splat (0xFF >> shamt) into mask
        // MOVD mask, reg2
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, mask, reg2, m_func));
        // PUNPCKLBW mask, mask
        pInstr = IR::Instr::New(Js::OpCode::PUNPCKLBW, mask, mask, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PUNPCKLWD mask, mask
        pInstr = IR::Instr::New(Js::OpCode::PUNPCKLWD, mask, mask, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // PSHUFD mask, mask, 0
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, mask, mask, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        // PAND dst, mask -- clear cross-byte bits
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
    }
    else
    {
        Assert(UNREACHED);
    }
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  1166. IR::Instr* LowererMD::SIMD128LowerReplaceLane_8(IR::Instr* instr)
  1167. {
  1168. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1169. int lane = 0;
  1170. IR::Opnd *dst = args->Pop();
  1171. IR::Opnd *src1 = args->Pop();
  1172. IR::Opnd *src2 = args->Pop();
  1173. IR::Opnd *src3 = args->Pop();
  1174. IR::Instr * newInstr = nullptr;
  1175. Assert(dst->IsSimd128() && src1->IsSimd128());
  1176. lane = src2->AsIntConstOpnd()->AsInt32();
  1177. IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt16);
  1178. Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8);
  1179. // MOVAPS dst, src1
  1180. newInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  1181. instr->InsertBefore(newInstr);
  1182. Legalize(newInstr);
  1183. // PINSRW dst, value, index
  1184. newInstr = IR::Instr::New(Js::OpCode::PINSRW, dst, laneValue, IR::IntConstOpnd::New(lane, TyInt8, m_func), m_func);
  1185. instr->InsertBefore(newInstr);
  1186. Legalize(newInstr);
  1187. if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8) //canonicalizing lanes
  1188. {
  1189. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
  1190. }
  1191. IR::Instr* prevInstr = instr->m_prev;
  1192. instr->Remove();
  1193. return prevInstr;
  1194. }
IR::Instr* LowererMD::SIMD128LowerReplaceLane_16(IR::Instr* instr)
{
    // Lowers Simd128_ReplaceLane_{I16,U16,B16} (16 x 8-bit lanes).
    // There is no single-byte insert instruction used by this lowering
    // (presumably the baseline ISA lacks PINSRB — confirm), so the lane is
    // replaced through a SIMD scratch slot in the thread context:
    // spill the vector to memory, overwrite one byte, reload.
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();   // source vector
    IR::Opnd *src2 = args->Pop();   // lane index (constant)
    IR::Opnd *src3 = args->Pop();   // replacement value
    IR::Instr * newInstr = nullptr;
    Assert(dst->IsSimd128() && src1->IsSimd128());
    lane = src2->AsIntConstOpnd()->AsInt32();
    Assert(lane >= 0 && lane < 16);
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt8);
    intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
#if DBG
    // using only one SIMD temp
    intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue);
#endif
    Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
    // MOVUPS [temp], src1  -- spill the whole vector to the scratch slot
    intptr_t address = tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New(address, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // MOV [temp+offset], laneValue  -- overwrite just the target byte
    address = tempSIMD + lane;
    // check for buffer overrun (Assert-only; endAddrSIMD exists only under DBG)
    Assert((intptr_t)address < endAddrSIMD);
    newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, TyInt8, m_func), laneValue, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // MOVUPS dst, [temp]  -- reload the patched vector
    address = tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(address, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16) //canonicalizing lanes.
    {
        instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
IR::Instr* LowererMD::SIMD128LowerReplaceLane_4(IR::Instr* instr)
{
    // Lowers Simd128_ReplaceLane_{I4,U4,B4,F4} (4 x 32-bit lanes).
    // The replacement value is first materialized in an XMM register
    // (MOVD for integer values), then merged into the destination lane
    // with MOVSS. Lanes other than 0 are reached with MOVHLPS/MOVLHPS
    // (lane 2) or a SHUFPS swap-merge-swap sequence (lanes 1 and 3).
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0, byteWidth = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();   // source vector
    IR::Opnd *src2 = args->Pop();   // lane index (constant)
    IR::Opnd *src3 = args->Pop();   // replacement value
    Assert(dst->IsSimd128() && src1->IsSimd128());
    IRType type = dst->GetType();
    lane = src2->AsIntConstOpnd()->AsInt32();
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_U4:
    case Js::OpCode::Simd128_ReplaceLane_B4:
        byteWidth = TySize[TyInt32];
        break;
    case Js::OpCode::Simd128_ReplaceLane_F4:
        byteWidth = TySize[TyFloat32];
        break;
    default:
        Assert(UNREACHED);
    }
    // NOTE(review): byteWidth is computed above but not read anywhere below
    // in this function — confirm whether it can be removed.
    // MOVAPS dst, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
    if (laneValue->GetType() == TyInt32 || laneValue->GetType() == TyUint32)
    {
        // Integer lane value: transfer it into an XMM register first so the
        // MOVSS merge below can be used uniformly for int and float.
        IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func);//mov intval to xmm
        //MOVD
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
        laneValue = tempReg;
    }
    Assert(laneValue->GetType() == TyFloat32);
    if (lane == 0)
    {
        // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
    }
    else if (lane == 2)
    {
        // Lane 2: bring the high quadword down (MOVHLPS), replace its low
        // lane via MOVSS, then move it back up (MOVLHPS).
        IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
    }
    else
    {
        Assert(lane == 1 || lane == 3);
        // Build a SHUFPS mask that swaps the target lane with lane 0; the
        // mask is its own inverse, so it is applied once before and once
        // after the MOVSS merge.
        uint8 shufMask = 0xE4; // 11 10 01 00
        shufMask |= lane; // 11 10 01 id
        shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
        // SHUFPS dst, dst, shufMask
        instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
        // MOVSS dst, value
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
        // SHUFPS dst, dst, shufMask
        instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
    }
    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4) //Canonicalizing lanes
    {
        instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  1307. /*
  1308. 4 and 2 lane Swizzle.
  1309. */
  1310. IR::Instr* LowererMD::Simd128LowerSwizzle_4(IR::Instr* instr)
  1311. {
  1312. Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
  1313. Js::OpCode irOpcode = instr->m_opcode;
  1314. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1315. IR::Opnd *dst = args->Pop();
  1316. IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
  1317. int i = 0;
  1318. while (!args->Empty() && i < 6)
  1319. {
  1320. srcs[i++] = args->Pop();
  1321. }
  1322. int8 shufMask = 0;
  1323. int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
  1324. IR::Instr *pInstr = instr->m_prev;
  1325. Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
  1326. // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
  1327. Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 /*|| irOpcode == Js::OpCode::Simd128_Swizzle_D2*/);
  1328. AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
  1329. srcs[2] && srcs[2]->IsIntConstOpnd() &&
  1330. (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[3] && srcs[3]->IsIntConstOpnd())) &&
  1331. (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
  1332. #if 0
  1333. if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
  1334. {
  1335. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  1336. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  1337. Assert(lane0 >= 0 && lane0 < 2);
  1338. Assert(lane1 >= 0 && lane1 < 2);
  1339. shufMask = (int8)((lane1 << 1) | lane0);
  1340. shufOpcode = Js::OpCode::SHUFPD;
  1341. }
  1342. #endif // 0
  1343. if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4)
  1344. {
  1345. shufOpcode = Js::OpCode::PSHUFD;
  1346. }
  1347. AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
  1348. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  1349. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  1350. lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
  1351. lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
  1352. Assert(lane1 >= 0 && lane1 < 4);
  1353. Assert(lane2 >= 0 && lane2 < 4);
  1354. Assert(lane2 >= 0 && lane2 < 4);
  1355. Assert(lane3 >= 0 && lane3 < 4);
  1356. shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
  1357. instr->m_opcode = shufOpcode;
  1358. instr->SetDst(dst);
  1359. // MOVAPS dst, src1
  1360. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
  1361. // SHUF dst, dst, imm8
  1362. instr->SetSrc1(dst);
  1363. instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
  1364. return pInstr;
  1365. }
  1366. /*
  1367. 4 lane shuffle. Handles arbitrary lane values.
  1368. */
IR::Instr* LowererMD::Simd128LowerShuffle_4(IR::Instr* instr)
{
    // Lowers a 4-lane shuffle over two source vectors with constant lane
    // indices in [0,8): indices 0-3 select from src1, 4-7 from src2.
    // The emission strategy depends on how many lanes come from each source:
    //   4/0 split -> single-source swizzle (one shuffle)
    //   2/2 split -> one SHUFPS if x86-friendly, otherwise two
    //   3/1 split -> two swizzles blended through a per-lane mask
    //                (ANDPS / ANDNPS / ORPS)
    Js::OpCode irOpcode = instr->m_opcode;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    IR::Opnd *dst = args->Pop();
    IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
    int j = 0;
    while (!args->Empty() && j < 6)
    {
        srcs[j++] = args->Pop();
    }
    uint8 lanes[4], lanesSrc[4];
    uint fromSrc1, fromSrc2;
    IR::Instr *pInstr = instr->m_prev;
    Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
    Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_U4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
    // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
    AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
        srcs[3] && srcs[3]->IsIntConstOpnd() &&
        srcs[4] && srcs[4]->IsIntConstOpnd() &&
        srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
    lanes[0] = (uint8) srcs[2]->AsIntConstOpnd()->AsInt32();
    lanes[1] = (uint8) srcs[3]->AsIntConstOpnd()->AsInt32();
    lanes[2] = (uint8) srcs[4]->AsIntConstOpnd()->AsInt32();
    lanes[3] = (uint8) srcs[5]->AsIntConstOpnd()->AsInt32();
    Assert(lanes[0] >= 0 && lanes[0] < 8);
    Assert(lanes[1] >= 0 && lanes[1] < 8);
    Assert(lanes[2] >= 0 && lanes[2] < 8);
    Assert(lanes[3] >= 0 && lanes[3] < 8);
    // Classify each lane by its source; fromSrc1 + fromSrc2 == 4, so exactly
    // one of the branches below is taken.
    CheckShuffleLanes_4(lanes, lanesSrc, &fromSrc1, &fromSrc2);
    Assert(fromSrc1 + fromSrc2 == 4);
    if (fromSrc1 == 4 || fromSrc2 == 4)
    {
        // can be done with a swizzle
        IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1];
        InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr);
    }
    else if (fromSrc1 == 2)
    {
        if (lanes[0] < 4 && lanes[1] < 4)
        {
            // x86 friendly shuffle: SHUFPS takes its two low result lanes
            // from the first operand and the two high lanes from the second.
            Assert(lanes[2] >= 4 && lanes[3] >= 4);
            InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
        }
        else
        {
            // arbitrary shuffle with 2 lanes from each src
            uint8 ordLanes[4], reArrLanes[4];
            // order lanes based on which src they come from
            // compute re-arrangement mask
            for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++)
            {
                if (lanesSrc[i] == 1 && j1 < 4)
                {
                    ordLanes[j1] = lanes[i];
                    reArrLanes[i] = j1;
                    j1++;
                }
                else if(j2 < 4)
                {
                    Assert(lanesSrc[i] == 2);
                    ordLanes[j2] = lanes[i];
                    reArrLanes[i] = j2;
                    j2++;
                }
            }
            // First shuffle gathers the lanes in source order (src1's two,
            // then src2's two); the second permutes them into final position.
            IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func);
            InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr);
            InsertShufps(reArrLanes, dst, temp, temp, instr);
        }
    }
    else if (fromSrc1 == 3 || fromSrc2 == 3)
    {
        // shuffle with 3 lanes from one src, one from another
        IR::Instr *newInstr;
        IR::Opnd * majSrc, *minSrc;
        IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
        IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
        IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
        uint8 minorityLane = 0, maxLaneValue;
        majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1];
        minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0];
        Assert(majSrc != minSrc);
        // Algorithm:
        // SHUFPS temp1, majSrc, lanes
        // SHUFPS temp2, minSrc, lanes
        // MOVUPS temp3, [minorityLane mask]
        // ANDPS temp2, temp3 // mask all lanes but minorityLane
        // ANDNPS temp3, temp1 // zero minorityLane
        // ORPS dst, temp2, temp3
        // find minorityLane to mask
        maxLaneValue = minSrc == srcs[0] ? 4 : 8;
        for (uint8 i = 0; i < 4; i++)
        {
            if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue)
            {
                minorityLane = i;
                break;
            }
        }
        IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86FourLanesMaskAddr(minorityLane), dst->GetType(), m_func);
        InsertShufps(lanes, temp1, majSrc, majSrc, instr);
        InsertShufps(lanes, temp2, minSrc, minSrc, instr);
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    instr->Remove();
    return pInstr;
}
  1489. // 8 and 16 lane shuffle with memory temps
// 8 and 16 lane shuffle with memory temps
IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
{
    // Lowers 8- and 16-lane swizzles/shuffles by going through the thread
    // context's SIMD scratch area: spill the source vector(s) to two
    // consecutive 16-byte slots, copy the selected lanes one at a time with
    // scalar MOVs into a third slot, then reload the result into dst.
    // For shuffles, a lane index >= laneCount reads from the second source,
    // which sits immediately after the first in the scratch area.
    Js::OpCode irOpcode = instr->m_opcode;
    IR::Instr *pInstr = instr->m_prev, *newInstr = nullptr;
    SList<IR::Opnd*> *args = nullptr;
    IR::Opnd *dst = nullptr;
    IR::Opnd *src1 = nullptr, *src2 = nullptr;
    uint8 lanes[16], laneCount = 0, scale = 1;
    bool isShuffle = false;
    IRType laneType = TyInt16;
    intptr_t temp1SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);  // src1 slot
    intptr_t temp2SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(1);  // src2 slot (shuffle only)
    intptr_t dstSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(2);    // result slot
#if DBG
    intptr_t endAddrSIMD = (intptr_t)(temp1SIMD + sizeof(X86SIMDValue) * SIMD_TEMP_SIZE);
#endif
    void *address = nullptr;
    args = Simd128GetExtendedArgs(instr);
    // Decode opcode: lane count, lane width (scale, in bytes) and whether a
    // second source vector is present.
    switch (irOpcode)
    {
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_U8:
        Assert(args->Count() == 10);
        laneCount = 8;
        laneType = TyInt16;
        isShuffle = false;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U16:
        Assert(args->Count() == 18);
        laneCount = 16;
        laneType = TyInt8;
        isShuffle = false;
        scale = 1;
        break;
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_U8:
        Assert(args->Count() == 11);
        laneCount = 8;
        isShuffle = true;
        laneType = TyUint16;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U16:
        Assert(args->Count() == 19);
        laneCount = 16;
        isShuffle = true;
        laneType = TyUint8;
        scale = 1;
        break;
    default:
        Assert(UNREACHED);
    }
    dst = args->Pop();
    src1 = args->Pop();
    if (isShuffle)
    {
        src2 = args->Pop();
    }
    Assert(dst->IsSimd128() && src1 && src1->IsSimd128() && (!isShuffle|| src2->IsSimd128()));
    // Collect the constant lane indices.
    for (uint i = 0; i < laneCount; i++)
    {
        IR::Opnd * laneOpnd = args->Pop();
        Assert(laneOpnd->IsIntConstOpnd());
        lanes[i] = (uint8)laneOpnd->AsIntConstOpnd()->AsInt32();
    }
    // MOVUPS [temp], src1
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)temp1SIMD, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    if (isShuffle)
    {
        // MOVUPS [temp+16], src2
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)(temp2SIMD), TySimd128I16, m_func), src2, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    // Copy each selected lane: reads are relative to temp1SIMD, so shuffle
    // indices >= laneCount fall through into the src2 slot at temp2SIMD.
    for (uint i = 0; i < laneCount; i++)
    {
        //. MOV tmp, [temp1SIMD + laneValue*scale]
        IR::RegOpnd *tmp = IR::RegOpnd::New(laneType, m_func);
        address = (void*)(temp1SIMD + lanes[i] * scale);
        // lane read must stay inside the two source slots (DBG bounds check)
        Assert((intptr_t)address + (intptr_t)scale <= (intptr_t)dstSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, IR::MemRefOpnd::New(address, laneType, m_func), m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        //. MOV [dstSIMD + i*scale], tmp
        address = (void*)(dstSIMD + i * scale);
        Assert((intptr_t)address + (intptr_t) scale <= endAddrSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV,IR::MemRefOpnd::New(address, laneType, m_func), tmp, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    // MOVUPS dst, [dstSIMD]
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)dstSIMD, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->Remove();
    return pInstr;
}
  1592. IR::Instr* LowererMD::Simd128LowerNotEqual(IR::Instr* instr)
  1593. {
  1594. Assert(instr->m_opcode == Js::OpCode::Simd128_Neq_I4 || instr->m_opcode == Js::OpCode::Simd128_Neq_I8 ||
  1595. instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U4 ||
  1596. instr->m_opcode == Js::OpCode::Simd128_Neq_U8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16);
  1597. IR::Instr *pInstr;
  1598. IR::Opnd* dst = instr->GetDst();
  1599. IR::Opnd* src1 = instr->GetSrc1();
  1600. IR::Opnd* src2 = instr->GetSrc2();
  1601. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1602. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1603. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1604. Js::OpCode cmpOpcode = Js::OpCode::PCMPEQD;
  1605. if (instr->m_opcode == Js::OpCode::Simd128_Neq_I8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U8)
  1606. {
  1607. cmpOpcode = Js::OpCode::PCMPEQW;
  1608. }
  1609. else if (instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16)
  1610. {
  1611. cmpOpcode = Js::OpCode::PCMPEQB;
  1612. }
  1613. // dst = PCMPEQD src1, src2
  1614. pInstr = IR::Instr::New(cmpOpcode, dst, src1, src2, m_func);
  1615. instr->InsertBefore(pInstr);
  1616. //MakeDstEquSrc1(pInstr);
  1617. Legalize(pInstr);
  1618. // dst = PANDN dst, X86_ALL_NEG_ONES
  1619. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1620. instr->InsertBefore(pInstr);
  1621. //MakeDstEquSrc1(pInstr);
  1622. Legalize(pInstr);
  1623. pInstr = instr->m_prev;
  1624. instr->Remove();
  1625. return pInstr;
  1626. }
  1627. IR::Instr* LowererMD::Simd128LowerLessThan(IR::Instr* instr)
  1628. {
  1629. Assert(instr->m_opcode == Js::OpCode::Simd128_Lt_U4 || instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_Lt_U16 ||
  1630. instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16);
  1631. IR::Instr *pInstr;
  1632. IR::Opnd* dst = instr->GetDst();
  1633. IR::Opnd* src1 = instr->GetSrc1();
  1634. IR::Opnd* src2 = instr->GetSrc2();
  1635. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1636. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1637. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1638. IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
  1639. IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
  1640. IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
  1641. IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
  1642. Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
  1643. if (instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8)
  1644. {
  1645. cmpOpcode = Js::OpCode::PCMPGTW;
  1646. signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
  1647. }
  1648. else if (instr->m_opcode == Js::OpCode::Simd128_Lt_U16 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
  1649. {
  1650. cmpOpcode = Js::OpCode::PCMPGTB;
  1651. signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
  1652. }
  1653. // MOVUPS mask, [signBits]
  1654. pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
  1655. instr->InsertBefore(pInstr);
  1656. Legalize(pInstr);
  1657. // tmpa = PXOR src1, signBits
  1658. pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
  1659. instr->InsertBefore(pInstr);
  1660. Legalize(pInstr);
  1661. // tmpb = PXOR src2, signBits
  1662. pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
  1663. instr->InsertBefore(pInstr);
  1664. Legalize(pInstr);
  1665. // dst = cmpOpCode tmpb, tmpa (Less than, swapped opnds)
  1666. pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
  1667. instr->InsertBefore(pInstr);
  1668. Legalize(pInstr);
  1669. if (instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
  1670. {
  1671. // for SIMD unsigned int, greaterThanOrEqual == lessThan + Not
  1672. // dst = PANDN dst, X86_ALL_NEG_ONES
  1673. // MOVUPS mask, [allNegOnes]
  1674. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
  1675. instr->InsertBefore(pInstr);
  1676. Legalize(pInstr);
  1677. }
  1678. pInstr = instr->m_prev;
  1679. instr->Remove();
  1680. return pInstr;
  1681. }
IR::Instr* LowererMD::Simd128LowerLessThanOrEqual(IR::Instr* instr)
{
    // Lowers:
    //   LtEq_I4:          a <= b  ==  NOT(a > b)            (PCMPGTD + PANDN)
    //   LtEq_I8/I16:      a <= b  ==  (b > a) OR (a == b)
    //   LtEq_U4/U8/U16:   bias both operands by the lane sign bit, then
    //                     (a < b) OR (a == b) on the biased values
    //   Gt_U4/U8/U16:     unsigned a > b == NOT(a <= b), so the unsigned
    //                     LtEq result is inverted at the end.
    Assert(instr->m_opcode == Js::OpCode::Simd128_LtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 ||
        instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
        instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
    // Pick compare/equality opcodes for the lane width (default: 32-bit).
    Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
    Js::OpCode eqpOpcode = Js::OpCode::PCMPEQD;
    if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
    {
        cmpOpcode = Js::OpCode::PCMPGTW;
        eqpOpcode = Js::OpCode::PCMPEQW;
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
    {
        cmpOpcode = Js::OpCode::PCMPGTB;
        eqpOpcode = Js::OpCode::PCMPEQB;
    }
    if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I4)
    {
        // dst = pcmpgtd src1, src2
        pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
        pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16)
    {
        // tmpa = pcmpgtw src2, src1 (src1 < src2?) [pcmpgtb]
        pInstr = IR::Instr::New(cmpOpcode, tmpa, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpb = pcmpeqw src1, src2 [pcmpeqb]
        pInstr = IR::Instr::New(eqpOpcode, tmpb, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = por tmpa, tmpb
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmpa, tmpb, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
        instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
    {
        // Unsigned flavors: no unsigned compare exists, so XOR the per-lane
        // sign bit into both operands and use the signed compare.
        IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
        IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
        if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
        {
            signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
        }
        else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
        {
            signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
        }
        // MOVUPS mask, [signBits]
        pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpa = PXOR src1, mask
        pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpb = PXOR src2, signBits
        pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = cmpOpCode tmpb, tmpa  (biased src1 < biased src2)
        pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpa = pcmpeqd tmpa, tmpb   (equality is unaffected by the bias)
        pInstr = IR::Instr::New(eqpOpcode, tmpa, tmpa, tmpb, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = por dst, tmpa         (lessThan OR equal)
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmpa, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        if (instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
        { // for SIMD unsigned int, greaterThan == lessThanOrEqual + Not
            // dst = PANDN dst, X86_ALL_NEG_ONES
            pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
            instr->InsertBefore(pInstr);
            Legalize(pInstr);
        }
    }
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers signed integer lane-wise ">=" (Simd128_GtEq_I4/I8/I16) to SSE2.
// SSE2 has no PCMPGE*, so:
//   - I4 (32-bit lanes):    a >= b  <=>  NOT(b > a)          : swapped PCMPGTD + PANDN with all-ones
//   - I8/I16 (16/8-bit):    a >= b  <=>  (a > b) OR (a == b) : PCMPGT* / PCMPEQ* / POR
// Removes the SIMD instr and returns its predecessor so lowering resumes there.
IR::Instr* LowererMD::Simd128LowerGreaterThanOrEqual(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_GtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I4)
    {
        // dst = pcmpgtd src2, src1     ; per-lane mask of (src2 > src1), i.e. (src1 < src2)
        pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]   ; dst = NOT(dst) & ~0 == NOT(src1 < src2) == (src1 >= src2)
        pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
    {
        IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
        IR::RegOpnd* tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
        // Default to 16-bit lane opcodes; switch to 8-bit lane opcodes for I16.
        Js::OpCode cmpOpcode = Js::OpCode::PCMPGTW;
        Js::OpCode eqpOpcode = Js::OpCode::PCMPEQW;
        if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
        {
            cmpOpcode = Js::OpCode::PCMPGTB;
            eqpOpcode = Js::OpCode::PCMPEQB;
        }
        // tmp1 = pcmpgtw src1, src2 [pcmpgtb]   ; mask of (src1 > src2)
        pInstr = IR::Instr::New(cmpOpcode, tmp1, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = pcmpeqw src1, src2 [pcmpeqb]   ; mask of (src1 == src2)
        pInstr = IR::Instr::New(eqpOpcode, tmp2, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = por tmp1, tmp2                  ; (src1 > src2) | (src1 == src2) == (src1 >= src2)
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers Simd128_Min_F4 / Simd128_Max_F4.
// MINPS/MAXPS are not symmetric in the {NaN, x} and {+0, -0} cases (they return
// the second source operand), so the operation is computed in both operand
// orders and the two results merged:
//   min: OR the two results  — OR keeps the sign bit (-0 preferred) and keeps
//        NaN lanes as NaN.
//   max: AND the two results (+0 preferred), then OR in the unordered-compare
//        mask so lanes where either input is NaN come out all-ones (a NaN).
// The max sequence mirrors SIMDFloat32x4Operation::OpMax (tmp2 is reused to
// save a register).
IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr)
{
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    Assert(instr->m_opcode == Js::OpCode::Simd128_Min_F4 || instr->m_opcode == Js::OpCode::Simd128_Max_F4);
    IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd* tmp2 = IR::RegOpnd::New(src2->GetType(), m_func);
    if (instr->m_opcode == Js::OpCode::Simd128_Min_F4)
    {
        // tmp1 = minps src1, src2
        pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp1, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = minps src2, src1  (swapped order)
        pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp2, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = orps tmp1, tmp2    ; merge: order-insensitive, -0/NaN preserved
        pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else
    {
        //This sequence closely mirrors SIMDFloat32x4Operation::OpMax except for
        //the fact that tmp2 (tmpbValue) is reused to reduce the number of registers
        //needed for this sequence.
        // tmp1 = maxps src1, src2
        pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = maxps src2, src1  (swapped order)
        pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp2, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp1 = andps tmp1, tmp2  ; merge: clears stray sign bit so +0 wins
        pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp1, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmp2 = cmpunordps src1, src2 ; all-ones mask for lanes with any NaN input
        pInstr = IR::Instr::New(Js::OpCode::CMPUNORDPS, tmp2, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = orps tmp1, tmp2    ; force NaN lanes to all-ones (a NaN)
        pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  1887. IR::Opnd* LowererMD::Simd128CanonicalizeToBoolsBeforeReduction(IR::Instr* instr)
  1888. {
  1889. IR::Opnd* src1 = instr->GetSrc1();
  1890. if (m_func->GetJITFunctionBody()->IsWasmFunction())
  1891. {
  1892. Js::OpCode cmpOpcode = Js::OpCode::InvalidOpCode;
  1893. switch (instr->m_opcode)
  1894. {
  1895. case Js::OpCode::Simd128_AnyTrue_B4:
  1896. case Js::OpCode::Simd128_AnyTrue_B2:
  1897. case Js::OpCode::Simd128_AllTrue_B4:
  1898. case Js::OpCode::Simd128_AllTrue_B2:
  1899. cmpOpcode = Js::OpCode::PCMPEQD;
  1900. break;
  1901. case Js::OpCode::Simd128_AnyTrue_B8:
  1902. case Js::OpCode::Simd128_AllTrue_B8:
  1903. cmpOpcode = Js::OpCode::PCMPEQW;
  1904. break;
  1905. case Js::OpCode::Simd128_AnyTrue_B16:
  1906. case Js::OpCode::Simd128_AllTrue_B16:
  1907. cmpOpcode = Js::OpCode::PCMPEQB;
  1908. break;
  1909. default:
  1910. Assert(UNREACHED);
  1911. }
  1912. IR::RegOpnd * newSrc = IR::RegOpnd::New(src1->GetType(), m_func);
  1913. m_lowerer->InsertMove(newSrc, src1, instr);
  1914. Simd128CanonicalizeToBools(instr, cmpOpcode, *newSrc);
  1915. return newSrc;
  1916. }
  1917. return src1;
  1918. }
  1919. IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr)
  1920. {
  1921. Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 ||
  1922. instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2);
  1923. IR::Instr *pInstr;
  1924. IR::Opnd* dst = instr->GetDst();
  1925. #ifdef ENABLE_WASM_SIMD
  1926. IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
  1927. #else
  1928. IR::Opnd* src1 = instr->GetSrc1();
  1929. #endif
  1930. Assert(dst->IsRegOpnd() && dst->IsInt32());
  1931. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1932. // pmovmskb dst, src1
  1933. // neg dst
  1934. // sbb dst, dst
  1935. // neg dst
  1936. // pmovmskb dst, src1
  1937. pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
  1938. instr->InsertBefore(pInstr);
  1939. Legalize(pInstr);
  1940. // neg dst
  1941. pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
  1942. instr->InsertBefore(pInstr);
  1943. Legalize(pInstr);
  1944. // sbb dst, dst
  1945. pInstr = IR::Instr::New(Js::OpCode::SBB, dst, dst, dst, m_func);
  1946. instr->InsertBefore(pInstr);
  1947. Legalize(pInstr);
  1948. // neg dst
  1949. pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
  1950. instr->InsertBefore(pInstr);
  1951. Legalize(pInstr);
  1952. pInstr = instr->m_prev;
  1953. instr->Remove();
  1954. return pInstr;
  1955. }
// Lowers Simd128_AllTrue_B2/B4/B8/B16: dst = (every lane all-ones) ? 1 : 0.
// PMOVMSKB collects one bit per byte; all-true means the 16-bit mask is 0xFFFF.
// For B2 the mask is first folded nibble-wise (see below) and compared to 0x0F0F.
// The boolean is materialized via CMP + SETE on an 8-bit temp, then sign-extended.
IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
#ifdef ENABLE_WASM_SIMD
    IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
#else
    IR::Opnd* src1 = instr->GetSrc1();
#endif
    Assert(dst->IsRegOpnd() && dst->IsInt32());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
    // pmovmskb dst, src1   ; one bit per byte lane
    pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
    instr->InsertBefore(pInstr);
    //horizontally OR into 0th and 2nd positions
    //TODO nikolayk revisit the sequence for int64x2.alltrue
    IR::Opnd* newDst = dst;
    uint cmpMask = 0xFFFF;
    if (instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2)
    {
        // 64-bit lanes: fold each lane's 8 mask bits so that
        // reduceReg = (mask | (mask >> 4)) & 0x0F0F is 0x0F0F iff both lanes are all-ones.
        cmpMask = 0x0F0F;
        IR::RegOpnd* reduceReg = IR::RegOpnd::New(TyInt32, m_func);
        pInstr = IR::Instr::New(Js::OpCode::SHR, reduceReg, dst, (IR::IntConstOpnd::New(4, TyInt32, m_func, true)), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        pInstr = IR::Instr::New(Js::OpCode::OR, reduceReg, reduceReg, dst, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        pInstr = IR::Instr::New(Js::OpCode::AND, reduceReg, reduceReg, (IR::IntConstOpnd::New(0x0F0F, TyInt32, m_func, true)), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        newDst = reduceReg;
    }
    // cmp newDst, cmpMask  ; sets ZF iff every lane was all-ones
    pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    pInstr->SetSrc1(newDst);
    pInstr->SetSrc2(IR::IntConstOpnd::New(cmpMask, TyInt32, m_func, true));
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // mov tmp(TyInt8), newDst   ; MOV does not touch EFLAGS, so ZF survives to SETE
    pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, newDst, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // sete tmp(TyInt8)
    pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // movsx dst, tmp(TyInt8)    ; widen the 0/1 byte into the int32 result
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers float32x4 -> int32x4 conversion. The instr itself is rewritten in
// place to CVTTPS2DQ; the range/NaN validation code is inserted AFTER it
// (via insertInstr = instr->m_next). CVTTPS2DQ produces the "integer
// indefinite" value 0x80000000 for out-of-range or NaN lanes, so:
//  1. If no lane of dst is 0x80000000, the conversion was exact -> done.
//  2. Otherwise check the sources: lane >= 2^31, lane < -2^31, or (Wasm only)
//     NaN lanes -> throw a range error; a legitimate -2^31 lane falls through.
// Returns the instruction preceding the (rewritten) conversion.
IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *mask1, *mask2;
    IR::Instr *insertInstr, *pInstr, *newInstr;
    IR::LabelInstr *doneLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    // CVTTPS2DQ dst, src
    instr->m_opcode = Js::OpCode::CVTTPS2DQ;
    insertInstr = instr->m_next;
    pInstr = instr->m_prev;
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    mask1 = IR::RegOpnd::New(TyInt32, m_func);
    mask2 = IR::RegOpnd::New(TyInt32, m_func);
    // bound checks
    // check if any value is potentially out of range (0x80000000 in output)
    // PCMPEQD tmp, dst, X86_NEG_MASK (0x80000000)
    // MOVMSKPS mask1, tmp
    // CMP mask1, 0
    // JEQ $doneLabel          (note: JEQ, not JNE — no 0x80000000 lane means all in range)
    tmp = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // we have potential out of bound. check bounds
    // MOVAPS tmp2, X86_TWO_31_F4 (0x4f000000)
    // CMPLEPS tmp, tmp2, src        ; lanes >= 2^31 (too large for int32)
    // MOVMSKPS mask1, tmp
    // MOVAPS tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
    // CMPLTPS tmp, src, tmp2        ; lanes < -2^31 (too small for int32)
    // MOVMSKPS mask2, tmp
    // OR mask1, mask1, mask2
    // check for NaNs (Wasm only)
    // CMPEQPS tmp, src              ; src == src fails only for NaN lanes
    // MOVMSKPS mask2, tmp
    // NOT mask2
    // AND mask2, 0x00000F
    // OR mask1, mask2
    //
    // CMP mask1, 0
    // JEQ $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegTwoPower31F4Addr(), TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
#ifdef ENABLE_WASM_SIMD
    if (m_func->GetJITFunctionBody()->IsWasmFunction())
    {
        newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
        insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::NOT, mask2, mask2, m_func));
        newInstr = IR::Instr::New(Js::OpCode::AND, mask2, mask2, IR::IntConstOpnd::New(0x00000F, TyInt32, m_func), m_func);
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
    }
#endif
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // throw range error
    m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    insertInstr->InsertBefore(doneLabel);
    return pInstr;
}
// Lowers float32x4 -> uint32x4 conversion. x86 only has a signed conversion
// (CVTTPS2DQ, range [-2^31, 2^31-1]), so:
//  1. Reject NaN lanes (Wasm only) and any lane <= -1.0 (negative after
//     truncation) -> throw range error.
//  2. Bias lanes >= 2^31 down by f32(2^31), convert, then detect residual
//     out-of-range lanes (0x80000000 in the output, i.e. originally >= 2^32).
//  3. Add i32(2^31) back to exactly the lanes that were biased.
// Removes the SIMD instr and returns its predecessor.
IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *two_31_f4_mask, *two_31_i4_mask, *mask;
    IR::Instr *pInstr, *newInstr;
    IR::LabelInstr *doneLabel, *throwLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    throwLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
    pInstr = instr->m_prev;
    mask = IR::RegOpnd::New(TyInt32, m_func);
    two_31_f4_mask = IR::RegOpnd::New(TySimd128F4, m_func);
    two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp = IR::RegOpnd::New(TySimd128F4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128F4, m_func);
    // check for NaNs (Wasm only)
    // CMPEQPS tmp, src, src     ; per-lane all-ones iff lane is not NaN
    // MOVMSKPS mask, tmp
    // CMP mask, 0x0F            ; all four lanes ordered?
    // JNE $throwLabel
#ifdef ENABLE_WASM_SIMD
    if (m_func->GetJITFunctionBody()->IsWasmFunction())
    {
        newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func));
        newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        newInstr->SetSrc1(mask);
        newInstr->SetSrc2(IR::IntConstOpnd::New(0x0000000F, TyInt32, m_func));
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    }
#endif
    // any lanes <= -1.0 ?     (would truncate to a negative integer)
    // CMPLEPS tmp, src, [X86_ALL_FLOAT32_NEG_ONES]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, src, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesF4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
    // To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
    // MOVAPS two_31_f4_mask, [X86_TWO_31]
    // CMPLEPS tmp2, two_31_f4_mask, src
    // ANDPS two_31_f4_mask, tmp2 // two_31_f4_mask has f32(2^31) for lanes >= 2^31, 0 otherwise
    // SUBPS tmp2, src, two_31_f4_mask // subtract 2^31 from lanes >= 2^31, unchanged otherwise.
    // CVTTPS2DQ dst, tmp2
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_f4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128F4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp2, two_31_f4_mask, src, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_f4_mask, two_31_f4_mask, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::SUBPS, tmp2, src, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CVTTPS2DQ, dst, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
    // PCMPEQD tmp, dst, [X86_NEG_MASK]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    // we pass range checks
    // add i4(2^31) values back to adjusted values.
    // Use first bit from the 2^31 float mask (0x4f000...0 << 1)
    // and AND with 2^31 int mask (0x8000..0) setting first bit to zero if lane hasn't been adjusted
    // MOVAPS two_31_i4_mask, [X86_TWO_31_I4]
    // PSLLD two_31_f4_mask, 1
    // ANDPS two_31_i4_mask, two_31_f4_mask
    // PADDD dst, dst, two_31_i4_mask
    // JMP $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_i4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31I4Addr(), TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PSLLD, two_31_f4_mask, two_31_f4_mask, IR::IntConstOpnd::New(1, TyInt8, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_i4_mask, two_31_i4_mask, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, two_31_i4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, doneLabel, m_func));
    // throwLabel:
    // Throw Range Error
    instr->InsertBefore(throwLabel);
    m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    // doneLabel:
    instr->InsertBefore(doneLabel);
    instr->Remove();
    return pInstr;
}
  2226. IR::Instr* LowererMD::Simd128LowerFloat32x4FromUint32x4(IR::Instr *instr)
  2227. {
  2228. IR::Opnd *dst, *src, *tmp, *zero;
  2229. IR::Instr *pInstr, *newInstr;
  2230. dst = instr->GetDst();
  2231. src = instr->GetSrc1();
  2232. Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
  2233. pInstr = instr->m_prev;
  2234. zero = IR::RegOpnd::New(TySimd128I4, m_func);
  2235. tmp = IR::RegOpnd::New(TySimd128I4, m_func);
  2236. // find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
  2237. // MOVAPS zero, [X86_ALL_ZEROS]
  2238. newInstr = IR::Instr::New(Js::OpCode::MOVAPS, zero, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
  2239. instr->InsertBefore(newInstr);
  2240. Legalize(newInstr);
  2241. // tmp = PCMPGTD zero, src
  2242. newInstr = IR::Instr::New(Js::OpCode::PCMPGTD, tmp, zero, src, m_func);
  2243. instr->InsertBefore(newInstr);
  2244. Legalize(newInstr);
  2245. // temp1 has f32(2^32) for unsigned values above 2^31, 0 otherwise
  2246. // ANDPS tmp, tmp, [X86_TWO_32_F4]
  2247. newInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower32F4Addr(), TySimd128F4, m_func), m_func);
  2248. instr->InsertBefore(newInstr);
  2249. Legalize(newInstr);
  2250. // convert
  2251. // dst = CVTDQ2PS src
  2252. newInstr = IR::Instr::New(Js::OpCode::CVTDQ2PS, dst, src, m_func);
  2253. instr->InsertBefore(newInstr);
  2254. Legalize(newInstr);
  2255. // Add f32(2^32) to negative values
  2256. // ADDPS dst, dst, tmp
  2257. newInstr = IR::Instr::New(Js::OpCode::ADDPS, dst, dst, tmp, m_func);
  2258. instr->InsertBefore(newInstr);
  2259. Legalize(newInstr);
  2260. instr->Remove();
  2261. return pInstr;
  2262. }
  2263. IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
  2264. {
  2265. Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
  2266. instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
  2267. instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
  2268. instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
  2269. instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
  2270. instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
  2271. instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
  2272. //instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
  2273. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
  2274. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I8 ||
  2275. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I16 ||
  2276. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U4 ||
  2277. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U8 ||
  2278. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U16 ||
  2279. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4
  2280. //instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
  2281. );
  2282. IR::Instr * instrPrev = instr->m_prev;
  2283. IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
  2284. IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
  2285. IR::Opnd * dst = instr->GetDst();
  2286. IR::Opnd * src1 = instr->GetSrc1();
  2287. IR::Opnd * src2 = instr->GetSrc2();
  2288. ValueType arrType = baseOpnd->GetValueType();
  2289. uint8 dataWidth = instr->dataWidth;
  2290. // Type-specialized.
  2291. Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
  2292. IR::Instr * done;
  2293. if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
  2294. {
  2295. uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
  2296. // bound check and helper
  2297. done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
  2298. }
  2299. else
  2300. {
  2301. // Reaching here means:
  2302. // We have a constant index, and either
  2303. // (1) constant heap or (2) variable heap with constant index < 16MB.
  2304. // Case (1) requires static bound check. Case (2) means we are always in bound.
  2305. // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
  2306. if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
  2307. {
  2308. m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
  2309. instr->Remove();
  2310. return instrPrev;
  2311. }
  2312. done = instr;
  2313. }
  2314. return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
  2315. }
  2316. IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
  2317. {
  2318. Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
  2319. Assert(
  2320. instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
  2321. instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
  2322. instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
  2323. instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
  2324. instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
  2325. instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
  2326. instr->m_opcode == Js::OpCode::Simd128_LdArr_F4
  2327. );
  2328. IR::Opnd * src = instr->GetSrc1();
  2329. IR::RegOpnd * indexOpnd =src->AsIndirOpnd()->GetIndexOpnd();
  2330. IR::Opnd * dst = instr->GetDst();
  2331. ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
  2332. // If we type-specialized, then array is a definite typed-array.
  2333. Assert(arrType.IsObject() && arrType.IsTypedArray());
  2334. Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr);
  2335. Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr);
  2336. return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
  2337. }
// Expands a SIMD load of dataWidth bytes (4, 8, 12, or 16) from the indir
// `src` into `dst`. Partial widths are assembled from narrower moves:
// 12 bytes = MOVSD (low 8) + MOVSS at offset +8, shifted into the high lane
// and OR-ed in. scaleFactor is applied to the indir only for non-asm.js
// (typed array) accesses. Removes `instr` and returns its predecessor.
IR::Instr *
LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
{
    IR::Instr *newInstr = nullptr;
    IR::Instr * instrPrev = instr->m_prev;
    // Type-specialized.
    Assert(dst && dst->IsSimd128());
    Assert(src->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
        src->AsIndirOpnd()->SetScale(scaleFactor);
    }
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 12:
    {
        IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
        // MOVSD dst, src1([arrayBuffer + indexOpnd])       ; low 8 bytes
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])  ; third float
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
        instr->InsertBefore(newInstr);
        // Patch the +8 offset after insertion, on the instr's own copy of the indir.
        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
        Legalize(newInstr);
        // PSLLDQ temp, 0x08    ; move the loaded float up into the third lane
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
        // ORPS dst, temp       ; combine with the low 8 bytes
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    }
    case 8:
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 4:
        // MOVSS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    default:
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
  2398. IR::Instr*
  2399. LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
  2400. {
  2401. Assert(
  2402. instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
  2403. instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
  2404. instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
  2405. instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
  2406. instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
  2407. instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
  2408. instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
  2409. //instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
  2410. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
  2411. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I8 ||
  2412. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I16 ||
  2413. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
  2414. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U8 ||
  2415. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U16 ||
  2416. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
  2417. instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4
  2418. //instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
  2419. );
  2420. IR::Instr * instrPrev = instr->m_prev;
  2421. IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
  2422. IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
  2423. IR::Opnd * dst = instr->GetDst();
  2424. IR::Opnd * src1 = instr->GetSrc1();
  2425. IR::Opnd * src2 = instr->GetSrc2();
  2426. ValueType arrType = baseOpnd->GetValueType();
  2427. uint8 dataWidth = instr->dataWidth;
  2428. // Type-specialized.
  2429. Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
  2430. IR::Instr * done;
  2431. if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
  2432. {
  2433. // CMP indexOpnd, src2(arrSize)
  2434. // JA $helper
  2435. // JMP $store
  2436. // $helper:
  2437. // Throw RangeError
  2438. // JMP $done
  2439. // $store:
  2440. // MOV dst([arrayBuffer + indexOpnd]), src1
  2441. // $done:
  2442. uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
  2443. done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
  2444. }
  2445. else
  2446. {
  2447. // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds
  2448. if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
  2449. {
  2450. m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
  2451. instr->Remove();
  2452. return instrPrev;
  2453. }
  2454. done = instr;
  2455. }
  2456. return Simd128ConvertToStore(dst, src1, dataWidth, instr);
  2457. }
// Lowers a (non-asm.js) SIMD store to a typed array: emits the index upper
// bound check, loads the head segment pointer, then expands the store itself,
// scaled by the array's element size.
IR::Instr*
LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
{
    Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
    Assert(
        instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
        instr->m_opcode == Js::OpCode::Simd128_StArr_F4
        );
    IR::Opnd * dst = instr->GetDst();
    IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd();
    IR::Opnd * src1 = instr->GetSrc1();
    uint8 dataWidth = instr->dataWidth;
    ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
    // If we type-specialized, then array is a definite typed-array.
    Assert(arrType.IsObject() && arrType.IsTypedArray());
    Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr);
    Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr);
    return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /*scale factor*/);
}
  2482. IR::Instr *
  2483. LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
  2484. {
  2485. IR::Instr * instrPrev = instr->m_prev;
  2486. Assert(src1 && src1->IsSimd128());
  2487. Assert(dst->IsIndirOpnd());
  2488. if (scaleFactor > 0)
  2489. {
  2490. // needed only for non-Asmjs code
  2491. Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
  2492. dst->AsIndirOpnd()->SetScale(scaleFactor);
  2493. }
  2494. switch (dataWidth)
  2495. {
  2496. case 16:
  2497. // MOVUPS dst([arrayBuffer + indexOpnd]), src1
  2498. instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
  2499. break;
  2500. case 12:
  2501. {
  2502. IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
  2503. IR::Instr *movss;
  2504. // MOVAPS temp, src
  2505. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
  2506. // MOVSD dst([arrayBuffer + indexOpnd]), temp
  2507. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
  2508. // PSRLDQ temp, 0x08
  2509. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func));
  2510. // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
  2511. movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
  2512. instr->InsertBefore(movss);
  2513. movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
  2514. break;
  2515. }
  2516. case 8:
  2517. // MOVSD dst([arrayBuffer + indexOpnd]), src1
  2518. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
  2519. break;
  2520. case 4:
  2521. // MOVSS dst([arrayBuffer + indexOpnd]), src1
  2522. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
  2523. break;
  2524. default:;
  2525. Assume(UNREACHED);
  2526. }
  2527. instr->Remove();
  2528. return instrPrev;
  2529. }
// Emits the upper-bound check for a non-asm.js SIMD typed-array access:
// verifies that (index + elemCount) fits within the head-segment length,
// otherwise throws a RangeError. Layout of the emitted code:
//     <compare + branch to skipLabel on success>
//     <RangeError throw>            -- generated before skipLabel
//   skipLabel:
//     <the original access, lowered by the caller>
// Does nothing if globopt already eliminated/extracted the bound check.
void
LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
    Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());

    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
    IR::Opnd* headSegmentLengthOpnd;

    if (arrayRegOpnd->EliminatedUpperBoundCheck())
    {
        // already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
        return;
    }

    if (arrayRegOpnd->HeadSegmentLengthSym())
    {
        // globopt extracted the head-segment length into a sym; reuse it.
        headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
    }
    else
    {
        // (headSegmentLength = [base + offset(length)])
        int lengthOffset;
        lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
        headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
    }

    // skipLabel marks the start of the (in-bounds) access; the throw sequence
    // is emitted just before it, and the success branch jumps over the throw.
    IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
    // Number of array elements the SIMD access touches (dataWidth bytes
    // converted to element count for this array's element size).
    int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
    if (indexOpnd)
    {
        // MOV tmp, elemCount
        // ADD tmp, index
        // CMP tmp, Length -- upper bound check
        // JBE $storeLabel
        // Throw RuntimeError
        // skipLabel:
        IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
        IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
        m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
        Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
        // Unsigned compare-branch: (index + elemCount) <= length => in bounds.
        m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    else
    {
        // Constant index folded into the indir offset:
        // CMP Length, (offset + elemCount)
        // JA $storeLabel
        int32 offset = indirOpnd->GetOffset();
        int32 index = offset + elemCount;
        m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    // Out-of-bounds fall-through path: throw RangeError (emitted before skipLabel).
    m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    return;
}
  2579. void
  2580. LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
  2581. {
  2582. // For non-asm.js we check if headSeg symbol exists, else load it.
  2583. IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
  2584. IR::RegOpnd *headSegmentOpnd;
  2585. if (arrayRegOpnd->HeadSegmentSym())
  2586. {
  2587. headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
  2588. }
  2589. else
  2590. {
  2591. // MOV headSegment, [base + offset(head)]
  2592. int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
  2593. IR::IndirOpnd * newIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
  2594. headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
  2595. m_lowerer->InsertMove(headSegmentOpnd, newIndirOpnd, instr);
  2596. }
  2597. // change base to be the head segment instead of the array object
  2598. indirOpnd->SetBaseOpnd(headSegmentOpnd);
  2599. }
// Builds args list <dst, src1, src2, src3 ..>
// Walks the ExtendArg_A def chain hanging off this SIMD instruction and
// collects operand copies into a list. The chain is traversed from the last
// ExtendArg backwards (via src2 links), and each arg is pushed onto the list
// head, so the resulting list reads <dst, src1, src2, ...> front to back.
SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
{
    SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
    IR::Instr *pInstr = instr;
    IR::Opnd *dst, *src1, *src2;

    dst = src1 = src2 = nullptr;

    if (pInstr->GetDst())
    {
        dst = pInstr->UnlinkDst();
    }

    // src1 of the SIMD instr is the dst of the last ExtendArg_A; from there
    // follow each ExtendArg's src2 link to the previous one in the chain.
    src1 = pInstr->UnlinkSrc1();
    Assert(src1->GetStackSym()->IsSingleDef());
    pInstr = src1->GetStackSym()->GetInstrDef();

    while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
    {
        Assert(pInstr->GetSrc1());
        // Copy the arg operand: the original stays attached to the ExtendArg.
        src1 = pInstr->GetSrc1()->Copy(this->m_func);
        if (src1->IsRegOpnd())
        {
            // Keep the arg sym alive across loop back-edges.
            // NOTE(review): presumably because the ExtendArg defs can sit in a
            // different block than the final SIMD op — confirm rationale.
            this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
        }
        args->Push(src1);
        if (pInstr->GetSrc2())
        {
            src2 = pInstr->GetSrc2();
            Assert(src2->GetStackSym()->IsSingleDef());
            pInstr = src2->GetStackSym()->GetInstrDef();
        }
        else
        {
            // First arg in the chain has no src2 link; stop.
            pInstr = nullptr;
        }
    }
    // dst goes on last so it ends up at the head of the list.
    args->Push(dst);
    // Expect at least dst + 3 args (smallest extended-arg SIMD op).
    Assert(args->Count() > 3);
    return args;
}
// Materializes a boolean value as an all-ones (-1) / all-zeros (0) integer
// register of the requested type.
//  - Int constants are folded directly to -1 or 0 at compile time.
//  - Otherwise emits: tmp = 0; tmp -= value; CMOVS tmp, -1
//    so a value whose negation is negative (i.e. value > 0) becomes -1 and
//    0 stays 0.
// NOTE(review): the runtime path assumes the incoming value is 0 or positive
// (e.g. 0/1); a negative input would not normalize to -1 — confirm callers.
IR::Opnd*
LowererMD::EnregisterBoolConst(IR::Instr* instr, IR::Opnd *opnd, IRType type)
{
    if (opnd->IsIntConstOpnd() || opnd->IsInt64ConstOpnd())
    {
        // Constant: emit MOV tempReg, (-1 | 0) directly.
        bool isSet = opnd->GetImmediateValue(instr->m_func) != 0;
        IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(isSet ? -1 : 0, type, m_func, true), m_func));
        return tempReg;
    }

    // Work in the operand's own type; retype the result at the end.
    IRType origType = opnd->GetType();
    IR::RegOpnd *tempReg = IR::RegOpnd::New(origType, m_func);
    // MOV tempReg, 0
    IR::Instr* cmovInstr = IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(0, origType, m_func, true), m_func);
    instr->InsertBefore(cmovInstr);
    Legalize(cmovInstr);
    // SUB tempReg, opnd  (tempReg = -opnd; sets SF when opnd > 0)
    cmovInstr = IR::Instr::New(Js::OpCode::SUB, tempReg, tempReg, opnd->UseWithNewType(origType, m_func), m_func);
    instr->InsertBefore(cmovInstr);
    Legalize(cmovInstr);
    // CMOVS tempReg, -1  (conditional on the sign flag from the SUB above)
    cmovInstr = IR::Instr::New(Js::OpCode::CMOVS, tempReg, tempReg, IR::IntConstOpnd::New(-1, origType, m_func, true), m_func);
    instr->InsertBefore(cmovInstr);
    Legalize(cmovInstr);
    return tempReg->UseWithNewType(type, m_func);
}
  2661. IR::Opnd*
  2662. LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/)
  2663. {
  2664. IRType constType = constOpnd->GetType();
  2665. if (!IRType_IsNativeInt(constType))
  2666. {
  2667. // not int opnd, nothing to do
  2668. return constOpnd;
  2669. }
  2670. Assert(type == TyInt32 || type == TyInt16 || type == TyInt8);
  2671. Assert(constType == TyInt32 || constType == TyInt16 || constType == TyInt8);
  2672. if (constOpnd->IsRegOpnd())
  2673. {
  2674. // already a register, just cast
  2675. constOpnd->SetType(type);
  2676. return constOpnd;
  2677. }
  2678. // en-register
  2679. IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
  2680. // MOV tempReg, constOpnd
  2681. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
  2682. return tempReg;
  2683. }
// Fills m_simd128OpCodesMap: a table mapping SIMD bytecode opcodes that have
// a direct 1:1 machine equivalent to the x86/SSE opcode used to lower them.
// The table is zero-initialized (JitAnewArrayZ), so opcodes not set here have
// no direct mapping and are lowered by dedicated code paths instead. Entries
// marked "(swap srcs)" reuse the reverse-comparison instruction with the
// source operands exchanged at lowering time.
void LowererMD::Simd128InitOpcodeMap()
{
    m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());

    // All simd ops should be contiguous for this mapping to work
    Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);

    //SET_SIMDOPCODE(Simd128_FromFloat64x2_I4     , CVTTPD2DQ);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Add_I4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_I4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Lt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Gt_I4                , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Eq_I4                , PCMPEQD);
    SET_SIMDOPCODE(Simd128_And_I4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I4               , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I4               , XORPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I8  , MOVAPS);

    SET_SIMDOPCODE(Simd128_Or_I16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_I16              , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I16              , XORPS);
    SET_SIMDOPCODE(Simd128_And_I16              , PAND);
    SET_SIMDOPCODE(Simd128_Add_I16              , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_I16              , PSUBB);
    SET_SIMDOPCODE(Simd128_Lt_I16               , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Gt_I16               , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Eq_I16               , PCMPEQB);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I16, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I16 , MOVAPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U4  , MOVAPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U8  , MOVAPS);

    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U16   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U16  , MOVAPS);

    //SET_SIMDOPCODE(Simd128_FromFloat64x2_F4      , CVTPD2PS);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_F4        , CVTDQ2PS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_Abs_F4                , ANDPS);
    SET_SIMDOPCODE(Simd128_Neg_F4                , XORPS);
    SET_SIMDOPCODE(Simd128_Add_F4                , ADDPS);
    SET_SIMDOPCODE(Simd128_Sub_F4                , SUBPS);
    SET_SIMDOPCODE(Simd128_Mul_F4                , MULPS);
    SET_SIMDOPCODE(Simd128_Div_F4                , DIVPS);
    SET_SIMDOPCODE(Simd128_Sqrt_F4               , SQRTPS);
    SET_SIMDOPCODE(Simd128_Lt_F4                 , CMPLTPS); // CMPLTPS
    SET_SIMDOPCODE(Simd128_LtEq_F4               , CMPLEPS); // CMPLEPS
    SET_SIMDOPCODE(Simd128_Eq_F4                 , CMPEQPS); // CMPEQPS
    SET_SIMDOPCODE(Simd128_Neq_F4                , CMPNEQPS); // CMPNEQPS
    SET_SIMDOPCODE(Simd128_Gt_F4                 , CMPLTPS); // CMPLTPS (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_F4               , CMPLEPS); // CMPLEPS (swap srcs)

    SET_SIMDOPCODE(Simd128_Neg_D2                , XORPS);
    SET_SIMDOPCODE(Simd128_Add_D2                , ADDPD);
    SET_SIMDOPCODE(Simd128_Abs_D2                , ANDPD);
    SET_SIMDOPCODE(Simd128_Sub_D2                , SUBPD);
    SET_SIMDOPCODE(Simd128_Mul_D2                , MULPD);
    SET_SIMDOPCODE(Simd128_Div_D2                , DIVPD);
    SET_SIMDOPCODE(Simd128_Min_D2                , MINPD);
    SET_SIMDOPCODE(Simd128_Max_D2                , MAXPD);
    SET_SIMDOPCODE(Simd128_Sqrt_D2               , SQRTPD);
    SET_SIMDOPCODE(Simd128_Lt_D2                 , CMPLTPD); // CMPLTPD
    SET_SIMDOPCODE(Simd128_LtEq_D2               , CMPLEPD); // CMPLEPD
    SET_SIMDOPCODE(Simd128_Eq_D2                 , CMPEQPD); // CMPEQPD
    SET_SIMDOPCODE(Simd128_Neq_D2                , CMPNEQPD); // CMPNEQPD
    SET_SIMDOPCODE(Simd128_Gt_D2                 , CMPLTPD); // CMPLTPD (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_D2               , CMPLEPD); // CMPLEPD (swap srcs)
#if 0
    SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS);
#endif // 0

    SET_SIMDOPCODE(Simd128_And_I8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_I8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_I8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_I8               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_I8               , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_I8               , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_I8               , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_I8                , PCMPEQW);
    SET_SIMDOPCODE(Simd128_Lt_I8                , PCMPGTW); // (swap srcs)
    SET_SIMDOPCODE(Simd128_Gt_I8                , PCMPGTW);
    SET_SIMDOPCODE(Simd128_AddSaturate_I8       , PADDSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_I8       , PSUBSW);

    SET_SIMDOPCODE(Simd128_AddSaturate_I16      , PADDSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_I16      , PSUBSB);

    SET_SIMDOPCODE(Simd128_And_U4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_U4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_U4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U4               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U4               , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_U4               , PSUBD);
    SET_SIMDOPCODE(Simd128_Eq_U4                , PCMPEQD); // same as int32x4.equal

    SET_SIMDOPCODE(Simd128_And_U8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_U8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_U8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U8               , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U8               , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_U8               , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_U8               , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_U8                , PCMPEQW); // same as int16X8.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U8       , PADDUSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_U8       , PSUBUSW);

    SET_SIMDOPCODE(Simd128_And_U16              , PAND);
    SET_SIMDOPCODE(Simd128_Or_U16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_U16              , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U16              , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U16              , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_U16              , PSUBB);
    SET_SIMDOPCODE(Simd128_Eq_U16               , PCMPEQB); // same as int8x16.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U16      , PADDUSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_U16      , PSUBUSB);

    SET_SIMDOPCODE(Simd128_And_B4               , PAND);
    SET_SIMDOPCODE(Simd128_Or_B4                , POR);
    SET_SIMDOPCODE(Simd128_Xor_B4               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B4               , XORPS);

    SET_SIMDOPCODE(Simd128_And_B8               , PAND);
    SET_SIMDOPCODE(Simd128_Or_B8                , POR);
    SET_SIMDOPCODE(Simd128_Xor_B8               , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B8               , XORPS);

    SET_SIMDOPCODE(Simd128_And_B16              , PAND);
    SET_SIMDOPCODE(Simd128_Or_B16               , POR);
    SET_SIMDOPCODE(Simd128_Xor_B16              , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B16              , XORPS);

    SET_SIMDOPCODE(Simd128_Add_I2               , PADDQ);
    SET_SIMDOPCODE(Simd128_Sub_I2               , PSUBQ);
}
  2843. #undef SIMD_SETOPCODE
  2844. #undef SIMD_GETOPCODE
  2845. void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
  2846. {
  2847. Assert(lanes);
  2848. Assert(lanesSrc);
  2849. Assert(fromSrc1 && fromSrc2);
  2850. *fromSrc1 = 0;
  2851. *fromSrc2 = 0;
  2852. for (uint i = 0; i < 4; i++)
  2853. {
  2854. if (lanes[i] >= 0 && lanes[i] < 4)
  2855. {
  2856. (*fromSrc1)++;
  2857. lanesSrc[i] = 1;
  2858. }
  2859. else if (lanes[i] >= 4 && lanes[i] < 8)
  2860. {
  2861. (*fromSrc2)++;
  2862. lanesSrc[i] = 2;
  2863. }
  2864. else
  2865. {
  2866. Assert(UNREACHED);
  2867. }
  2868. }
  2869. }
  2870. void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
  2871. {
  2872. int8 shufMask;
  2873. uint8 normLanes[4];
  2874. IR::RegOpnd * tmp = IR::RegOpnd::New(TySimd128I4, m_func);
  2875. for (uint i = 0; i < 4; i++)
  2876. {
  2877. normLanes[i] = (lanes[i] >= 4) ? (lanes[i] - 4) : lanes[i];
  2878. }
  2879. shufMask = (int8)((normLanes[3] << 6) | (normLanes[2] << 4) | (normLanes[1] << 2) | normLanes[0]);
  2880. // ToDo: Move this to legalization code
  2881. if (dst->IsEqual(src1))
  2882. {
  2883. // instruction already legal
  2884. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2885. }
  2886. else if (dst->IsEqual(src2))
  2887. {
  2888. // MOVAPS tmp, dst
  2889. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp, dst, m_func));
  2890. // MOVAPS dst, src1
  2891. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  2892. // SHUF dst, tmp, imm8
  2893. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, tmp, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2894. }
  2895. else
  2896. {
  2897. // MOVAPS dst, src1
  2898. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  2899. // SHUF dst, src2, imm8
  2900. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2901. }
  2902. }
  2903. BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
  2904. {
  2905. return (1 << Lowerer::GetArrayIndirScale(arrType));
  2906. }
  2907. #endif