// LowerMDSharedSimd128.cpp
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "Backend.h"
// Look up the machine (MD) opcode mapped to a SIMD128 IR opcode.
// m_simd128OpCodesMap is indexed by (irOpcode - Simd128_Start); a zero
// entry means "no direct mapping" (see Simd128TryLowerMappedInstruction).
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)(irOpcode - Js::OpCode::Simd128_Start)]
// Register a one-to-one IR->MD opcode mapping. Asserts the slot is not
// already taken and that mdOpcode really is a machine-dependent opcode.
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0);\
Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart);\
m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
  11. IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
  12. {
  13. // Currently only handles type-specialized/asm.js opcodes
  14. if (!instr->GetDst())
  15. {
  16. // SIMD ops always have DST in asmjs
  17. Assert(!instr->m_func->GetJnFunction()->GetIsAsmjsMode());
  18. // unused result. Do nothing.
  19. IR::Instr * pInstr = instr->m_prev;
  20. instr->Remove();
  21. return pInstr;
  22. }
  23. if (Simd128TryLowerMappedInstruction(instr))
  24. {
  25. return instr->m_prev;
  26. }
  27. return Simd128LowerUnMappedInstruction(instr);
  28. }
  29. bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
  30. {
  31. bool legalize = true;
  32. Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
  33. if ((uint32)opcode == 0)
  34. return false;
  35. Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32);
  36. Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
  37. Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
  38. switch (instr->m_opcode)
  39. {
  40. case Js::OpCode::Simd128_Abs_F4:
  41. Assert(opcode == Js::OpCode::ANDPS);
  42. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ABS_MASK_F4, instr->GetSrc1()->GetType(), m_func));
  43. break;
  44. #if 0
  45. case Js::OpCode::Simd128_Abs_D2:
  46. Assert(opcode == Js::OpCode::ANDPD);
  47. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ABS_MASK_D2, instr->GetSrc1()->GetType(), m_func));
  48. break;
  49. #endif // 0
  50. case Js::OpCode::Simd128_Neg_F4:
  51. Assert(opcode == Js::OpCode::XORPS);
  52. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_NEG_MASK_F4, instr->GetSrc1()->GetType(), m_func));
  53. break;
  54. #if 0
  55. case Js::OpCode::Simd128_Neg_D2:
  56. Assert(opcode == Js::OpCode::XORPS);
  57. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_NEG_MASK_D2, instr->GetSrc1()->GetType(), m_func));
  58. break;
  59. #endif // 0
  60. case Js::OpCode::Simd128_Not_I4:
  61. case Js::OpCode::Simd128_Not_I16:
  62. case Js::OpCode::Simd128_Not_I8:
  63. case Js::OpCode::Simd128_Not_U4:
  64. case Js::OpCode::Simd128_Not_U8:
  65. case Js::OpCode::Simd128_Not_U16:
  66. case Js::OpCode::Simd128_Not_B4:
  67. case Js::OpCode::Simd128_Not_B8:
  68. case Js::OpCode::Simd128_Not_B16:
  69. Assert(opcode == Js::OpCode::XORPS);
  70. instr->SetSrc2(IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, instr->GetSrc1()->GetType(), m_func));
  71. break;
  72. case Js::OpCode::Simd128_Gt_F4:
  73. //case Js::OpCode::Simd128_Gt_D2:
  74. case Js::OpCode::Simd128_GtEq_F4:
  75. //case Js::OpCode::Simd128_GtEq_D2:
  76. case Js::OpCode::Simd128_Lt_I4:
  77. case Js::OpCode::Simd128_Lt_I8:
  78. case Js::OpCode::Simd128_Lt_I16:
  79. {
  80. Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS
  81. || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD || opcode == Js::OpCode::PCMPGTB
  82. || opcode == Js::OpCode::PCMPGTW );
  83. // swap operands
  84. auto *src1 = instr->UnlinkSrc1();
  85. auto *src2 = instr->UnlinkSrc2();
  86. instr->SetSrc1(src2);
  87. instr->SetSrc2(src1);
  88. break;
  89. }
  90. }
  91. instr->m_opcode = opcode;
  92. if (legalize)
  93. {
  94. //MakeDstEquSrc1(instr);
  95. Legalize(instr);
  96. }
  97. return true;
  98. }
// Lowers SIMD128 opcodes that have no direct one-to-one machine opcode
// mapping (everything Simd128TryLowerMappedInstruction declined).
// Pure dispatch: each opcode group is forwarded to its dedicated lowering
// routine, whose return value (instruction to resume lowering from) is
// propagated to the caller.
IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
{
    switch (instr->m_opcode)
    {
    // Constant load.
    case Js::OpCode::Simd128_LdC:
        return Simd128LoadConst(instr);
    // 4-lane constructors (float32x4 / int32x4 / uint32x4 / bool32x4).
    case Js::OpCode::Simd128_FloatsToF4:
    case Js::OpCode::Simd128_IntsToI4:
    case Js::OpCode::Simd128_IntsToU4:
    case Js::OpCode::Simd128_IntsToB4:
        return Simd128LowerConstructor_4(instr);
    // 8-lane constructors.
    case Js::OpCode::Simd128_IntsToI8:
    case Js::OpCode::Simd128_IntsToU8:
    case Js::OpCode::Simd128_IntsToB8:
        return Simd128LowerConstructor_8(instr);
    // 16-lane constructors.
    case Js::OpCode::Simd128_IntsToI16:
    case Js::OpCode::Simd128_IntsToU16:
    case Js::OpCode::Simd128_IntsToB16:
        return Simd128LowerConstructor_16(instr);
#if 0
    case Js::OpCode::Simd128_DoublesToD2:
        return Simd128LowerConstructor_2(instr);
#endif // 0
    // Lane extraction (all widths/types).
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B4:
    case Js::OpCode::Simd128_ExtractLane_B8:
    case Js::OpCode::Simd128_ExtractLane_B16:
    case Js::OpCode::Simd128_ExtractLane_F4:
        return Simd128LowerLdLane(instr);
    // Lane replacement, split by lane count.
    case Js::OpCode::Simd128_ReplaceLane_I4:
    case Js::OpCode::Simd128_ReplaceLane_F4:
    case Js::OpCode::Simd128_ReplaceLane_U4:
    case Js::OpCode::Simd128_ReplaceLane_B4:
        return SIMD128LowerReplaceLane_4(instr);
    case Js::OpCode::Simd128_ReplaceLane_I8:
    case Js::OpCode::Simd128_ReplaceLane_U8:
    case Js::OpCode::Simd128_ReplaceLane_B8:
        return SIMD128LowerReplaceLane_8(instr);
    case Js::OpCode::Simd128_ReplaceLane_I16:
    case Js::OpCode::Simd128_ReplaceLane_U16:
    case Js::OpCode::Simd128_ReplaceLane_B16:
        return SIMD128LowerReplaceLane_16(instr);
    // Broadcast a scalar to all lanes.
    case Js::OpCode::Simd128_Splat_F4:
    case Js::OpCode::Simd128_Splat_I4:
    //case Js::OpCode::Simd128_Splat_D2:
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U4:
    case Js::OpCode::Simd128_Splat_U8:
    case Js::OpCode::Simd128_Splat_U16:
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        return Simd128LowerSplat(instr);
    // Float approximations.
    case Js::OpCode::Simd128_Rcp_F4:
    //case Js::OpCode::Simd128_Rcp_D2:
        return Simd128LowerRcp(instr);
    case Js::OpCode::Simd128_Sqrt_F4:
    //case Js::OpCode::Simd128_Sqrt_D2:
        return Simd128LowerSqrt(instr);
    case Js::OpCode::Simd128_RcpSqrt_F4:
    //case Js::OpCode::Simd128_RcpSqrt_D2:
        return Simd128LowerRcpSqrt(instr);
    // Bitwise select (mask ? a : b).
    case Js::OpCode::Simd128_Select_F4:
    case Js::OpCode::Simd128_Select_I4:
    //case Js::OpCode::Simd128_Select_D2:
    case Js::OpCode::Simd128_Select_I8:
    case Js::OpCode::Simd128_Select_I16:
    case Js::OpCode::Simd128_Select_U4:
    case Js::OpCode::Simd128_Select_U8:
    case Js::OpCode::Simd128_Select_U16:
        return Simd128LowerSelect(instr);
    // Integer negate (no single SSE op).
    case Js::OpCode::Simd128_Neg_I4:
    case Js::OpCode::Simd128_Neg_I8:
    case Js::OpCode::Simd128_Neg_I16:
    case Js::OpCode::Simd128_Neg_U4:
    case Js::OpCode::Simd128_Neg_U8:
    case Js::OpCode::Simd128_Neg_U16:
        return Simd128LowerNeg(instr);
    // Integer multiplies needing emulation sequences.
    case Js::OpCode::Simd128_Mul_I4:
    case Js::OpCode::Simd128_Mul_U4:
        return Simd128LowerMulI4(instr);
    case Js::OpCode::Simd128_Mul_I16:
    case Js::OpCode::Simd128_Mul_U16:
        return Simd128LowerMulI16(instr);
    // Shifts by a scalar amount.
    case Js::OpCode::Simd128_ShRtByScalar_I4:
    case Js::OpCode::Simd128_ShLtByScalar_I4:
    case Js::OpCode::Simd128_ShRtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I8:
    case Js::OpCode::Simd128_ShLtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_I16:
    case Js::OpCode::Simd128_ShRtByScalar_U4:
    case Js::OpCode::Simd128_ShLtByScalar_U4:
    case Js::OpCode::Simd128_ShRtByScalar_U8:
    case Js::OpCode::Simd128_ShLtByScalar_U8:
    case Js::OpCode::Simd128_ShRtByScalar_U16:
    case Js::OpCode::Simd128_ShLtByScalar_U16:
        return Simd128LowerShift(instr);
    // Typed-array loads: asm.js keeps bounds checks in the lowering,
    // non-asm.js extracts them from the instruction.
    case Js::OpCode::Simd128_LdArr_I4:
    case Js::OpCode::Simd128_LdArr_I8:
    case Js::OpCode::Simd128_LdArr_I16:
    case Js::OpCode::Simd128_LdArr_U4:
    case Js::OpCode::Simd128_LdArr_U8:
    case Js::OpCode::Simd128_LdArr_U16:
    case Js::OpCode::Simd128_LdArr_F4:
    //case Js::OpCode::Simd128_LdArr_D2:
    case Js::OpCode::Simd128_LdArrConst_I4:
    case Js::OpCode::Simd128_LdArrConst_I8:
    case Js::OpCode::Simd128_LdArrConst_I16:
    case Js::OpCode::Simd128_LdArrConst_U4:
    case Js::OpCode::Simd128_LdArrConst_U8:
    case Js::OpCode::Simd128_LdArrConst_U16:
    case Js::OpCode::Simd128_LdArrConst_F4:
    //case Js::OpCode::Simd128_LdArrConst_D2:
        if (m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode())
        {
            // with bound checks
            return Simd128AsmJsLowerLoadElem(instr);
        }
        else
        {
            // non-AsmJs, boundChecks are extracted from instr
            return Simd128LowerLoadElem(instr);
        }
    // Typed-array stores; same asm.js split as the loads above.
    case Js::OpCode::Simd128_StArr_I4:
    case Js::OpCode::Simd128_StArr_I8:
    case Js::OpCode::Simd128_StArr_I16:
    case Js::OpCode::Simd128_StArr_U4:
    case Js::OpCode::Simd128_StArr_U8:
    case Js::OpCode::Simd128_StArr_U16:
    case Js::OpCode::Simd128_StArr_F4:
    //case Js::OpCode::Simd128_StArr_D2:
    case Js::OpCode::Simd128_StArrConst_I4:
    case Js::OpCode::Simd128_StArrConst_I8:
    case Js::OpCode::Simd128_StArrConst_I16:
    case Js::OpCode::Simd128_StArrConst_U4:
    case Js::OpCode::Simd128_StArrConst_U8:
    case Js::OpCode::Simd128_StArrConst_U16:
    case Js::OpCode::Simd128_StArrConst_F4:
    //case Js::OpCode::Simd128_StArrConst_D2:
        if (m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode())
        {
            return Simd128AsmJsLowerStoreElem(instr);
        }
        else
        {
            return Simd128LowerStoreElem(instr);
        }
    // Swizzle/shuffle: 4-lane variants have dedicated lowerings.
    case Js::OpCode::Simd128_Swizzle_U4:
    case Js::OpCode::Simd128_Swizzle_I4:
    case Js::OpCode::Simd128_Swizzle_F4:
    //case Js::OpCode::Simd128_Swizzle_D2:
        return Simd128LowerSwizzle_4(instr);
    case Js::OpCode::Simd128_Shuffle_U4:
    case Js::OpCode::Simd128_Shuffle_I4:
    case Js::OpCode::Simd128_Shuffle_F4:
    //case Js::OpCode::Simd128_Shuffle_D2:
        return Simd128LowerShuffle_4(instr);
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U8:
    case Js::OpCode::Simd128_Swizzle_U16:
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U8:
    case Js::OpCode::Simd128_Shuffle_U16:
        return Simd128LowerShuffle(instr);
    // Cross-type conversions.
    case Js::OpCode::Simd128_FromUint32x4_F4:
        return Simd128LowerFloat32x4FromUint32x4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_I4:
        return Simd128LowerInt32x4FromFloat32x4(instr);
    case Js::OpCode::Simd128_FromFloat32x4_U4:
        return Simd128LowerUint32x4FromFloat32x4(instr);
    // Comparisons that need multi-instruction emulation.
    case Js::OpCode::Simd128_Neq_I4:
    case Js::OpCode::Simd128_Neq_I8:
    case Js::OpCode::Simd128_Neq_I16:
    case Js::OpCode::Simd128_Neq_U4:
    case Js::OpCode::Simd128_Neq_U8:
    case Js::OpCode::Simd128_Neq_U16:
        return Simd128LowerNotEqual(instr);
    case Js::OpCode::Simd128_Lt_U4:
    case Js::OpCode::Simd128_Lt_U8:
    case Js::OpCode::Simd128_Lt_U16:
    case Js::OpCode::Simd128_GtEq_U4:
    case Js::OpCode::Simd128_GtEq_U8:
    case Js::OpCode::Simd128_GtEq_U16:
        return Simd128LowerLessThan(instr);
    case Js::OpCode::Simd128_LtEq_I4:
    case Js::OpCode::Simd128_LtEq_I8:
    case Js::OpCode::Simd128_LtEq_I16:
    case Js::OpCode::Simd128_LtEq_U4:
    case Js::OpCode::Simd128_LtEq_U8:
    case Js::OpCode::Simd128_LtEq_U16:
    case Js::OpCode::Simd128_Gt_U4:
    case Js::OpCode::Simd128_Gt_U8:
    case Js::OpCode::Simd128_Gt_U16:
        return Simd128LowerLessThanOrEqual(instr);
    case Js::OpCode::Simd128_GtEq_I4:
    case Js::OpCode::Simd128_GtEq_I8:
    case Js::OpCode::Simd128_GtEq_I16:
        return Simd128LowerGreaterThanOrEqual(instr);
    // Float min/max (NaN semantics need special handling).
    case Js::OpCode::Simd128_Min_F4:
    case Js::OpCode::Simd128_Max_F4:
        return Simd128LowerMinMax_F4(instr);
    // Boolean reductions.
    case Js::OpCode::Simd128_AnyTrue_B4:
    case Js::OpCode::Simd128_AnyTrue_B8:
    case Js::OpCode::Simd128_AnyTrue_B16:
        return Simd128LowerAnyTrue(instr);
    case Js::OpCode::Simd128_AllTrue_B4:
    case Js::OpCode::Simd128_AllTrue_B8:
    case Js::OpCode::Simd128_AllTrue_B16:
        return Simd128LowerAllTrue(instr);
    default:
        AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
    }
    // Unreachable in practice (all cases return); keeps the compiler happy.
    return nullptr;
}
  321. IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
  322. {
  323. Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
  324. Assert(instr->GetDst()->IsSimd128());
  325. Assert(instr->GetSrc1()->IsSimd128());
  326. Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
  327. Assert(instr->GetSrc2() == nullptr);
  328. AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
  329. // MOVUPS dst, [const]
  330. AsmJsSIMDValue *pValue = NativeCodeDataNew(instr->m_func->GetNativeCodeDataAllocator(), AsmJsSIMDValue);
  331. pValue->SetValue(value);
  332. IR::Opnd * opnd = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
  333. instr->ReplaceSrc1(opnd);
  334. instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
  335. Legalize(instr);
  336. return instr->m_prev;
  337. }
  338. IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd)
  339. {
  340. Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 ||
  341. instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
  342. IR::Instr *pInstr;
  343. //dst = cmpOpcode dst, X86_ALL_ZEROS
  344. pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New((void*)&X86_ALL_ZEROS, TySimd128I4, m_func), m_func);
  345. instr->InsertBefore(pInstr);
  346. Legalize(pInstr);
  347. // dst = PANDN dst, X86_ALL_NEG_ONES
  348. pInstr = IR::Instr::New(Js::OpCode::PANDN, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
  349. instr->InsertBefore(pInstr);
  350. Legalize(pInstr);
  351. return instr;
  352. }
  353. IR::Instr* LowererMD::Simd128LowerConstructor_8(IR::Instr *instr)
  354. {
  355. IR::Opnd* dst = nullptr;
  356. IR::Opnd* srcs[8];
  357. //Simd128_IntsToI8/U8/B8
  358. Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToI8 || instr->m_opcode == Js::OpCode::Simd128_IntsToU8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8);
  359. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  360. Assert(args->Count() == 9);
  361. dst = args->Pop();
  362. uint i = 0;
  363. while (!args->Empty() && i < 8)
  364. {
  365. srcs[i] = args->Pop();
  366. // src's might have been constant prop'ed. Enregister them if so.
  367. srcs[i] = EnregisterIntConst(instr, srcs[i], TyInt16);
  368. Assert(srcs[i]->GetType() == TyInt16 && srcs[i]->IsRegOpnd());
  369. // PINSRW dst, srcs[i], i
  370. instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRW, dst, srcs[i], IR::IntConstOpnd::New(i, TyInt8, m_func, true), m_func));
  371. i++;
  372. }
  373. if (instr->m_opcode == Js::OpCode::Simd128_IntsToB8)
  374. {
  375. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
  376. }
  377. IR::Instr* prevInstr;
  378. prevInstr = instr->m_prev;
  379. instr->Remove();
  380. return prevInstr;
  381. }
// Lowers Simd128_IntsToI16/U16/B16: writes each of the 16 int8 lane values
// into the thread context's SIMD temp area, then loads the assembled 128-bit
// value with a single MOVUPS. B16 results are canonicalized to
// all-ones/all-zeros lanes. Removes the original instruction and returns
// its predecessor.
IR::Instr* LowererMD::Simd128LowerConstructor_16(IR::Instr *instr)
{
IR::Opnd* dst = nullptr;
IR::Opnd* srcs[16];
//Simd128_IntsToI16/U16/B16
Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToU16 || instr->m_opcode == Js::OpCode::Simd128_IntsToI16 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16);
SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
// Scratch buffer from the thread context; assumes no other lowering has a
// live value in the temp area across this sequence -- TODO confirm.
uint8 *tempSIMD = (uint8*)(instr->m_func->GetScriptContext()->GetThreadContext()->GetSimdTempArea());
#if DBG
// using only one SIMD temp
intptr_t endAddrSIMD = (intptr_t)(tempSIMD + sizeof(X86SIMDValue));
#endif
void * address;
IR::Instr * newInstr;
// dst + 16 lane sources
Assert(args->Count() == 17);
dst = args->Pop();
uint i = 0;
while (!args->Empty() && i < 16)
{
srcs[i] = args->Pop();
// src's might have been constant prop'ed. Enregister them if so.
srcs[i] = EnregisterIntConst(instr, srcs[i], TyInt8);
Assert(srcs[i]->GetType() == TyInt8 && srcs[i]->IsRegOpnd());
address = (void*)(tempSIMD + i);
// check for buffer overrun
Assert((intptr_t)address < endAddrSIMD);
// MOV [temp + i], src[i] (TyInt8)
newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New((void*)(tempSIMD + i), TyInt8, m_func), srcs[i], m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
i++;
}
// MOVUPS dst, [temp] -- load all 16 assembled lanes at once
newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)(tempSIMD), TySimd128U16, m_func), m_func);
instr->InsertBefore(newInstr);
Legalize(newInstr);
if (instr->m_opcode == Js::OpCode::Simd128_IntsToB16)
{
// Canonicalize bool lanes: non-zero -> 0xFF, zero -> 0x00.
instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
}
IR::Instr* prevInstr;
prevInstr = instr->m_prev;
instr->Remove();
return prevInstr;
}
  427. IR::Instr* LowererMD::Simd128LowerConstructor_4(IR::Instr *instr)
  428. {
  429. IR::Opnd* dst = nullptr;
  430. IR::Opnd* src1 = nullptr;
  431. IR::Opnd* src2 = nullptr;
  432. IR::Opnd* src3 = nullptr;
  433. IR::Opnd* src4 = nullptr;
  434. IR::Instr* newInstr = nullptr;
  435. Assert(instr->m_opcode == Js::OpCode::Simd128_FloatsToF4 ||
  436. instr->m_opcode == Js::OpCode::Simd128_IntsToB4 ||
  437. instr->m_opcode == Js::OpCode::Simd128_IntsToI4 ||
  438. instr->m_opcode == Js::OpCode::Simd128_IntsToU4);
  439. // use MOVSS for both int32x4 and float32x4. MOVD zeroes upper bits.
  440. Js::OpCode movOpcode = Js::OpCode::MOVSS;
  441. Js::OpCode shiftOpcode = Js::OpCode::PSLLDQ;
  442. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  443. // The number of src opnds should be exact. If opnds are missing, they should be filled in by globopt during type-spec.
  444. Assert(args->Count() == 5);
  445. dst = args->Pop();
  446. src1 = args->Pop();
  447. src2 = args->Pop();
  448. src3 = args->Pop();
  449. src4 = args->Pop();
  450. if (instr->m_opcode == Js::OpCode::Simd128_FloatsToF4)
  451. {
  452. // We don't have f32 type-spec, so we type-spec to f64 and convert to f32 before use.
  453. if (src1->IsFloat64())
  454. {
  455. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  456. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  457. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func);
  458. instr->InsertBefore(newInstr);
  459. src1 = regOpnd32;
  460. }
  461. if (src2->IsFloat64())
  462. {
  463. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  464. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  465. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src2, this->m_func);
  466. instr->InsertBefore(newInstr);
  467. src2 = regOpnd32;
  468. }
  469. if (src3->IsFloat64())
  470. {
  471. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  472. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  473. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src3, this->m_func);
  474. instr->InsertBefore(newInstr);
  475. src3 = regOpnd32;
  476. }
  477. if (src4->IsFloat64())
  478. {
  479. IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
  480. // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
  481. newInstr = IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src4, this->m_func);
  482. instr->InsertBefore(newInstr);
  483. src4 = regOpnd32;
  484. }
  485. Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat32);
  486. Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat32);
  487. Assert(src3->IsRegOpnd() && src3->GetType() == TyFloat32);
  488. Assert(src4->IsRegOpnd() && src4->GetType() == TyFloat32);
  489. // MOVSS dst, src4
  490. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src4, m_func));
  491. // PSLLDQ dst, dst, 4
  492. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  493. // MOVSS dst, src3
  494. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src3, m_func));
  495. // PSLLDQ dst, 4
  496. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  497. // MOVSS dst, src2
  498. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src2, m_func));
  499. // PSLLDQ dst, 4
  500. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(4, TyInt8, m_func, true), m_func));
  501. // MOVSS dst, src1
  502. instr->InsertBefore(IR::Instr::New(movOpcode, dst, src1, m_func));
  503. }
  504. else
  505. {
  506. //Simd128_IntsToI4/U4
  507. IR::RegOpnd *temp = IR::RegOpnd::New(TyFloat32, m_func);
  508. // src's might have been constant prop'ed. Enregister them if so.
  509. src4 = EnregisterIntConst(instr, src4);
  510. src3 = EnregisterIntConst(instr, src3);
  511. src2 = EnregisterIntConst(instr, src2);
  512. src1 = EnregisterIntConst(instr, src1);
  513. Assert(src1->GetType() == TyInt32 && src1->IsRegOpnd());
  514. Assert(src2->GetType() == TyInt32 && src2->IsRegOpnd());
  515. Assert(src3->GetType() == TyInt32 && src3->IsRegOpnd());
  516. Assert(src4->GetType() == TyInt32 && src4->IsRegOpnd());
  517. // MOVD t(TyFloat32), src4(TyInt32)
  518. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src4, m_func));
  519. // MOVSS dst, t
  520. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  521. // PSLLDQ dst, dst, 4
  522. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  523. // MOVD t(TyFloat32), sr34(TyInt32)
  524. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src3, m_func));
  525. // MOVSS dst, t
  526. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  527. // PSLLDQ dst, dst, 4
  528. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  529. // MOVD t(TyFloat32), src2(TyInt32)
  530. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src2, m_func));
  531. // MOVSS dst, t
  532. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  533. // PSLLDQ dst, dst, 4
  534. instr->InsertBefore(IR::Instr::New(shiftOpcode, dst, dst, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func));
  535. // MOVD t(TyFloat32), src1(TyInt32)
  536. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, temp, src1, m_func));
  537. // MOVSS dst, t
  538. instr->InsertBefore(IR::Instr::New(movOpcode, dst, temp, m_func));
  539. if (instr->m_opcode == Js::OpCode::Simd128_IntsToB4)
  540. {
  541. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
  542. }
  543. }
  544. IR::Instr* prevInstr;
  545. prevInstr = instr->m_prev;
  546. instr->Remove();
  547. return prevInstr;
  548. }
#if 0
// NOTE: Disabled code. Lowering for the float64x2 constructor
// (Simd128_DoublesToD2); kept for reference only. If re-enabled,
// re-verify against the current IR/Legalize interfaces.
IR::Instr *LowererMD::Simd128LowerConstructor_2(IR::Instr *instr)
{
    IR::Opnd* dst = nullptr;
    IR::Opnd* src1 = nullptr;
    IR::Opnd* src2 = nullptr;
    Assert(instr->m_opcode == Js::OpCode::Simd128_DoublesToD2);
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    src2 = instr->GetSrc2();
    Assert(src1->IsRegOpnd() && src1->GetType() == TyFloat64);
    Assert(src2->IsRegOpnd() && src2->GetType() == TyFloat64);
    // MOVSD dst, src2 -- high lane value into low 8 bytes
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src2, m_func));
    // PSLLDQ dst, dst, 8 -- byte-shift left to the high lane
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, dst, dst, IR::IntConstOpnd::New(TySize[TyFloat64], TyInt8, m_func, true), m_func));
    // MOVSD dst, src1 -- low lane
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, m_func));
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    IR::Instr* prevInstr;
    prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
#endif
// Lowers SIMD extractLane for all lane widths. Strategy: byte-shift the wanted
// 4-byte chunk down to the low end of the vector, move it to a GPR/float reg,
// then (for 8/16-lane types) extract the 1- or 2-byte sublane from that chunk
// and sign-/zero-extend; bool lanes are finally normalized to 0/1... er, 0/-1
// via the CMP/SETE/MOVSX sequence at the end.
IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
{
    IR::Opnd* dst, *src1, *src2;
    Js::OpCode movOpcode = Js::OpCode::MOVSS;
    // laneWidth: bytes per 4-byte extraction chunk (always 4 here);
    // laneIndex: index of the chunk after narrowing; shamt: bit shift inside
    // the chunk for sub-4-byte lanes; mask: zero-extension mask for unsigned lanes.
    uint laneWidth = 0, laneIndex = 0, shamt = 0, mask = 0;
    IRType laneType = TyInt32;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    src2 = instr->GetSrc2();
    Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64));
    Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
    // Lane index must be a compile-time constant.
    Assert(src2 && src2->IsIntConstOpnd());
    laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
    laneWidth = 4;
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_ExtractLane_F4:
        movOpcode = Js::OpCode::MOVSS;
        Assert(laneIndex < 4);
        break;
    case Js::OpCode::Simd128_ExtractLane_I8:
    case Js::OpCode::Simd128_ExtractLane_U8:
    case Js::OpCode::Simd128_ExtractLane_B8:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 8);
        // Two 16-bit lanes per 4-byte chunk: pick chunk, then bit offset within it.
        shamt = (laneIndex % 2) * 16;
        laneIndex = laneIndex / 2;
        laneType = TyInt16;
        mask = 0x0000ffff;
        break;
    case Js::OpCode::Simd128_ExtractLane_I16:
    case Js::OpCode::Simd128_ExtractLane_U16:
    case Js::OpCode::Simd128_ExtractLane_B16:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 16);
        // Four 8-bit lanes per 4-byte chunk.
        shamt = (laneIndex % 4) * 8;
        laneIndex = laneIndex / 4;
        laneType = TyInt8;
        mask = 0x000000ff;
        break;
    case Js::OpCode::Simd128_ExtractLane_U4:
    case Js::OpCode::Simd128_ExtractLane_I4:
    case Js::OpCode::Simd128_ExtractLane_B4:
        movOpcode = Js::OpCode::MOVD;
        Assert(laneIndex < 4);
        break;
    default:
        Assert(UNREACHED);
    }
    {
        IR::Opnd* tmp = src1;
        if (laneIndex != 0)
        {
            // tmp = PSRLDQ src1, laneWidth * laneIndex -- byte-shift the wanted
            // 4-byte chunk down to byte 0.
            tmp = IR::RegOpnd::New(src1->GetType(), m_func);
            IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneWidth * laneIndex, TyInt8, m_func, true), m_func);
            instr->InsertBefore(shiftInstr);
            Legalize(shiftInstr);
        }
        // MOVSS/MOVSD/MOVD dst, tmp -- move the low chunk out of the vector reg.
        instr->InsertBefore(IR::Instr::New(movOpcode, dst, tmp, m_func));
    }
    // dst has the 4-byte lane
    if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16|| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16|| instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
    {
        // extract the 1/2 bytes sublane
        IR::Instr *newInstr = nullptr;
        if (shamt != 0)
        {
            // SHR dst, dst, shamt -- bring the sublane down to bit 0.
            newInstr = IR::Instr::New(Js::OpCode::SHR, dst, dst, IR::IntConstOpnd::New((IntConstType)shamt, TyInt8, m_func), m_func);
            instr->InsertBefore(newInstr);
            Legalize(newInstr);
        }
        Assert(laneType == TyInt8 || laneType == TyInt16);
        // zero or sign-extend upper bits
        if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16)
        {
            // Signed lanes: sign-extend from 8 or 16 bits.
            if (laneType == TyInt8)
            {
                // Copy through an 8-bit temp so MOVSX sees a byte operand.
                IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
                newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
                instr->InsertBefore(newInstr);
                Legalize(newInstr);
                newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func);
            }
            else
            {
                newInstr = IR::Instr::New(Js::OpCode::MOVSXW, dst, dst->UseWithNewType(laneType, m_func), m_func);
            }
        }
        else
        {
            // Unsigned/bool lanes: zero-extend by masking.
            newInstr = IR::Instr::New(Js::OpCode::AND, dst, dst, IR::IntConstOpnd::New(mask, TyInt32, m_func), m_func);
        }
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }
    if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
        instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
    {
        // Bool lanes: canonicalize the extracted value so a lane equal to -1
        // (all ones) yields -1 in dst and anything else yields 0.
        IR::Instr* pInstr = nullptr;
        IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
        // cmp dst, -1
        pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func));
        pInstr->SetSrc2(IR::IntConstOpnd::New(-1, laneType, m_func, true));
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // mov tmp(TyInt8), dst
        pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // sete tmp(TyInt8) -- tmp = 1 if lane == -1, else 0
        pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // movsx dst, tmp(TyInt8)
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
// Lowers SIMD splat: broadcast a scalar into every lane of a 128-bit vector.
// 32-bit lanes: MOV + shuffle-with-imm(0). 16/8-bit lanes: truncate, MOVD,
// then widen by PUNPCK interleaves before the final PSHUFD. Bool splats
// branch to produce either all-ones or all-zeros.
IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
{
    Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
    IR::Opnd *dst, *src1;
    IR::Instr *pInstr = nullptr;
    dst = instr->GetDst();
    src1 = instr->GetSrc1();
    Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 ||
        src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 ||
        src1->GetType() == TyUint8 || src1->GetType() == TyUint32));
    Assert(!instr->GetSrc2());
    IR::Opnd* tempTruncate = nullptr;
    // bSkip = true means the case emitted its full sequence itself and the
    // generic MOV+shuffle tail below must not run.
    bool bSkip = false;
    IR::LabelInstr *labelZero = IR::LabelInstr::New(Js::OpCode::Label, m_func);
    IR::LabelInstr *labelDone = IR::LabelInstr::New(Js::OpCode::Label, m_func);
    switch (instr->m_opcode)
    {
    case Js::OpCode::Simd128_Splat_F4:
        shufOpCode = Js::OpCode::SHUFPS;
        movOpCode = Js::OpCode::MOVSS;
        break;
    case Js::OpCode::Simd128_Splat_I4:
    case Js::OpCode::Simd128_Splat_U4:
        shufOpCode = Js::OpCode::PSHUFD;
        movOpCode = Js::OpCode::MOVD;
        break;
#if 0
    case Js::OpCode::Simd128_Splat_D2:
        shufOpCode = Js::OpCode::SHUFPD;
        movOpCode = Js::OpCode::MOVSD;
        break;
#endif // 0
    case Js::OpCode::Simd128_Splat_I8:
    case Js::OpCode::Simd128_Splat_U8:
        // MOV tempTruncate(bx), src1: truncate the value to 16bit int
        // MOVD dst, tempTruncate(bx)
        // PUNPCKLWD dst, dst
        // PSHUFD dst, dst, 0
        tempTruncate = EnregisterIntConst(instr, src1, TyInt16);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    case Js::OpCode::Simd128_Splat_I16:
    case Js::OpCode::Simd128_Splat_U16:
        // MOV tempTruncate(bx), src1: truncate the value to 8bit int
        // MOVD dst, tempTruncate(bx)
        // PUNPCKLBW dst, dst
        // PUNPCKLWD dst, dst
        // PSHUFD dst, dst, 0
        tempTruncate = EnregisterIntConst(instr, src1, TyInt8);
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLBW, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
        bSkip = true;
        break;
    case Js::OpCode::Simd128_Splat_B4:
    case Js::OpCode::Simd128_Splat_B8:
    case Js::OpCode::Simd128_Splat_B16:
        // Bool splat: any non-zero scalar -> all lanes set; zero -> all clear.
        // CMP src1, 0
        // JEQ $labelZero
        // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
        // JMP $labelDone
        // $labelZero:
        // XORPS dst, dst
        // $labelDone:
        //pInstr = IR::Instr::New(Js::OpCode::CMP, src1, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func);
        //instr->InsertBefore(pInstr);
        //Legalize(pInstr);
        // cmp src1, 0000h
        pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
        pInstr->SetSrc1(src1);
        pInstr->SetSrc2(IR::IntConstOpnd::New(0x0000, TyInt32, m_func, true));
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        //JEQ $labelZero
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, labelZero, m_func));
        // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
        pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // JMP $labelDone
        instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, m_func));
        // $labelZero:
        instr->InsertBefore(labelZero);
        // XORPS dst, dst
        instr->InsertBefore(IR::Instr::New(Js::OpCode::XORPS, dst, dst, dst, m_func)); // make dst to be 0
        // $labelDone:
        instr->InsertBefore(labelDone);
        bSkip = true;
        break;
    default:
        Assert(UNREACHED);
    }
    // Float splat may receive an f64-type-spec'ed source; narrow it to f32 first.
    if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
    {
        IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
        // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
        instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
        src1 = regOpnd32;
    }
    // Generic 32-bit-lane tail: move the scalar into lane 0, then shuffle it
    // into all four lanes with immediate 0.
    if (!bSkip)
    {
        instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
        instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
    }
    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  812. IR::Instr* LowererMD::Simd128LowerRcp(IR::Instr *instr, bool removeInstr)
  813. {
  814. Js::OpCode opcode = Js::OpCode::DIVPS;
  815. IR::Opnd *dst, *src1;
  816. dst = instr->GetDst();
  817. src1 = instr->GetSrc1();
  818. Assert(dst && dst->IsRegOpnd());
  819. Assert(src1 && src1->IsRegOpnd());
  820. Assert(instr->GetSrc2() == nullptr);
  821. Assert(src1->IsSimd128F4() || src1->IsSimd128I4());
  822. opcode = Js::OpCode::DIVPS;
  823. #if 0
  824. {
  825. Assert(instr->m_opcode == Js::OpCode::Simd128_Rcp_D2 || instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
  826. Assert(src1->IsSimd128D2());
  827. opcode = Js::OpCode::DIVPD;
  828. x86_allones_mask = (void*)(&X86_ALL_ONES_D2);
  829. }
  830. #endif // 0
  831. IR::RegOpnd* tmp = IR::RegOpnd::New(src1->GetType(), m_func);
  832. IR::Instr* movInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp, IR::MemRefOpnd::New((void*)(&X86_ALL_ONES_F4), src1->GetType(), m_func), m_func);
  833. instr->InsertBefore(movInstr);
  834. Legalize(movInstr);
  835. instr->InsertBefore(IR::Instr::New(opcode, tmp, tmp, src1, m_func));
  836. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, tmp, m_func));
  837. if (removeInstr)
  838. {
  839. IR::Instr* prevInstr = instr->m_prev;
  840. instr->Remove();
  841. return prevInstr;
  842. }
  843. return instr;
  844. }
  845. IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
  846. {
  847. Js::OpCode opcode = Js::OpCode::SQRTPS;
  848. IR::Opnd *dst, *src1;
  849. dst = instr->GetDst();
  850. src1 = instr->GetSrc1();
  851. Assert(dst && dst->IsRegOpnd());
  852. Assert(src1 && src1->IsRegOpnd());
  853. Assert(instr->GetSrc2() == nullptr);
  854. opcode = Js::OpCode::SQRTPS;
  855. #if 0
  856. {
  857. Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
  858. opcode = Js::OpCode::SQRTPD;
  859. }
  860. #endif // 0
  861. instr->InsertBefore(IR::Instr::New(opcode, dst, src1, m_func));
  862. IR::Instr* prevInstr = instr->m_prev;
  863. instr->Remove();
  864. return prevInstr;
  865. }
  866. IR::Instr* LowererMD::Simd128LowerRcpSqrt(IR::Instr *instr)
  867. {
  868. Js::OpCode opcode = Js::OpCode::SQRTPS;
  869. Simd128LowerRcp(instr, false);
  870. opcode = Js::OpCode::SQRTPS;
  871. #if 0
  872. else
  873. {
  874. Assert(instr->m_opcode == Js::OpCode::Simd128_RcpSqrt_D2);
  875. opcode = Js::OpCode::SQRTPD;
  876. }
  877. #endif // 0
  878. instr->InsertBefore(IR::Instr::New(opcode, instr->GetDst(), instr->GetDst(), m_func));
  879. IR::Instr* prevInstr = instr->m_prev;
  880. instr->Remove();
  881. return prevInstr;
  882. }
// Lowers SIMD select(mask, trueValue, falseValue) with the classic bitwise
// blend: dst = (mask & trueValue) | (~mask & falseValue).
IR::Instr* LowererMD::Simd128LowerSelect(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Select_F4 || instr->m_opcode == Js::OpCode::Simd128_Select_I4 /*|| instr->m_opcode == Js::OpCode::Simd128_Select_D2 */||
        instr->m_opcode == Js::OpCode::Simd128_Select_I8 || instr->m_opcode == Js::OpCode::Simd128_Select_I16 || instr->m_opcode == Js::OpCode::Simd128_Select_U4 ||
        instr->m_opcode == Js::OpCode::Simd128_Select_U8 || instr->m_opcode == Js::OpCode::Simd128_Select_U16 );
    IR::Opnd* dst = nullptr;
    IR::Opnd* src1 = nullptr;
    IR::Opnd* src2 = nullptr;
    IR::Opnd* src3 = nullptr;
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    // The number of src opnds should be exact. Missing opnds means type-error, and we should generate an exception throw instead (or globopt does).
    Assert(args->Count() == 4);
    dst = args->Pop();
    src1 = args->Pop(); // mask
    src2 = args->Pop(); // trueValue
    src3 = args->Pop(); // falseValue
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    Assert(src3->IsRegOpnd() && src3->IsSimd128());
    IR::RegOpnd *tmp = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::Instr *pInstr = nullptr;
    // ANDPS tmp1, mask, tvalue -- tmp = mask & trueValue
    pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, src1, src2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // ANDNPS dst, mask, fvalue -- dst = ~mask & falseValue (note: AND-NOT, not ANDPS)
    pInstr = IR::Instr::New(Js::OpCode::ANDNPS, dst, src1, src3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // ORPS dst, dst, tmp1 -- combine the two halves of the blend
    pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, tmp, m_func);
    instr->InsertBefore(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  920. IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr)
  921. {
  922. IR::Opnd* dst = instr->GetDst();
  923. IR::Opnd* src1 = instr->GetSrc1();
  924. Js::OpCode addOpcode = Js::OpCode::PADDD;
  925. void * allOnes = (void*)&X86_ALL_ONES_I4;
  926. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  927. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  928. Assert(instr->GetSrc2() == nullptr);
  929. switch (instr->m_opcode)
  930. {
  931. case Js::OpCode::Simd128_Neg_I4:
  932. case Js::OpCode::Simd128_Neg_U4:
  933. break;
  934. case Js::OpCode::Simd128_Neg_I8:
  935. case Js::OpCode::Simd128_Neg_U8:
  936. addOpcode = Js::OpCode::PADDW;
  937. allOnes = (void*)&X86_ALL_ONES_I8;
  938. break;
  939. case Js::OpCode::Simd128_Neg_I16:
  940. case Js::OpCode::Simd128_Neg_U16:
  941. addOpcode = Js::OpCode::PADDB;
  942. allOnes = (void*)&X86_ALL_ONES_I16;
  943. break;
  944. default:
  945. Assert(UNREACHED);
  946. }
  947. // MOVAPS dst, src1
  948. IR::Instr *pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  949. instr->InsertBefore(pInstr);
  950. // PANDN dst, dst, 0xfff...f
  951. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, src1->GetType(), m_func), m_func);
  952. instr->InsertBefore(pInstr);
  953. Legalize(pInstr);
  954. // addOpCode dst, dst, {allOnes}
  955. pInstr = IR::Instr::New(addOpcode, dst, dst, IR::MemRefOpnd::New(allOnes, src1->GetType(), m_func), m_func);
  956. instr->InsertBefore(pInstr);
  957. Legalize(pInstr);
  958. pInstr = instr->m_prev;
  959. instr->Remove();
  960. return pInstr;
  961. }
// Lowers int32x4/uint32x4 multiply. SSE2 has no packed 32x32->32 multiply
// (PMULLD is SSE4.1), so it is synthesized from two PMULUDQs (which multiply
// the even dword lanes into 64-bit products), then the low dwords of the
// products are compressed with PSHUFD and interleaved with PUNPCKLDQ.
IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4 || instr->m_opcode == Js::OpCode::Simd128_Mul_U4);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    Assert(dst->IsRegOpnd() && dst->IsSimd128());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());
    temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
    temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
    temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
    // temp1 = PMULUDQ src1, src2 -- 64-bit products of lanes 0 and 2
    pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp2 = PSRLDQ src1, 4 (bytes) -- move odd lanes into even positions
    pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp3 = PSRLDQ src2, 4 (bytes)
    pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    //MakeDstEquSrc1(pInstr);
    Legalize(pInstr);
    // temp2 = PMULUDQ temp2, temp3 -- 64-bit products of lanes 1 and 3
    pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSHUFD temp1, temp1, 0x8 -- pack low dwords of products 0,2 into lanes 0,1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func));
    // PSHUFD temp2, temp2, 0x8 -- pack low dwords of products 1,3 into lanes 0,1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
    // PUNPCKLDQ dst, temp1, temp2 -- interleave to restore lane order 0,1,2,3
    pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers int8x16/uint8x16 multiply. SSE2 has no packed 8-bit multiply, so it
// is built from 16-bit PMULLW: multiply whole words and keep the even (low)
// bytes, then shift both operands right by 8, multiply again, shift the
// result back into the odd byte positions, and OR the two halves together.
IR::Instr* LowererMD::Simd128LowerMulI16(IR::Instr *instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I16 || instr->m_opcode == Js::OpCode::Simd128_Mul_U16);
    IR::Instr *pInstr = nullptr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    IR::Opnd* temp1, *temp2, *temp3;
    IRType simdType, laneType;
    if (instr->m_opcode == Js::OpCode::Simd128_Mul_I16)
    {
        simdType = TySimd128I16;
        laneType = TyInt8;
    }
    else
    {
        simdType = TySimd128U16;
        laneType = TyUint8;
    }
    Assert(dst->IsRegOpnd() && dst->GetType() == simdType);
    Assert(src1->IsRegOpnd() && src1->GetType() == simdType);
    Assert(src2->IsRegOpnd() && src2->GetType() == simdType);
    temp1 = IR::RegOpnd::New(simdType, m_func);
    temp2 = IR::RegOpnd::New(simdType, m_func);
    temp3 = IR::RegOpnd::New(simdType, m_func);
    // MOVAPS temp1, src1
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp1, src1, m_func));
    // PMULLW temp1, temp1, src2 -- word products; low byte of each word is the
    // product of the even-position bytes
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp1, temp1, src2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PAND temp1, {0x00ff00ff00ff00ff00ff00ff00ff00ff} : to zero out bytes 1,3,5...
    pInstr = IR::Instr::New(Js::OpCode::PAND, temp1, temp1, IR::MemRefOpnd::New((void*)&X86_LOWBYTES_MASK, simdType, m_func), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp2, src2, 8 -- upper 8 bits of each word of src2
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp2, src2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSRLW temp3, src1, 8 -- upper 8 bits of each word of src1
    pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp3, src1, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PMULLW temp2, temp2, temp3 -- products of the odd-position bytes
    pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp2, temp2, temp3, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // PSLLW temp2, temp2, 8 : sets the result bytes 1,3,5..
    pInstr = IR::Instr::New(Js::OpCode::PSLLW, temp2, temp2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // POR dst, temp1, temp2 : OR bytes 0,2,4.. with bytes 1,3,5.. to final result
    pInstr = IR::Instr::New(Js::OpCode::POR, dst, temp1, temp2, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  1066. IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
  1067. {
  1068. IR::Opnd* dst = instr->GetDst();
  1069. IR::Opnd* src1 = instr->GetSrc1();
  1070. IR::Opnd* src2 = instr->GetSrc2();
  1071. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  1072. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1073. Assert(src2->IsInt32());
  1074. Js::OpCode opcode = Js::OpCode::PSLLD;
  1075. int elementSizeInBytes = 0;
  1076. switch (instr->m_opcode)
  1077. {
  1078. case Js::OpCode::Simd128_ShLtByScalar_I4:
  1079. case Js::OpCode::Simd128_ShLtByScalar_U4: // same as int32x4.ShiftLeftScalar
  1080. opcode = Js::OpCode::PSLLD;
  1081. elementSizeInBytes = 4;
  1082. break;
  1083. case Js::OpCode::Simd128_ShRtByScalar_I4:
  1084. opcode = Js::OpCode::PSRAD;
  1085. elementSizeInBytes = 4;
  1086. break;
  1087. case Js::OpCode::Simd128_ShLtByScalar_I8:
  1088. case Js::OpCode::Simd128_ShLtByScalar_U8: // same as int16x8.ShiftLeftScalar
  1089. opcode = Js::OpCode::PSLLW;
  1090. elementSizeInBytes = 2;
  1091. break;
  1092. case Js::OpCode::Simd128_ShRtByScalar_I8:
  1093. opcode = Js::OpCode::PSRAW;
  1094. elementSizeInBytes = 2;
  1095. break;
  1096. case Js::OpCode::Simd128_ShRtByScalar_U4:
  1097. opcode = Js::OpCode::PSRLD;
  1098. elementSizeInBytes = 4;
  1099. break;
  1100. case Js::OpCode::Simd128_ShRtByScalar_U8:
  1101. opcode = Js::OpCode::PSRLW;
  1102. elementSizeInBytes = 2;
  1103. break;
  1104. case Js::OpCode::Simd128_ShLtByScalar_I16: // composite, int8x16.ShiftLeftScalar
  1105. case Js::OpCode::Simd128_ShRtByScalar_I16: // composite, int8x16.ShiftRightScalar
  1106. case Js::OpCode::Simd128_ShLtByScalar_U16: // same as int8x16.ShiftLeftScalar
  1107. case Js::OpCode::Simd128_ShRtByScalar_U16: // composite, uint8x16.ShiftRightScalar
  1108. elementSizeInBytes = 1;
  1109. break;
  1110. default:
  1111. Assert(UNREACHED);
  1112. }
  1113. IR::Instr *pInstr = nullptr;
  1114. IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, m_func);
  1115. IR::RegOpnd *reg2 = IR::RegOpnd::New(TyInt32, m_func);
  1116. IR::RegOpnd *tmp0 = IR::RegOpnd::New(src1->GetType(), m_func);
  1117. IR::RegOpnd *tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  1118. IR::RegOpnd *tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
  1119. //Shift amount: The shift amout is masked by [ElementSize] * 8
  1120. //The masked Shift amount is moved to xmm register
  1121. //AND shamt, shmask, shamt
  1122. //MOVD tmp0, shamt
  1123. IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func);
  1124. // en-register
  1125. IR::Opnd *origShamt = EnregisterIntConst(instr, src2); //unnormalized shift amount
  1126. pInstr = IR::Instr::New(Js::OpCode::AND, shamt, origShamt, IR::IntConstOpnd::New(Js::SIMDUtils::SIMDGetShiftAmountMask(elementSizeInBytes), TyInt8, m_func), m_func); // normalizing by elm width (i.e. shamt % elm_width)
  1127. instr->InsertBefore(pInstr);
  1128. Legalize(pInstr);
  1129. pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp0, shamt, m_func);
  1130. instr->InsertBefore(pInstr);
  1131. if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 ||
  1132. instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 ||
  1133. instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 ||
  1134. instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8)
  1135. {
  1136. // shiftOpCode dst, src1, tmp0
  1137. pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func);
  1138. instr->InsertBefore(pInstr);
  1139. Legalize(pInstr);
  1140. }
  1141. else if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I16 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U16)
  1142. {
  1143. // MOVAPS tmp1, src1
  1144. pInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func);
  1145. instr->InsertBefore(pInstr);
  1146. // MOVAPS dst, src1
  1147. pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  1148. instr->InsertBefore(pInstr);
  1149. // PAND tmp1, [X86_HIGHBYTES_MASK]
  1150. pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New((void*)&X86_HIGHBYTES_MASK, TySimd128I4, m_func), m_func);
  1151. instr->InsertBefore(pInstr);
  1152. Legalize(pInstr);
  1153. // PSLLW tmp1, tmp0
  1154. pInstr = IR::Instr::New(Js::OpCode::PSLLW, tmp1, tmp1, tmp0, m_func);
  1155. instr->InsertBefore(pInstr);
  1156. Legalize(pInstr);
  1157. // PSLLW dst, tmp0
  1158. pInstr = IR::Instr::New(Js::OpCode::PSLLW, dst, dst, tmp0, m_func);
  1159. instr->InsertBefore(pInstr);
  1160. Legalize(pInstr);
  1161. // PAND dst, [X86_LOWBYTES_MASK]
  1162. pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New((void*)&X86_LOWBYTES_MASK, TySimd128I4, m_func), m_func);
  1163. instr->InsertBefore(pInstr);
  1164. Legalize(pInstr);
  1165. // POR dst, tmp1
  1166. pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func);
  1167. instr->InsertBefore(pInstr);
  1168. }
  1169. else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I16)
  1170. {
  1171. // MOVAPS tmp1, src1
  1172. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func));
  1173. // MOVAPS dst, src1
  1174. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  1175. // PSLLW dst, 8
  1176. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLW, dst, dst, IR::IntConstOpnd::New(8, TyInt8, m_func), m_func));
  1177. // LEA reg, [shamt + 8]
  1178. IR::IndirOpnd *indirOpnd = IR::IndirOpnd::New(shamt->AsRegOpnd(), +8, TyInt32, m_func);
  1179. instr->InsertBefore(IR::Instr::New(Js::OpCode::LEA, reg, indirOpnd, m_func));
  1180. // MOVD tmp0, reg
  1181. pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp2, reg, m_func);
  1182. instr->InsertBefore(pInstr);
  1183. // PSRAW dst, tmp0
  1184. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, dst, dst, tmp2, m_func));
  1185. // PAND dst, [X86_LOWBYTES_MASK]
  1186. pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New((void*)&X86_LOWBYTES_MASK, TySimd128I4, m_func), m_func);
  1187. instr->InsertBefore(pInstr);
  1188. Legalize(pInstr);
  1189. // PSRAW tmp1, tmp0
  1190. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, tmp1, tmp1, tmp0, m_func));
  1191. // PAND tmp1, [X86_HIGHBYTES_MASK]
  1192. pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New((void*)&X86_HIGHBYTES_MASK, TySimd128I4, m_func), m_func);
  1193. instr->InsertBefore(pInstr);
  1194. Legalize(pInstr);
  1195. // POR dst, tmp1
  1196. instr->InsertBefore(IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func));
  1197. }
  1198. else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U16)
  1199. {
  1200. IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
  1201. shamtReg->SetReg(LowererMDArch::GetRegShiftCount());
  1202. IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
  1203. // MOVAPS dst, src1
  1204. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  1205. // MOV reg2, 0FFh
  1206. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, reg2, IR::IntConstOpnd::New(0xFF, TyInt32, m_func), m_func));
  1207. // MOV shamtReg, shamt
  1208. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, shamtReg, shamt, m_func));
  1209. // SHR reg2, shamtReg (lower 8 bit)
  1210. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));
  1211. // MOV tmp, reg2
  1212. // MOVSX reg2, tmp(TyInt8)
  1213. pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func);
  1214. instr->InsertBefore(pInstr);
  1215. Legalize(pInstr);
  1216. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
  1217. IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
  1218. // PSRLW dst, mask
  1219. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
  1220. // splat (0xFF >> shamt) into mask
  1221. // MOVD mask, reg2
  1222. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, mask, reg2, m_func));
  1223. // PUNPCKLBW mask, mask
  1224. pInstr = IR::Instr::New(Js::OpCode::PUNPCKLBW, mask, mask, mask, m_func);
  1225. instr->InsertBefore(pInstr);
  1226. Legalize(pInstr);
  1227. // PUNPCKLWD mask, mask
  1228. pInstr = IR::Instr::New(Js::OpCode::PUNPCKLWD, mask, mask, mask, m_func);
  1229. instr->InsertBefore(pInstr);
  1230. Legalize(pInstr);
  1231. // PSHUFD mask, mask, 0
  1232. instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, mask, mask, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
  1233. // PAND dst, mask
  1234. instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
  1235. }
  1236. else
  1237. {
  1238. Assert(UNREACHED);
  1239. }
  1240. pInstr = instr->m_prev;
  1241. instr->Remove();
  1242. return pInstr;
  1243. }
  1244. IR::Instr* LowererMD::SIMD128LowerReplaceLane_8(IR::Instr* instr)
  1245. {
  1246. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1247. int lane = 0;
  1248. IR::Opnd *dst = args->Pop();
  1249. IR::Opnd *src1 = args->Pop();
  1250. IR::Opnd *src2 = args->Pop();
  1251. IR::Opnd *src3 = args->Pop();
  1252. IR::Instr * newInstr = nullptr;
  1253. Assert(dst->IsSimd128() && src1->IsSimd128());
  1254. lane = src2->AsIntConstOpnd()->AsInt32();
  1255. IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt16);
  1256. Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8);
  1257. // MOVAPS dst, src1
  1258. newInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
  1259. instr->InsertBefore(newInstr);
  1260. Legalize(newInstr);
  1261. // PINSRW dst, value, index
  1262. newInstr = IR::Instr::New(Js::OpCode::PINSRW, dst, laneValue, IR::IntConstOpnd::New(lane, TyInt8, m_func), m_func);
  1263. instr->InsertBefore(newInstr);
  1264. Legalize(newInstr);
  1265. if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8) //canonicalizing lanes
  1266. {
  1267. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
  1268. }
  1269. IR::Instr* prevInstr = instr->m_prev;
  1270. instr->Remove();
  1271. return prevInstr;
  1272. }
// Lowers Simd128_ReplaceLane_[I|U|B]16.
// The byte lane is replaced through memory: spill src1 to the per-thread SIMD
// temp area, overwrite the single byte at the lane's offset with a scalar MOV,
// then reload the whole vector into dst.
IR::Instr* LowererMD::SIMD128LowerReplaceLane_16(IR::Instr* instr)
{
    SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
    int lane = 0;
    IR::Opnd *dst = args->Pop();
    IR::Opnd *src1 = args->Pop();   // source vector
    IR::Opnd *src2 = args->Pop();   // constant lane index
    IR::Opnd *src3 = args->Pop();   // replacement lane value
    IR::Instr * newInstr = nullptr;

    Assert(dst->IsSimd128() && src1->IsSimd128());
    lane = src2->AsIntConstOpnd()->AsInt32();
    Assert(lane >= 0 && lane < 16);
    IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt8);

    uint8 *tempSIMD = (uint8*)(instr->m_func->GetScriptContext()->GetThreadContext()->GetSimdTempArea());
#if DBG
    // using only one SIMD temp
    intptr_t endAddrSIMD = (intptr_t) (tempSIMD + sizeof(X86SIMDValue));
#endif
    void *address = nullptr;

    Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);

    // MOVUPS [temp], src1  -- spill the source vector
    address = (void*)tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New(address, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // MOV [temp+offset], laneValue  -- patch the one byte in memory
    address = (void*)(tempSIMD + lane);
    // check for buffer overrun
    Assert((intptr_t)address < endAddrSIMD);
    newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, TyInt8, m_func), laneValue, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    // MOVUPS dst, [temp]  -- reload the assembled vector
    address = (void*)tempSIMD;
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(address, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16) //canonicalizing lanes.
    {
        // Bool vectors must hold all-ones/all-zeros per lane; re-canonicalize.
        instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
    }

    IR::Instr* prevInstr = instr->m_prev;
    instr->Remove();
    return prevInstr;
}
  1318. IR::Instr* LowererMD::SIMD128LowerReplaceLane_4(IR::Instr* instr)
  1319. {
  1320. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1321. int lane = 0, byteWidth = 0;
  1322. IR::Opnd *dst = args->Pop();
  1323. IR::Opnd *src1 = args->Pop();
  1324. IR::Opnd *src2 = args->Pop();
  1325. IR::Opnd *src3 = args->Pop();
  1326. Assert(dst->IsSimd128() && src1->IsSimd128());
  1327. IRType type = dst->GetType();
  1328. lane = src2->AsIntConstOpnd()->AsInt32();
  1329. IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
  1330. switch (instr->m_opcode)
  1331. {
  1332. case Js::OpCode::Simd128_ReplaceLane_I4:
  1333. case Js::OpCode::Simd128_ReplaceLane_U4:
  1334. case Js::OpCode::Simd128_ReplaceLane_B4:
  1335. byteWidth = TySize[TyInt32];
  1336. break;
  1337. case Js::OpCode::Simd128_ReplaceLane_F4:
  1338. byteWidth = TySize[TyFloat32];
  1339. break;
  1340. default:
  1341. Assert(UNREACHED);
  1342. }
  1343. // MOVAPS dst, src1
  1344. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  1345. if (laneValue->GetType() == TyInt32 || laneValue->GetType() == TyUint32)
  1346. {
  1347. IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func);//mov intval to xmm
  1348. //MOVD
  1349. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
  1350. laneValue = tempReg;
  1351. }
  1352. Assert(laneValue->GetType() == TyFloat32);
  1353. if (lane == 0)
  1354. {
  1355. // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
  1356. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
  1357. }
  1358. else if (lane == 2)
  1359. {
  1360. IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
  1361. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
  1362. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
  1363. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
  1364. }
  1365. else
  1366. {
  1367. Assert(lane == 1 || lane == 3);
  1368. uint8 shufMask = 0xE4; // 11 10 01 00
  1369. shufMask |= lane; // 11 10 01 id
  1370. shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
  1371. // SHUFPS dst, dst, shufMask
  1372. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
  1373. // MOVSS dst, value
  1374. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
  1375. // SHUFPS dst, dst, shufMask
  1376. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
  1377. }
  1378. if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4) //Canonicalizing lanes
  1379. {
  1380. instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
  1381. }
  1382. IR::Instr* prevInstr = instr->m_prev;
  1383. instr->Remove();
  1384. return prevInstr;
  1385. }
/*
4-lane swizzle. (The 2-lane (D2) variant is currently compiled out under #if 0.)
*/
  1389. IR::Instr* LowererMD::Simd128LowerSwizzle_4(IR::Instr* instr)
  1390. {
  1391. Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
  1392. Js::OpCode irOpcode = instr->m_opcode;
  1393. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1394. IR::Opnd *dst = args->Pop();
  1395. IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
  1396. int i = 0;
  1397. while (!args->Empty() && i < 6)
  1398. {
  1399. srcs[i++] = args->Pop();
  1400. }
  1401. int8 shufMask = 0;
  1402. int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
  1403. IR::Instr *pInstr = instr->m_prev;
  1404. Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
  1405. // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
  1406. Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 /*|| irOpcode == Js::OpCode::Simd128_Swizzle_D2*/);
  1407. AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
  1408. srcs[2] && srcs[2]->IsIntConstOpnd() &&
  1409. (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[3] && srcs[3]->IsIntConstOpnd())) &&
  1410. (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
  1411. #if 0
  1412. if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
  1413. {
  1414. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  1415. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  1416. Assert(lane0 >= 0 && lane0 < 2);
  1417. Assert(lane1 >= 0 && lane1 < 2);
  1418. shufMask = (int8)((lane1 << 1) | lane0);
  1419. shufOpcode = Js::OpCode::SHUFPD;
  1420. }
  1421. #endif // 0
  1422. if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4)
  1423. {
  1424. shufOpcode = Js::OpCode::PSHUFD;
  1425. }
  1426. AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
  1427. lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
  1428. lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
  1429. lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
  1430. lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
  1431. Assert(lane1 >= 0 && lane1 < 4);
  1432. Assert(lane2 >= 0 && lane2 < 4);
  1433. Assert(lane2 >= 0 && lane2 < 4);
  1434. Assert(lane3 >= 0 && lane3 < 4);
  1435. shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
  1436. instr->m_opcode = shufOpcode;
  1437. instr->SetDst(dst);
  1438. // MOVAPS dst, src1
  1439. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
  1440. // SHUF dst, dst, imm8
  1441. instr->SetSrc1(dst);
  1442. instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
  1443. return pInstr;
  1444. }
  1445. /*
  1446. 4 lane shuffle. Handles arbitrary lane values.
  1447. */
  1448. IR::Instr* LowererMD::Simd128LowerShuffle_4(IR::Instr* instr)
  1449. {
  1450. Js::OpCode irOpcode = instr->m_opcode;
  1451. SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
  1452. IR::Opnd *dst = args->Pop();
  1453. IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
  1454. int j = 0;
  1455. while (!args->Empty() && j < 6)
  1456. {
  1457. srcs[j++] = args->Pop();
  1458. }
  1459. uint8 lanes[4], lanesSrc[4];
  1460. uint fromSrc1, fromSrc2;
  1461. IR::Instr *pInstr = instr->m_prev;
  1462. Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
  1463. Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_U4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
  1464. // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
  1465. AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
  1466. srcs[3] && srcs[3]->IsIntConstOpnd() &&
  1467. srcs[4] && srcs[4]->IsIntConstOpnd() &&
  1468. srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
  1469. lanes[0] = (uint8) srcs[2]->AsIntConstOpnd()->AsInt32();
  1470. lanes[1] = (uint8) srcs[3]->AsIntConstOpnd()->AsInt32();
  1471. lanes[2] = (uint8) srcs[4]->AsIntConstOpnd()->AsInt32();
  1472. lanes[3] = (uint8) srcs[5]->AsIntConstOpnd()->AsInt32();
  1473. Assert(lanes[0] >= 0 && lanes[0] < 8);
  1474. Assert(lanes[1] >= 0 && lanes[1] < 8);
  1475. Assert(lanes[2] >= 0 && lanes[2] < 8);
  1476. Assert(lanes[3] >= 0 && lanes[3] < 8);
  1477. CheckShuffleLanes_4(lanes, lanesSrc, &fromSrc1, &fromSrc2);
  1478. Assert(fromSrc1 + fromSrc2 == 4);
  1479. if (fromSrc1 == 4 || fromSrc2 == 4)
  1480. {
  1481. // can be done with a swizzle
  1482. IR::Opnd *srcOpnd = fromSrc1 == 4 ? srcs[0] : srcs[1];
  1483. InsertShufps(lanes, dst, srcOpnd, srcOpnd, instr);
  1484. }
  1485. else if (fromSrc1 == 2)
  1486. {
  1487. if (lanes[0] < 4 && lanes[1] < 4)
  1488. {
  1489. // x86 friendly shuffle
  1490. Assert(lanes[2] >= 4 && lanes[3] >= 4);
  1491. InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
  1492. }
  1493. else
  1494. {
  1495. // arbitrary shuffle with 2 lanes from each src
  1496. uint8 ordLanes[4], reArrLanes[4];
  1497. // order lanes based on which src they come from
  1498. // compute re-arrangement mask
  1499. for (uint8 i = 0, j1 = 0, j2 = 2; i < 4; i++)
  1500. {
  1501. if (lanesSrc[i] == 1 && j1 < 4)
  1502. {
  1503. ordLanes[j1] = lanes[i];
  1504. reArrLanes[i] = j1;
  1505. j1++;
  1506. }
  1507. else if(j2 < 4)
  1508. {
  1509. Assert(lanesSrc[i] == 2);
  1510. ordLanes[j2] = lanes[i];
  1511. reArrLanes[i] = j2;
  1512. j2++;
  1513. }
  1514. }
  1515. IR::RegOpnd *temp = IR::RegOpnd::New(dst->GetType(), m_func);
  1516. InsertShufps(ordLanes, temp, srcs[0], srcs[1], instr);
  1517. InsertShufps(reArrLanes, dst, temp, temp, instr);
  1518. }
  1519. }
  1520. else if (fromSrc1 == 3 || fromSrc2 == 3)
  1521. {
  1522. // shuffle with 3 lanes from one src, one from another
  1523. IR::Instr *newInstr;
  1524. IR::Opnd * majSrc, *minSrc;
  1525. IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
  1526. IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
  1527. IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
  1528. uint8 minorityLane = 0, maxLaneValue;
  1529. majSrc = fromSrc1 == 3 ? srcs[0] : srcs[1];
  1530. minSrc = fromSrc1 == 3 ? srcs[1] : srcs[0];
  1531. Assert(majSrc != minSrc);
  1532. // Algorithm:
  1533. // SHUFPS temp1, majSrc, lanes
  1534. // SHUFPS temp2, minSrc, lanes
  1535. // MOVUPS temp3, [minorityLane mask]
  1536. // ANDPS temp2, temp3 // mask all lanes but minorityLane
  1537. // ANDNPS temp3, temp1 // zero minorityLane
  1538. // ORPS dst, temp2, temp3
  1539. // find minorityLane to mask
  1540. maxLaneValue = minSrc == srcs[0] ? 4 : 8;
  1541. for (uint8 i = 0; i < 4; i++)
  1542. {
  1543. if (lanes[i] >= (maxLaneValue - 4) && lanes[i] < maxLaneValue)
  1544. {
  1545. minorityLane = i;
  1546. break;
  1547. }
  1548. }
  1549. IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New((void*)&X86_4LANES_MASKS[minorityLane], dst->GetType(), m_func);
  1550. InsertShufps(lanes, temp1, majSrc, majSrc, instr);
  1551. InsertShufps(lanes, temp2, minSrc, minSrc, instr);
  1552. newInstr = IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func);
  1553. instr->InsertBefore(newInstr);
  1554. Legalize(newInstr);
  1555. newInstr = IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func);
  1556. instr->InsertBefore(newInstr);
  1557. Legalize(newInstr);
  1558. newInstr = IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func);
  1559. instr->InsertBefore(newInstr);
  1560. Legalize(newInstr);
  1561. newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func);
  1562. instr->InsertBefore(newInstr);
  1563. Legalize(newInstr);
  1564. }
  1565. instr->Remove();
  1566. return pInstr;
  1567. }
  1568. // 8 and 16 lane shuffle with memory temps
// 8- and 16-lane swizzle/shuffle with memory temps: spill the source
// vector(s) to the per-thread SIMD temp area, gather the selected lanes one
// at a time with scalar MOVs, then reload the assembled result into dst.
IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
{
    Js::OpCode irOpcode = instr->m_opcode;
    IR::Instr *pInstr = instr->m_prev, *newInstr = nullptr;
    SList<IR::Opnd*> *args = nullptr;
    IR::Opnd *dst = nullptr;
    IR::Opnd *src1 = nullptr, *src2 = nullptr;
    uint8 lanes[16], laneCount = 0, scale = 1;   // scale == lane width in bytes
    bool isShuffle = false;                      // shuffle reads two sources; swizzle reads one
    IRType laneType = TyInt16;

    // Three consecutive 16-byte temps: src1 spill, src2 spill (shuffle only,
    // directly after src1 so lane indices 8..15 / 16..31 fall into it), result.
    X86SIMDValue * const tempSIMD = (instr->m_func->GetScriptContext()->GetThreadContext()->GetSimdTempArea());
    uint8 *temp1SIMD = (uint8 *) (&tempSIMD[0]);
    uint8 *temp2SIMD = (uint8 *) (&tempSIMD[1]);
    uint8 *dstSIMD = (uint8 *) (&tempSIMD[2]);
#if DBG
    intptr_t endAddrSIMD = (intptr_t)(temp1SIMD + sizeof(X86SIMDValue) * SIMD_TEMP_SIZE);
#endif
    void *address = nullptr;

    args = Simd128GetExtendedArgs(instr);

    // Decode lane count, lane width, and swizzle-vs-shuffle from the opcode.
    switch (irOpcode)
    {
    case Js::OpCode::Simd128_Swizzle_I8:
    case Js::OpCode::Simd128_Swizzle_U8:
        Assert(args->Count() == 10);
        laneCount = 8;
        laneType = TyInt16;
        isShuffle = false;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Swizzle_I16:
    case Js::OpCode::Simd128_Swizzle_U16:
        Assert(args->Count() == 18);
        laneCount = 16;
        laneType = TyInt8;
        isShuffle = false;
        scale = 1;
        break;
    case Js::OpCode::Simd128_Shuffle_I8:
    case Js::OpCode::Simd128_Shuffle_U8:
        Assert(args->Count() == 11);
        laneCount = 8;
        isShuffle = true;
        laneType = TyUint16;
        scale = 2;
        break;
    case Js::OpCode::Simd128_Shuffle_I16:
    case Js::OpCode::Simd128_Shuffle_U16:
        Assert(args->Count() == 19);
        laneCount = 16;
        isShuffle = true;
        laneType = TyUint8;
        scale = 1;
        break;
    default:
        Assert(UNREACHED);
    }

    dst = args->Pop();
    src1 = args->Pop();
    if (isShuffle)
    {
        src2 = args->Pop();
    }
    Assert(dst->IsSimd128() && src1 && src1->IsSimd128() && (!isShuffle || src2->IsSimd128()));

    // Collect the constant lane indices.
    for (uint i = 0; i < laneCount; i++)
    {
        IR::Opnd * laneOpnd = args->Pop();
        Assert(laneOpnd->IsIntConstOpnd());
        lanes[i] = (uint8)laneOpnd->AsIntConstOpnd()->AsInt32();
    }

    // MOVUPS [temp], src1
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)temp1SIMD, TySimd128I16, m_func), src1, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    if (isShuffle)
    {
        // MOVUPS [temp+16], src2
        newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)(temp2SIMD), TySimd128I16, m_func), src2, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }

    // Gather each selected lane into the result temp, one scalar MOV pair per lane.
    for (uint i = 0; i < laneCount; i++)
    {
        //. MOV tmp, [temp1SIMD + laneValue*scale]
        IR::RegOpnd *tmp = IR::RegOpnd::New(laneType, m_func);
        address = (void*)(temp1SIMD + lanes[i] * scale);
        // Reads must stay within the two source temps (i.e. below dstSIMD).
        Assert((intptr_t)address + (intptr_t)scale <= (intptr_t)dstSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, IR::MemRefOpnd::New(address, laneType, m_func), m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);

        //. MOV [dstSIMD + i*scale], tmp
        address = (void*)(dstSIMD + i * scale);
        // Writes must stay within the temp area.
        Assert((intptr_t)address + (intptr_t) scale <= endAddrSIMD);
        newInstr = IR::Instr::New(Js::OpCode::MOV,IR::MemRefOpnd::New(address, laneType, m_func), tmp, m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
    }

    // MOVUPS dst, [dstSIMD]
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)dstSIMD, TySimd128I16, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);

    instr->Remove();
    return pInstr;
}
  1672. IR::Instr* LowererMD::Simd128LowerNotEqual(IR::Instr* instr)
  1673. {
  1674. Assert(instr->m_opcode == Js::OpCode::Simd128_Neq_I4 || instr->m_opcode == Js::OpCode::Simd128_Neq_I8 ||
  1675. instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U4 ||
  1676. instr->m_opcode == Js::OpCode::Simd128_Neq_U8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16);
  1677. IR::Instr *pInstr;
  1678. IR::Opnd* dst = instr->GetDst();
  1679. IR::Opnd* src1 = instr->GetSrc1();
  1680. IR::Opnd* src2 = instr->GetSrc2();
  1681. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1682. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1683. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1684. Js::OpCode cmpOpcode = Js::OpCode::PCMPEQD;
  1685. if (instr->m_opcode == Js::OpCode::Simd128_Neq_I8 || instr->m_opcode == Js::OpCode::Simd128_Neq_U8)
  1686. {
  1687. cmpOpcode = Js::OpCode::PCMPEQW;
  1688. }
  1689. else if (instr->m_opcode == Js::OpCode::Simd128_Neq_I16 || instr->m_opcode == Js::OpCode::Simd128_Neq_U16)
  1690. {
  1691. cmpOpcode = Js::OpCode::PCMPEQB;
  1692. }
  1693. // dst = PCMPEQD src1, src2
  1694. pInstr = IR::Instr::New(cmpOpcode, dst, src1, src2, m_func);
  1695. instr->InsertBefore(pInstr);
  1696. //MakeDstEquSrc1(pInstr);
  1697. Legalize(pInstr);
  1698. // dst = PANDN dst, X86_ALL_NEG_ONES
  1699. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
  1700. instr->InsertBefore(pInstr);
  1701. //MakeDstEquSrc1(pInstr);
  1702. Legalize(pInstr);
  1703. pInstr = instr->m_prev;
  1704. instr->Remove();
  1705. return pInstr;
  1706. }
// Lowers unsigned SIMD Lt_* and GtEq_*.
// SSE2 provides only signed per-lane compares (PCMPGT*), so both operands are
// XORed with the per-lane sign bit to map unsigned order onto signed order
// before comparing. GtEq_U* is then computed as NOT(Lt).
IR::Instr* LowererMD::Simd128LowerLessThan(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_Lt_U4 || instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_Lt_U16 ||
           instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16);

    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());

    IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
    // Select the sign-bit constant and compare opcode for the lane width
    // (default: 32-bit dword lanes).
    IR::MemRefOpnd* signBits = IR::MemRefOpnd::New((void*)&X86_DWORD_SIGNBITS, TySimd128I4, m_func);
    IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
    Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
    if (instr->m_opcode == Js::OpCode::Simd128_Lt_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8)
    {
        cmpOpcode = Js::OpCode::PCMPGTW;
        signBits = IR::MemRefOpnd::New((void*)&X86_WORD_SIGNBITS, TySimd128I4, m_func);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_Lt_U16 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
    {
        cmpOpcode = Js::OpCode::PCMPGTB;
        signBits = IR::MemRefOpnd::New((void*)&X86_BYTE_SIGNBITS, TySimd128I4, m_func);
    }

    // MOVUPS mask, [signBits]
    pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // tmpa = PXOR src1, mask   (bias src1 into signed range)
    pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // tmpb = PXOR src2, mask   (bias src2 into signed range)
    pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // dst = cmpOpCode tmpb, tmpa (Less than, swapped opnds)
    pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);

    if (instr->m_opcode == Js::OpCode::Simd128_GtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_U16)
    {
        // for SIMD unsigned int, greaterThanOrEqual == lessThan + Not
        // dst = PANDN dst, X86_ALL_NEG_ONES
        pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }

    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers SIMD LtEq_* (signed and unsigned) and unsigned Gt_*.
// Three strategies:
//   LtEq_I4              : NOT(src1 > src2)                       (PCMPGTD + PANDN)
//   LtEq_I8 / LtEq_I16   : (src2 > src1) OR (src1 == src2)        (PCMPGT + PCMPEQ + POR)
//   LtEq_U* / Gt_U*      : XOR both operands with the lane sign bit to map
//                          unsigned order onto SSE2's signed compares, then
//                          (tmpb > tmpa) OR (tmpa == tmpb); Gt_U* is the NOT
//                          of that result.
IR::Instr* LowererMD::Simd128LowerLessThanOrEqual(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_LtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 ||
           instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
           instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16);

    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    IR::Opnd* src2 = instr->GetSrc2();
    Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    Assert(src2->IsRegOpnd() && src2->IsSimd128());

    IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
    IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
    // Compare/equality opcodes matching the lane width (default: dword lanes).
    Js::OpCode cmpOpcode = Js::OpCode::PCMPGTD;
    Js::OpCode eqpOpcode = Js::OpCode::PCMPEQD;
    if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
    {
        cmpOpcode = Js::OpCode::PCMPGTW;
        eqpOpcode = Js::OpCode::PCMPEQW;
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I16 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
    {
        cmpOpcode = Js::OpCode::PCMPGTB;
        eqpOpcode = Js::OpCode::PCMPEQB;
    }

    if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I4)
    {
        // LtEq == NOT(Gt)
        // dst = pcmpgtd src1, src2
        pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
        pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_I16)
    {
        // LtEq == (src1 < src2) OR (src1 == src2)
        // tmpa = pcmpgtw src2, src1 (src1 < src2?) [pcmpgtb]
        pInstr = IR::Instr::New(cmpOpcode, tmpa, src2, src1, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpb = pcmpeqw src1, src2 [pcmpeqb]
        pInstr = IR::Instr::New(eqpOpcode, tmpb, src1, src2, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = por tmpa, tmpb
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmpa, tmpb, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
    }
    else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U4 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 ||
             instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
    {
        // Sign-bit constant for the lane width (default: dword lanes).
        IR::MemRefOpnd* signBits = IR::MemRefOpnd::New((void*)&X86_DWORD_SIGNBITS, TySimd128I4, m_func);
        IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
        if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8)
        {
            signBits = IR::MemRefOpnd::New((void*)&X86_WORD_SIGNBITS, TySimd128I4, m_func);
        }
        else if (instr->m_opcode == Js::OpCode::Simd128_LtEq_U16 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
        {
            signBits = IR::MemRefOpnd::New((void*)&X86_BYTE_SIGNBITS, TySimd128I4, m_func);
        }

        // MOVUPS mask, [signBits]
        pInstr = IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpa = PXOR src1, mask   (bias src1 into signed range)
        pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpb = PXOR src2, mask   (bias src2 into signed range)
        pInstr = IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = cmpOpCode tmpb, tmpa   (src1 < src2, swapped operands)
        pInstr = IR::Instr::New(cmpOpcode, dst, tmpb, tmpa, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // tmpa = pcmpeqd tmpa, tmpb   (src1 == src2)
        pInstr = IR::Instr::New(eqpOpcode, tmpa, tmpa, tmpb, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);
        // dst = por dst, tmpa   (less-than OR equal)
        pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmpa, m_func);
        instr->InsertBefore(pInstr);
        Legalize(pInstr);

        if (instr->m_opcode == Js::OpCode::Simd128_Gt_U4 || instr->m_opcode == Js::OpCode::Simd128_Gt_U8 || instr->m_opcode == Js::OpCode::Simd128_Gt_U16)
        { // for SIMD unsigned int, greaterThan == lessThanOrEqual + Not
            // dst = PANDN dst, X86_ALL_NEG_ONES
            pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
            instr->InsertBefore(pInstr);
            Legalize(pInstr);
        }
    }

    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
  1863. IR::Instr* LowererMD::Simd128LowerGreaterThanOrEqual(IR::Instr* instr)
  1864. {
  1865. Assert(instr->m_opcode == Js::OpCode::Simd128_GtEq_I4 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16);
  1866. IR::Instr *pInstr;
  1867. IR::Opnd* dst = instr->GetDst();
  1868. IR::Opnd* src1 = instr->GetSrc1();
  1869. IR::Opnd* src2 = instr->GetSrc2();
  1870. Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
  1871. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1872. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1873. if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I4)
  1874. {
  1875. // dst = pcmpgtd src2, src1
  1876. pInstr = IR::Instr::New(Js::OpCode::PCMPGTD, dst, src2, src1, m_func);
  1877. instr->InsertBefore(pInstr);
  1878. Legalize(pInstr);
  1879. // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
  1880. pInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES, TySimd128I4, m_func), m_func);
  1881. instr->InsertBefore(pInstr);
  1882. Legalize(pInstr);
  1883. }
  1884. else if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I8 || instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
  1885. {
  1886. IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  1887. IR::RegOpnd* tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
  1888. Js::OpCode cmpOpcode = Js::OpCode::PCMPGTW;
  1889. Js::OpCode eqpOpcode = Js::OpCode::PCMPEQW;
  1890. if (instr->m_opcode == Js::OpCode::Simd128_GtEq_I16)
  1891. {
  1892. cmpOpcode = Js::OpCode::PCMPGTB;
  1893. eqpOpcode = Js::OpCode::PCMPEQB;
  1894. }
  1895. // tmp1 = pcmpgtw src1, src2 [pcmpgtb]
  1896. pInstr = IR::Instr::New(cmpOpcode, tmp1, src1, src2, m_func);
  1897. instr->InsertBefore(pInstr);
  1898. Legalize(pInstr);
  1899. // tmp2 = pcmpeqw src1, src2 [pcmpeqw]
  1900. pInstr = IR::Instr::New(eqpOpcode, tmp2, src1, src2, m_func);
  1901. instr->InsertBefore(pInstr);
  1902. Legalize(pInstr);
  1903. // dst = por tmp1, tmp2
  1904. pInstr = IR::Instr::New(Js::OpCode::POR, dst, tmp1, tmp2, m_func);
  1905. instr->InsertBefore(pInstr);
  1906. Legalize(pInstr);
  1907. }
  1908. pInstr = instr->m_prev;
  1909. instr->Remove();
  1910. return pInstr;
  1911. }
  1912. IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr)
  1913. {
  1914. IR::Instr *pInstr;
  1915. IR::Opnd* dst = instr->GetDst();
  1916. IR::Opnd* src1 = instr->GetSrc1();
  1917. IR::Opnd* src2 = instr->GetSrc2();
  1918. Assert(dst->IsRegOpnd() && dst->IsSimd128());
  1919. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1920. Assert(src2->IsRegOpnd() && src2->IsSimd128());
  1921. Assert(instr->m_opcode == Js::OpCode::Simd128_Min_F4 || instr->m_opcode == Js::OpCode::Simd128_Max_F4);
  1922. IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
  1923. IR::RegOpnd* tmp2 = IR::RegOpnd::New(src2->GetType(), m_func);
  1924. if (instr->m_opcode == Js::OpCode::Simd128_Min_F4)
  1925. {
  1926. pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp1, src1, src2, m_func);
  1927. instr->InsertBefore(pInstr);
  1928. Legalize(pInstr);
  1929. //
  1930. pInstr = IR::Instr::New(Js::OpCode::MINPS, tmp2, src2, src1, m_func);
  1931. instr->InsertBefore(pInstr);
  1932. Legalize(pInstr);
  1933. //
  1934. pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
  1935. instr->InsertBefore(pInstr);
  1936. Legalize(pInstr);
  1937. }
  1938. else
  1939. {
  1940. //This sequence closely mirrors SIMDFloat32x4Operation::OpMax except for
  1941. //the fact that tmp2 (tmpbValue) is reused to reduce the number of registers
  1942. //needed for this sequence.
  1943. pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func);
  1944. instr->InsertBefore(pInstr);
  1945. Legalize(pInstr);
  1946. //
  1947. pInstr = IR::Instr::New(Js::OpCode::MAXPS, tmp2, src2, src1, m_func);
  1948. instr->InsertBefore(pInstr);
  1949. Legalize(pInstr);
  1950. //
  1951. pInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp1, tmp1, tmp2, m_func);
  1952. instr->InsertBefore(pInstr);
  1953. Legalize(pInstr);
  1954. //
  1955. pInstr = IR::Instr::New(Js::OpCode::CMPUNORDPS, tmp2, src1, src2, m_func);
  1956. instr->InsertBefore(pInstr);
  1957. Legalize(pInstr);
  1958. //
  1959. pInstr = IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func);
  1960. instr->InsertBefore(pInstr);
  1961. Legalize(pInstr);
  1962. }
  1963. pInstr = instr->m_prev;
  1964. instr->Remove();
  1965. return pInstr;
  1966. }
  1967. IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr)
  1968. {
  1969. Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 ||
  1970. instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16);
  1971. IR::Instr *pInstr;
  1972. IR::Opnd* dst = instr->GetDst();
  1973. IR::Opnd* src1 = instr->GetSrc1();
  1974. Assert(dst->IsRegOpnd() && dst->IsInt32());
  1975. Assert(src1->IsRegOpnd() && src1->IsSimd128());
  1976. // pmovmskb dst, src1
  1977. // neg dst
  1978. // sbb dst, dst
  1979. // neg dst
  1980. // pmovmskb dst, src1
  1981. pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
  1982. instr->InsertBefore(pInstr);
  1983. Legalize(pInstr);
  1984. // neg dst
  1985. pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
  1986. instr->InsertBefore(pInstr);
  1987. Legalize(pInstr);
  1988. // sbb dst, dst
  1989. pInstr = IR::Instr::New(Js::OpCode::SBB, dst, dst, dst, m_func);
  1990. instr->InsertBefore(pInstr);
  1991. Legalize(pInstr);
  1992. // neg dst
  1993. pInstr = IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func);
  1994. instr->InsertBefore(pInstr);
  1995. Legalize(pInstr);
  1996. pInstr = instr->m_prev;
  1997. instr->Remove();
  1998. return pInstr;
  1999. }
// Lowers Simd128 AllTrue_B4/B8/B16: dst = 1 iff the PMOVMSKB byte mask is 0xFFFF
// (every byte lane's sign bit set), else 0.
IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
{
    Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 ||
    instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16);
    IR::Instr *pInstr;
    IR::Opnd* dst = instr->GetDst();
    IR::Opnd* src1 = instr->GetSrc1();
    Assert(dst->IsRegOpnd() && dst->IsInt32());
    Assert(src1->IsRegOpnd() && src1->IsSimd128());
    // byte-sized temp so SETE can target it directly
    IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
    // pmovmskb dst, src1 -- one mask bit per byte lane's sign bit
    // NOTE(review): unlike Simd128LowerAnyTrue, this PMOVMSKB is not Legalized -- confirm intentional.
    pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
    instr->InsertBefore(pInstr);
    // cmp dst, 0FFFFh -- are all 16 mask bits set?
    pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    pInstr->SetSrc1(dst);
    pInstr->SetSrc2(IR::IntConstOpnd::New(0x0FFFF, TyInt32, m_func, true));
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // mov tmp(TyInt8), dst
    pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // sete tmp(TyInt8) -- tmp = (mask == 0xFFFF) ? 1 : 0
    pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
    instr->InsertBefore(pInstr);
    Legalize(pInstr);
    // movsx dst, tmp(TyInt8) -- widen the byte result into the int32 dst
    instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
    pInstr = instr->m_prev;
    instr->Remove();
    return pInstr;
}
// Lowers Int32x4-from-Float32x4 conversion. CVTTPS2DQ writes the integer
// indefinite value 0x80000000 for any lane that is NaN or out of int32 range,
// so after converting we check for that sentinel and, if present, verify the
// original float lanes really were in [-2^31, 2^31); otherwise throw RangeError.
IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *mask1, *mask2;
    IR::Instr *insertInstr, *pInstr, *newInstr;
    IR::LabelInstr *doneLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    // CVTTPS2DQ dst, src
    instr->m_opcode = Js::OpCode::CVTTPS2DQ;
    insertInstr = instr->m_next;   // range checks go after the conversion
    pInstr = instr->m_prev;
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    mask1 = IR::RegOpnd::New(TyInt32, m_func);
    mask2 = IR::RegOpnd::New(TyInt32, m_func);
    // bound checks
    // check if any value is potentially out of range (0x80000000 in output)
    // PCMPEQD tmp, dst, X86_NEG_MASK (0x80000000)
    // MOVMSKPS mask1, tmp
    // CMP mask1, 0
    // JEQ $doneLabel      -- no sentinel lane: conversion succeeded for all lanes
    tmp = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New((void*)&X86_NEG_MASK_F4, TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // we have a potential out-of-bound lane. check bounds on the float source:
    // MOVAPS tmp2, X86_TWO_31_F4 (0x4f000000)
    // CMPLEPS tmp, tmp2, src     -- lanes where src >= 2^31
    // MOVMSKPS mask1, tmp
    // MOVAPS tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
    // CMPLTPS tmp, src, tmp2     -- lanes where src < -2^31
    // MOVMSKPS mask2, tmp
    // OR mask1, mask1, mask2
    // CMP mask1, 0
    // JEQ $doneLabel             -- all lanes in range (e.g. exactly -2^31): result is valid
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New((void*)&X86_TWO_31_F4, TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New((void*)&X86_NEG_TWO_31_F4, TySimd128I4, m_func), m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func);
    insertInstr->InsertBefore(newInstr);
    Legalize(newInstr);
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
    insertInstr->InsertBefore(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask1);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    insertInstr->InsertBefore(newInstr);
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
    // some lane is genuinely out of range: throw
    m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    insertInstr->InsertBefore(doneLabel);
    return pInstr;
}
// Lowers Uint32x4-from-Float32x4 conversion with range checks.
// Throws RangeError if any lane is <= -1.0 or >= 2^32 (NaN lanes also fail
// the adjusted-conversion check below, since CVTTPS2DQ yields the 0x80000000
// sentinel for them).
IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr)
{
    IR::Opnd *dst, *src, *tmp, *tmp2, *two_31_f4_mask, *two_31_i4_mask, *mask;
    IR::Instr *pInstr, *newInstr;
    IR::LabelInstr *doneLabel, *throwLabel;
    dst = instr->GetDst();
    src = instr->GetSrc1();
    Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
    doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
    throwLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
    pInstr = instr->m_prev;
    mask = IR::RegOpnd::New(TyInt32, m_func);
    two_31_f4_mask = IR::RegOpnd::New(TySimd128F4, m_func);
    two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func);
    tmp = IR::RegOpnd::New(TySimd128F4, m_func);
    tmp2 = IR::RegOpnd::New(TySimd128F4, m_func);
    // any lanes <= -1.0 ?
    // CMPLEPS tmp, src, [X86_ALL_NEG_ONES_F4]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, src, IR::MemRefOpnd::New((void*)&X86_ALL_NEG_ONES_F4, TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
    // To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
    // MOVAPS two_31_f4_mask, [X86_TWO_31]
    // CMPLEPS tmp2, two_31_f4_mask, src
    // ANDPS two_31_f4_mask, tmp2      // two_31_f4_mask has f32(2^31) for lanes >= 2^31, 0 otherwise
    // SUBPS tmp2, src, two_31_f4_mask // subtract 2^31 from lanes >= 2^31, unchanged otherwise.
    // CVTTPS2DQ dst, tmp2
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_f4_mask, IR::MemRefOpnd::New((void*)&X86_TWO_31_F4, TySimd128F4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp2, two_31_f4_mask, src, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_f4_mask, two_31_f4_mask, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::SUBPS, tmp2, src, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CVTTPS2DQ, dst, tmp2, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    // check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
    // PCMPEQD tmp, dst, [X86_NEG_MASK]
    // MOVMSKPS mask, tmp
    // CMP mask, 0
    // JNE $throwLabel
    newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, IR::MemRefOpnd::New((void*)&X86_NEG_MASK_F4, TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
    newInstr->SetSrc1(mask);
    newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
    // we pass range checks
    // add i4(2^31) values back to adjusted values.
    // Use first bit from the 2^31 float mask (0x4f000...0 << 1)
    // and AND with 2^31 int mask (0x8000..0) setting first bit to zero if lane hasn't been adjusted
    // MOVAPS two_31_i4_mask, [X86_TWO_31_I4]
    // PSLLD two_31_f4_mask, 1
    // ANDPS two_31_i4_mask, two_31_f4_mask
    // PADDD dst, dst, two_31_i4_mask
    // JMP $doneLabel
    newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_i4_mask, IR::MemRefOpnd::New((void*)&X86_TWO_31_I4, TySimd128I4, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PSLLD, two_31_f4_mask, two_31_f4_mask, IR::IntConstOpnd::New(1, TyInt8, m_func), m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_i4_mask, two_31_i4_mask, two_31_f4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    newInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, two_31_i4_mask, m_func);
    instr->InsertBefore(newInstr);
    Legalize(newInstr);
    instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, doneLabel, m_func));
    // throwLabel:
    // Throw Range Error
    instr->InsertBefore(throwLabel);
    m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    // doneLabel:
    instr->InsertBefore(doneLabel);
    instr->Remove();
    return pInstr;
}
  2206. IR::Instr* LowererMD::Simd128LowerFloat32x4FromUint32x4(IR::Instr *instr)
  2207. {
  2208. IR::Opnd *dst, *src, *tmp, *zero;
  2209. IR::Instr *pInstr, *newInstr;
  2210. dst = instr->GetDst();
  2211. src = instr->GetSrc1();
  2212. Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
  2213. pInstr = instr->m_prev;
  2214. zero = IR::RegOpnd::New(TySimd128I4, m_func);
  2215. tmp = IR::RegOpnd::New(TySimd128I4, m_func);
  2216. // find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
  2217. // MOVAPS zero, [X86_ALL_ZEROS]
  2218. newInstr = IR::Instr::New(Js::OpCode::MOVAPS, zero, IR::MemRefOpnd::New((void*)&X86_ALL_ZEROS, TySimd128I4, m_func), m_func);
  2219. instr->InsertBefore(newInstr);
  2220. Legalize(newInstr);
  2221. // tmp = PCMPGTD zero, src
  2222. newInstr = IR::Instr::New(Js::OpCode::PCMPGTD, tmp, zero, src, m_func);
  2223. instr->InsertBefore(newInstr);
  2224. Legalize(newInstr);
  2225. // temp1 has f32(2^32) for unsigned values above 2^31, 0 otherwise
  2226. // ANDPS tmp, tmp, [X86_TWO_32_F4]
  2227. newInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, tmp, IR::MemRefOpnd::New((void*)&X86_TWO_32_F4, TySimd128F4, m_func), m_func);
  2228. instr->InsertBefore(newInstr);
  2229. Legalize(newInstr);
  2230. // convert
  2231. // dst = CVTDQ2PS src
  2232. newInstr = IR::Instr::New(Js::OpCode::CVTDQ2PS, dst, src, m_func);
  2233. instr->InsertBefore(newInstr);
  2234. Legalize(newInstr);
  2235. // Add f32(2^32) to negative values
  2236. // ADDPS dst, dst, tmp
  2237. newInstr = IR::Instr::New(Js::OpCode::ADDPS, dst, dst, tmp, m_func);
  2238. instr->InsertBefore(newInstr);
  2239. Legalize(newInstr);
  2240. instr->Remove();
  2241. return pInstr;
  2242. }
  2243. IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
  2244. {
  2245. Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
  2246. instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
  2247. instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
  2248. instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
  2249. instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
  2250. instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
  2251. instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
  2252. //instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
  2253. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
  2254. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I8 ||
  2255. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I16 ||
  2256. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U4 ||
  2257. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U8 ||
  2258. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U16 ||
  2259. instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4
  2260. //instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
  2261. );
  2262. IR::Instr * instrPrev = instr->m_prev;
  2263. IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
  2264. IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
  2265. IR::Opnd * dst = instr->GetDst();
  2266. IR::Opnd * src1 = instr->GetSrc1();
  2267. IR::Opnd * src2 = instr->GetSrc2();
  2268. ValueType arrType = baseOpnd->GetValueType();
  2269. uint8 dataWidth = instr->dataWidth;
  2270. // Type-specialized.
  2271. Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
  2272. IR::Instr * done;
  2273. if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
  2274. {
  2275. uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
  2276. // bound check and helper
  2277. done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
  2278. }
  2279. else
  2280. {
  2281. // Reaching here means:
  2282. // We have a constant index, and either
  2283. // (1) constant heap or (2) variable heap with constant index < 16MB.
  2284. // Case (1) requires static bound check. Case (2) means we are always in bound.
  2285. // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
  2286. if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
  2287. {
  2288. m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
  2289. instr->Remove();
  2290. return instrPrev;
  2291. }
  2292. done = instr;
  2293. }
  2294. return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
  2295. }
  2296. IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
  2297. {
  2298. Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
  2299. Assert(
  2300. instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
  2301. instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
  2302. instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
  2303. instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
  2304. instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
  2305. instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
  2306. instr->m_opcode == Js::OpCode::Simd128_LdArr_F4
  2307. );
  2308. IR::Opnd * src = instr->GetSrc1();
  2309. IR::RegOpnd * indexOpnd =src->AsIndirOpnd()->GetIndexOpnd();
  2310. IR::Opnd * dst = instr->GetDst();
  2311. ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
  2312. // If we type-specialized, then array is a definite typed-array.
  2313. Assert(arrType.IsObject() && arrType.IsTypedArray());
  2314. Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr);
  2315. Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr);
  2316. return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
  2317. }
// Emits the machine load sequence for a SIMD load of dataWidth (4/8/12/16)
// bytes from the indir opnd src into the simd register dst, then removes
// instr. Returns the instruction preceding the original.
IR::Instr *
LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
{
    IR::Instr *newInstr = nullptr;
    IR::Instr * instrPrev = instr->m_prev;
    // Type-specialized.
    Assert(dst && dst->IsSimd128());
    Assert(src->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
        src->AsIndirOpnd()->SetScale(scaleFactor);
    }
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 12:
    {
        // 12-byte load: an 8-byte low load plus a 4-byte load shifted up into
        // the third lane, OR-merged into dst.
        IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
        instr->InsertBefore(newInstr);
        // set the +8 offset on this instruction's own src1 (before legalizing it)
        newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
        Legalize(newInstr);
        // PSLLDQ temp, 0x08 -- move the loaded dword up to byte offset 8 (lane 2)
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
        // ORPS dst, temp -- merge the third lane into dst
        newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    }
    case 8:
        // MOVSD dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    case 4:
        // MOVSS dst, src1([arrayBuffer + indexOpnd])
        newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
        instr->InsertBefore(newInstr);
        Legalize(newInstr);
        break;
    default:
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
  2378. IR::Instr*
  2379. LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
  2380. {
  2381. Assert(
  2382. instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
  2383. instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
  2384. instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
  2385. instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
  2386. instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
  2387. instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
  2388. instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
  2389. //instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
  2390. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
  2391. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I8 ||
  2392. instr->m_opcode == Js::OpCode::Simd128_StArrConst_I16 ||
  2393. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
  2394. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U8 ||
  2395. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U16 ||
  2396. instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
  2397. instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4
  2398. //instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
  2399. );
  2400. IR::Instr * instrPrev = instr->m_prev;
  2401. IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
  2402. IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
  2403. IR::Opnd * dst = instr->GetDst();
  2404. IR::Opnd * src1 = instr->GetSrc1();
  2405. IR::Opnd * src2 = instr->GetSrc2();
  2406. ValueType arrType = baseOpnd->GetValueType();
  2407. uint8 dataWidth = instr->dataWidth;
  2408. // Type-specialized.
  2409. Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
  2410. IR::Instr * done;
  2411. if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
  2412. {
  2413. // CMP indexOpnd, src2(arrSize)
  2414. // JA $helper
  2415. // JMP $store
  2416. // $helper:
  2417. // Throw RangeError
  2418. // JMP $done
  2419. // $store:
  2420. // MOV dst([arrayBuffer + indexOpnd]), src1
  2421. // $done:
  2422. uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
  2423. done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
  2424. }
  2425. else
  2426. {
  2427. // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds
  2428. if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
  2429. {
  2430. m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
  2431. instr->Remove();
  2432. return instrPrev;
  2433. }
  2434. done = instr;
  2435. }
  2436. return Simd128ConvertToStore(dst, src1, dataWidth, instr);
  2437. }
  2438. IR::Instr*
  2439. LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
  2440. {
  2441. Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
  2442. Assert(
  2443. instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
  2444. instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
  2445. instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
  2446. instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
  2447. instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
  2448. instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
  2449. instr->m_opcode == Js::OpCode::Simd128_StArr_F4
  2450. );
  2451. IR::Opnd * dst = instr->GetDst();
  2452. IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd();
  2453. IR::Opnd * src1 = instr->GetSrc1();
  2454. uint8 dataWidth = instr->dataWidth;
  2455. ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
  2456. // If we type-specialized, then array is a definite type-array.
  2457. Assert(arrType.IsObject() && arrType.IsTypedArray());
  2458. Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr);
  2459. Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr);
  2460. return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /*scale factor*/);
  2461. }
IR::Instr *
LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
{
    // Lowers a simd128 store of dataWidth bytes (4, 8, 12, or 16) from src1
    // into dst (an indir into the array buffer), emitting the matching SSE
    // store sequence. Removes `instr` and returns its predecessor so the
    // caller's lowering loop can resume from there.
    IR::Instr * instrPrev = instr->m_prev;
    Assert(src1 && src1->IsSimd128());
    Assert(dst->IsIndirOpnd());
    if (scaleFactor > 0)
    {
        // needed only for non-Asmjs code
        Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
        dst->AsIndirOpnd()->SetScale(scaleFactor);
    }
    switch (dataWidth)
    {
    case 16:
        // MOVUPS dst([arrayBuffer + indexOpnd]), src1
        instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
        break;
    case 12:
    {
        // 12-byte store: low 8 bytes via MOVSD, then shift the third lane
        // down to lane 0 and store it with MOVSS at offset + 8.
        IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
        IR::Instr *movss;
        // MOVAPS temp, src
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
        // MOVSD dst([arrayBuffer + indexOpnd]), temp
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
        // PSRLDQ temp, 0x08
        instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func));
        // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
        movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
        instr->InsertBefore(movss);
        // Patch the +8 offset on the newly inserted store's own dst after insertion.
        movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
        break;
    }
    case 8:
        // MOVSD dst([arrayBuffer + indexOpnd]), src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
        break;
    case 4:
        // MOVSS dst([arrayBuffer + indexOpnd]), src1
        instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
        break;
    default:;
        Assume(UNREACHED);
    }
    instr->Remove();
    return instrPrev;
}
void
LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
{
    // Emits an upper-bound check guaranteeing that a simd load/store of
    // instr->dataWidth bytes at [base (+ index)] stays within the typed
    // array's head segment; on failure it falls through into a runtime
    // range-error throw. Non-asm.js only.
    Assert(!m_func->m_workItem->GetFunctionBody()->GetIsAsmjsMode());
    IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
    IR::Opnd* headSegmentLengthOpnd;
    if (arrayRegOpnd->EliminatedUpperBoundCheck())
    {
        // already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
        return;
    }
    if (arrayRegOpnd->HeadSegmentLengthSym())
    {
        // globOpt already hoisted the head-segment length into a sym; reuse it.
        headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
    }
    else
    {
        // (headSegmentLength = [base + offset(length)])
        int lengthOffset;
        lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
        headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
    }
    IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
    // Number of array elements the simd access covers (dataWidth bytes / elem size).
    int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
    if (indexOpnd)
    {
        // MOV tmp, elemCount
        // ADD tmp, index
        // CMP tmp, Length -- upper bound check
        // JBE $storeLabel
        // Throw RuntimeError
        // skipLabel:
        IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
        IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
        m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
        Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
        // Unsigned (index + elemCount) <= length jumps past the throw below.
        m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    else
    {
        // Constant index folded into the indir offset:
        // CMP Length, (offset + elemCount)
        // JA $storeLabel
        int32 offset = indirOpnd->GetOffset();
        int32 index = offset + elemCount;
        m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
    }
    // Fallthrough (bound check failed): throw an out-of-range error.
    m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
    return;
}
  2559. void
  2560. LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
  2561. {
  2562. // For non-asm.js we check if headSeg symbol exists, else load it.
  2563. IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
  2564. IR::RegOpnd *headSegmentOpnd;
  2565. if (arrayRegOpnd->HeadSegmentSym())
  2566. {
  2567. headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
  2568. }
  2569. else
  2570. {
  2571. // MOV headSegment, [base + offset(head)]
  2572. int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
  2573. IR::IndirOpnd * newIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
  2574. headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
  2575. m_lowerer->InsertMove(headSegmentOpnd, newIndirOpnd, instr);
  2576. }
  2577. // change base to be the head segment instead of the array object
  2578. indirOpnd->SetBaseOpnd(headSegmentOpnd);
  2579. }
// Builds args list <dst, src1, src2, src3 ..>
SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
{
    // The instr's src1 is the tail of an ExtendArg_A chain: each ExtendArg_A
    // carries one actual argument in src1 and links to the previous
    // ExtendArg_A through src2. Walking the chain tail-to-head and pushing
    // onto the SList (a stack), then pushing dst last, yields the list in
    // <dst, src1, src2, ...> order.
    SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
    IR::Instr *pInstr = instr;
    IR::Opnd *dst, *src1, *src2;
    dst = src1 = src2 = nullptr;
    if (pInstr->GetDst())
    {
        dst = pInstr->UnlinkDst();
    }
    src1 = pInstr->UnlinkSrc1();
    Assert(src1->GetStackSym()->IsSingleDef());
    pInstr = src1->GetStackSym()->GetInstrDef();
    while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
    {
        Assert(pInstr->GetSrc1());
        // Copy the arg operand; the original stays linked to the ExtendArg_A.
        src1 = pInstr->GetSrc1()->Copy(this->m_func);
        if (src1->IsRegOpnd())
        {
            // Keep the arg sym alive across loop back-edges until it is
            // consumed at the lowered call site.
            this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
        }
        args->Push(src1);
        if (pInstr->GetSrc2())
        {
            // Follow the link to the previous ExtendArg_A in the chain.
            src2 = pInstr->GetSrc2();
            Assert(src2->GetStackSym()->IsSingleDef());
            pInstr = src2->GetStackSym()->GetInstrDef();
        }
        else
        {
            // Head of the chain reached.
            pInstr = nullptr;
        }
    }
    args->Push(dst);
    // Every extended-arg SIMD op has at least a dst plus three sources.
    Assert(args->Count() > 3);
    return args;
}
  2618. IR::Opnd*
  2619. LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/)
  2620. {
  2621. IRType constType = constOpnd->GetType();
  2622. if (!IRType_IsNativeInt(constType))
  2623. {
  2624. // not int opnd, nothing to do
  2625. return constOpnd;
  2626. }
  2627. Assert(type == TyInt32 || type == TyInt16 || type == TyInt8);
  2628. Assert(constType == TyInt32 || constType == TyInt16 || constType == TyInt8);
  2629. if (constOpnd->IsRegOpnd())
  2630. {
  2631. // already a register, just cast
  2632. constOpnd->SetType(type);
  2633. return constOpnd;
  2634. }
  2635. // en-register
  2636. IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
  2637. // MOV tempReg, constOpnd
  2638. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
  2639. return tempReg;
  2640. }
// Builds the table mapping high-level Simd128_* opcodes to their x86/x64
// machine opcodes. Ops needing custom lowering sequences are not listed here.
// Bitwise ops on non-float types are frequently mapped to float-domain
// equivalents (e.g. XORPS) where the bit pattern result is identical.
void LowererMD::Simd128InitOpcodeMap()
{
    m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());
    // All simd ops should be contiguous for this mapping to work
    Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2_I4     , CVTTPD2DQ);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
    // Bits conversions are pure reinterprets: a register move suffices.
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Add_I4   , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_I4   , PSUBD);
    SET_SIMDOPCODE(Simd128_Lt_I4    , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Gt_I4    , PCMPGTD);
    SET_SIMDOPCODE(Simd128_Eq_I4    , PCMPEQD);
    SET_SIMDOPCODE(Simd128_And_I4   , PAND);
    SET_SIMDOPCODE(Simd128_Or_I4    , POR);
    SET_SIMDOPCODE(Simd128_Xor_I4   , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I4   , XORPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_Or_I16   , POR);
    SET_SIMDOPCODE(Simd128_Xor_I16  , PXOR);
    SET_SIMDOPCODE(Simd128_Not_I16  , XORPS);
    SET_SIMDOPCODE(Simd128_And_I16  , PAND);
    SET_SIMDOPCODE(Simd128_Add_I16  , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_I16  , PSUBB);
    SET_SIMDOPCODE(Simd128_Lt_I16   , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Gt_I16   , PCMPGTB);
    SET_SIMDOPCODE(Simd128_Eq_I16   , PCMPEQB);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I16, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U4 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U8 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U8   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U8  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U16  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U16 , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U16 , MOVAPS);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2_F4      , CVTPD2PS);
    //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4  , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_F4        , CVTDQ2PS);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt16x8Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt8x16Bits_F4    , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint32x4Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint16x8Bits_F4   , MOVAPS);
    SET_SIMDOPCODE(Simd128_FromUint8x16Bits_F4   , MOVAPS);
    // Abs/Neg on floats are lowered with a sign-bit mask plus ANDPS/XORPS.
    SET_SIMDOPCODE(Simd128_Abs_F4   , ANDPS);
    SET_SIMDOPCODE(Simd128_Neg_F4   , XORPS);
    SET_SIMDOPCODE(Simd128_Add_F4   , ADDPS);
    SET_SIMDOPCODE(Simd128_Sub_F4   , SUBPS);
    SET_SIMDOPCODE(Simd128_Mul_F4   , MULPS);
    SET_SIMDOPCODE(Simd128_Div_F4   , DIVPS);
    SET_SIMDOPCODE(Simd128_Sqrt_F4  , SQRTPS);
    SET_SIMDOPCODE(Simd128_Lt_F4    , CMPLTPS); // CMPLTPS
    SET_SIMDOPCODE(Simd128_LtEq_F4  , CMPLEPS); // CMPLEPS
    SET_SIMDOPCODE(Simd128_Eq_F4    , CMPEQPS); // CMPEQPS
    SET_SIMDOPCODE(Simd128_Neq_F4   , CMPNEQPS); // CMPNEQPS
    SET_SIMDOPCODE(Simd128_Gt_F4    , CMPLTPS); // CMPLTPS (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_F4  , CMPLEPS); // CMPLEPS (swap srcs)
#if 0
    SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD);
    SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS);
    SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD);
    SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS);
    SET_SIMDOPCODE(Simd128_Neg_D2, XORPS);
    SET_SIMDOPCODE(Simd128_Add_D2, ADDPD);
    SET_SIMDOPCODE(Simd128_Abs_D2, ANDPD);
    SET_SIMDOPCODE(Simd128_Sub_D2, SUBPD);
    SET_SIMDOPCODE(Simd128_Mul_D2, MULPD);
    SET_SIMDOPCODE(Simd128_Div_D2, DIVPD);
    SET_SIMDOPCODE(Simd128_Min_D2, MINPD);
    SET_SIMDOPCODE(Simd128_Max_D2, MAXPD);
    SET_SIMDOPCODE(Simd128_Sqrt_D2, SQRTPD);
    SET_SIMDOPCODE(Simd128_Lt_D2, CMPLTPD); // CMPLTPD
    SET_SIMDOPCODE(Simd128_LtEq_D2, CMPLEPD); // CMPLEPD
    SET_SIMDOPCODE(Simd128_Eq_D2, CMPEQPD); // CMPEQPD
    SET_SIMDOPCODE(Simd128_Neq_D2, CMPNEQPD); // CMPNEQPD
    SET_SIMDOPCODE(Simd128_Gt_D2, CMPLTPD); // CMPLTPD (swap srcs)
    SET_SIMDOPCODE(Simd128_GtEq_D2, CMPLEPD); // CMPLEPD (swap srcs)
#endif // 0
    SET_SIMDOPCODE(Simd128_And_I8   , PAND);
    SET_SIMDOPCODE(Simd128_Or_I8    , POR);
    SET_SIMDOPCODE(Simd128_Xor_I8   , XORPS);
    SET_SIMDOPCODE(Simd128_Not_I8   , XORPS);
    SET_SIMDOPCODE(Simd128_Add_I8   , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_I8   , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_I8   , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_I8    , PCMPEQW);
    SET_SIMDOPCODE(Simd128_Lt_I8    , PCMPGTW); // (swap srcs)
    SET_SIMDOPCODE(Simd128_Gt_I8    , PCMPGTW);
    SET_SIMDOPCODE(Simd128_AddSaturate_I8   , PADDSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_I8   , PSUBSW);
    SET_SIMDOPCODE(Simd128_AddSaturate_I16  , PADDSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_I16  , PSUBSB);
    SET_SIMDOPCODE(Simd128_And_U4   , PAND);
    SET_SIMDOPCODE(Simd128_Or_U4    , POR);
    SET_SIMDOPCODE(Simd128_Xor_U4   , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U4   , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U4   , PADDD);
    SET_SIMDOPCODE(Simd128_Sub_U4   , PSUBD);
    SET_SIMDOPCODE(Simd128_Eq_U4    , PCMPEQD); // same as int32x4.equal
    SET_SIMDOPCODE(Simd128_And_U8   , PAND);
    SET_SIMDOPCODE(Simd128_Or_U8    , POR);
    SET_SIMDOPCODE(Simd128_Xor_U8   , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U8   , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U8   , PADDW);
    SET_SIMDOPCODE(Simd128_Sub_U8   , PSUBW);
    SET_SIMDOPCODE(Simd128_Mul_U8   , PMULLW);
    SET_SIMDOPCODE(Simd128_Eq_U8    , PCMPEQW); // same as int16X8.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U8   , PADDUSW);
    SET_SIMDOPCODE(Simd128_SubSaturate_U8   , PSUBUSW);
    SET_SIMDOPCODE(Simd128_And_U16  , PAND);
    SET_SIMDOPCODE(Simd128_Or_U16   , POR);
    SET_SIMDOPCODE(Simd128_Xor_U16  , XORPS);
    SET_SIMDOPCODE(Simd128_Not_U16  , XORPS);
    SET_SIMDOPCODE(Simd128_Add_U16  , PADDB);
    SET_SIMDOPCODE(Simd128_Sub_U16  , PSUBB);
    SET_SIMDOPCODE(Simd128_Eq_U16   , PCMPEQB); // same as int8x16.equal
    SET_SIMDOPCODE(Simd128_AddSaturate_U16  , PADDUSB);
    SET_SIMDOPCODE(Simd128_SubSaturate_U16  , PSUBUSB);
    SET_SIMDOPCODE(Simd128_And_B4   , PAND);
    SET_SIMDOPCODE(Simd128_Or_B4    , POR);
    SET_SIMDOPCODE(Simd128_Xor_B4   , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B4   , XORPS);
    SET_SIMDOPCODE(Simd128_And_B8   , PAND);
    SET_SIMDOPCODE(Simd128_Or_B8    , POR);
    SET_SIMDOPCODE(Simd128_Xor_B8   , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B8   , XORPS);
    SET_SIMDOPCODE(Simd128_And_B16  , PAND);
    SET_SIMDOPCODE(Simd128_Or_B16   , POR);
    SET_SIMDOPCODE(Simd128_Xor_B16  , XORPS);
    SET_SIMDOPCODE(Simd128_Not_B16  , XORPS);
}
  2798. #undef SIMD_SETOPCODE
  2799. #undef SIMD_GETOPCODE
// FromVar
void
LowererMD::GenerateCheckedSimdLoad(IR::Instr * instr)
{
    // Lowers FromVar(var -> simd128): unboxes the simd value out of the heap
    // SIMD object. When the instr carries bailout info, the fast path is
    // guarded by an object test plus a vtable check, and a type mismatch
    // bails out.
    Assert(instr->m_opcode == Js::OpCode::FromVar);
    Assert(instr->GetSrc1()->GetType() == TyVar);
    Assert(IRType_IsSimd128(instr->GetDst()->GetType()));
    bool checkRequired = instr->HasBailOutInfo();
    IR::LabelInstr * labelHelper = nullptr, * labelDone = nullptr;
    IR::Instr * insertInstr = instr, * newInstr;
    IR::RegOpnd * src = instr->GetSrc1()->AsRegOpnd(), * dst = instr->GetDst()->AsRegOpnd();
    Assert(!checkRequired || instr->GetBailOutKind() == IR::BailOutSimd128F4Only || instr->GetBailOutKind() == IR::BailOutSimd128I4Only);
    if (checkRequired)
    {
        // Layout:
        //   <object test>  -> labelHelper on tagged value
        //   CMP [src], vtable ; JNE labelHelper
        //   MOVUPS dst, [src + offset(value)] ; JMP labelDone
        //   labelHelper: <bailout>  (instr becomes the bailout call)
        //   labelDone:
        labelHelper = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
        labelDone = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
        instr->InsertBefore(labelHelper);
        instr->InsertAfter(labelDone);
        insertInstr = labelHelper;
        GenerateObjectTest(instr->GetSrc1(), insertInstr, labelHelper);
        // Vtable check distinguishes Float32x4 vs Int32x4 boxes.
        newInstr = IR::Instr::New(Js::OpCode::CMP, instr->m_func);
        newInstr->SetSrc1(IR::IndirOpnd::New(instr->GetSrc1()->AsRegOpnd(), 0, TyMachPtr, instr->m_func));
        newInstr->SetSrc2(m_lowerer->LoadVTableValueOpnd(instr, dst->GetType() == TySimd128F4 ? VTableValue::VtableSimd128F4 : VTableValue::VtableSimd128I4));
        insertInstr->InsertBefore(newInstr);
        Legalize(newInstr);
        insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, labelHelper, this->m_func));
        instr->UnlinkSrc1();
        instr->UnlinkDst();
        this->m_lowerer->GenerateBailOut(instr);
    }
    size_t valueOffset = dst->GetType() == TySimd128F4 ? Js::JavascriptSIMDFloat32x4::GetOffsetOfValue() : Js::JavascriptSIMDInt32x4::GetOffsetOfValue();
    Assert(valueOffset < INT_MAX);
    newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::IndirOpnd::New(src, static_cast<int>(valueOffset), dst->GetType(), this->m_func), this->m_func);
    insertInstr->InsertBefore(newInstr);
    // NOTE(review): when checkRequired is false, labelDone is still nullptr
    // here, so this inserts a JMP with a null target and the original FromVar
    // instr is left in place — presumably callers only reach this path with
    // bailout info attached; verify against call sites.
    insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, this->m_func));
    // FromVar is converted to BailOut call. Don't remove.
}
  2837. // ToVar
  2838. void LowererMD::GenerateSimdStore(IR::Instr * instr)
  2839. {
  2840. IR::RegOpnd *dst, *src;
  2841. IRType type;
  2842. dst = instr->GetDst()->AsRegOpnd();
  2843. src = instr->GetSrc1()->AsRegOpnd();
  2844. type = src->GetType();
  2845. this->m_lowerer->LoadScriptContext(instr);
  2846. IR::Instr * instrCall = IR::Instr::New(Js::OpCode::CALL, instr->GetDst(),
  2847. IR::HelperCallOpnd::New(type == TySimd128F4 ? IR::HelperAllocUninitializedSimdF4 : IR::HelperAllocUninitializedSimdI4, this->m_func), this->m_func);
  2848. instr->InsertBefore(instrCall);
  2849. this->lowererMDArch.LowerCall(instrCall, 0);
  2850. IR::Opnd * valDst;
  2851. if (type == TySimd128F4)
  2852. {
  2853. valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDFloat32x4::GetOffsetOfValue(), TySimd128F4, this->m_func);
  2854. }
  2855. else
  2856. {
  2857. valDst = IR::IndirOpnd::New(dst, (int32)Js::JavascriptSIMDInt32x4::GetOffsetOfValue(), TySimd128I4, this->m_func);
  2858. }
  2859. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVUPS, valDst, src, this->m_func));
  2860. instr->Remove();
  2861. }
  2862. void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
  2863. {
  2864. Assert(lanes);
  2865. Assert(lanesSrc);
  2866. Assert(fromSrc1 && fromSrc2);
  2867. *fromSrc1 = 0;
  2868. *fromSrc2 = 0;
  2869. for (uint i = 0; i < 4; i++)
  2870. {
  2871. if (lanes[i] >= 0 && lanes[i] < 4)
  2872. {
  2873. (*fromSrc1)++;
  2874. lanesSrc[i] = 1;
  2875. }
  2876. else if (lanes[i] >= 4 && lanes[i] < 8)
  2877. {
  2878. (*fromSrc2)++;
  2879. lanesSrc[i] = 2;
  2880. }
  2881. else
  2882. {
  2883. Assert(UNREACHED);
  2884. }
  2885. }
  2886. }
  2887. void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
  2888. {
  2889. int8 shufMask;
  2890. uint8 normLanes[4];
  2891. IR::RegOpnd * tmp = IR::RegOpnd::New(TySimd128I4, m_func);
  2892. for (uint i = 0; i < 4; i++)
  2893. {
  2894. normLanes[i] = (lanes[i] >= 4) ? (lanes[i] - 4) : lanes[i];
  2895. }
  2896. shufMask = (int8)((normLanes[3] << 6) | (normLanes[2] << 4) | (normLanes[1] << 2) | normLanes[0]);
  2897. // ToDo: Move this to legalization code
  2898. if (dst->IsEqual(src1))
  2899. {
  2900. // instruction already legal
  2901. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2902. }
  2903. else if (dst->IsEqual(src2))
  2904. {
  2905. // MOVAPS tmp, dst
  2906. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp, dst, m_func));
  2907. // MOVAPS dst, src1
  2908. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  2909. // SHUF dst, tmp, imm8
  2910. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, tmp, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2911. }
  2912. else
  2913. {
  2914. // MOVAPS dst, src1
  2915. instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
  2916. // SHUF dst, src2, imm8
  2917. instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, src2, IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true), m_func));
  2918. }
  2919. }
  2920. BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
  2921. {
  2922. return (1 << Lowerer::GetArrayIndirScale(arrType));
  2923. }