| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "Backend.h"
- static IR::Instr* removeInstr(IR::Instr* instr);
- #ifdef ENABLE_WASM_SIMD
- static IR::Instr* removeInstr(IR::Instr* instr)
- {
- IR::Instr* prevInstr;
- prevInstr = instr->m_prev;
- instr->Remove();
- return prevInstr;
- }
// Looks up the machine-dependent opcode registered for a Simd128 IR opcode. The table is indexed by
// the opcode's distance from Js::OpCode::Simd128_Start; a zero entry means no opcode was registered.
// The argument is parenthesized so that expression arguments (e.g. a conditional) are evaluated as a
// whole before the subtraction.
#define GET_SIMDOPCODE(irOpcode) m_simd128OpCodesMap[(uint32)((irOpcode) - Js::OpCode::Simd128_Start)]

// Registers Js::OpCode::mdOpcode as the machine opcode for Js::OpCode::irOpcode. Both arguments are
// bare enumerator names; the Js::OpCode:: qualifier is added here. Asserts that the slot has not been
// set yet and that mdOpcode lies above Js::OpCode::MDStart.
// NOTE: expands to several statements - do not use it as the body of an unbraced if/else.
#define SET_SIMDOPCODE(irOpcode, mdOpcode) \
    Assert((uint32)m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] == 0);\
    Assert(Js::OpCode::mdOpcode > Js::OpCode::MDStart);\
    m_simd128OpCodesMap[(uint32)(Js::OpCode::irOpcode - Js::OpCode::Simd128_Start)] = Js::OpCode::mdOpcode;
- IR::Instr* LowererMD::Simd128Instruction(IR::Instr *instr)
- {
- // Currently only handles type-specialized/asm.js opcodes
- if (!instr->GetDst())
- {
- // SIMD ops always have DST in asmjs
- Assert(!instr->m_func->GetJITFunctionBody()->IsAsmJsMode());
- // unused result. Do nothing.
- IR::Instr * pInstr = instr->m_prev;
- instr->Remove();
- return pInstr;
- }
- if (Simd128TryLowerMappedInstruction(instr))
- {
- return instr->m_prev;
- }
- return Simd128LowerUnMappedInstruction(instr);
- }
- bool LowererMD::Simd128TryLowerMappedInstruction(IR::Instr *instr)
- {
- bool legalize = true;
- Js::OpCode opcode = GET_SIMDOPCODE(instr->m_opcode);
- if ((uint32)opcode == 0)
- return false;
- Assert(instr->GetDst() && instr->GetDst()->IsRegOpnd() && instr->GetDst()->IsSimd128() || instr->GetDst()->GetType() == TyInt32);
- Assert(instr->GetSrc1() && instr->GetSrc1()->IsRegOpnd() && instr->GetSrc1()->IsSimd128());
- Assert(!instr->GetSrc2() || (((instr->GetSrc2()->IsRegOpnd() && instr->GetSrc2()->IsSimd128()) || (instr->GetSrc2()->IsIntConstOpnd() && instr->GetSrc2()->GetType() == TyInt8))));
- switch (instr->m_opcode)
- {
- case Js::OpCode::Simd128_Abs_F4:
- Assert(opcode == Js::OpCode::ANDPS);
- instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
- break;
- case Js::OpCode::Simd128_Abs_D2:
- Assert(opcode == Js::OpCode::ANDPD);
- instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AbsMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
- break;
- case Js::OpCode::Simd128_Neg_F4:
- Assert(opcode == Js::OpCode::XORPS);
- instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), instr->GetSrc1()->GetType(), m_func));
- break;
- case Js::OpCode::Simd128_Neg_D2:
- Assert(opcode == Js::OpCode::XORPS);
- instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskD2Addr(), instr->GetSrc1()->GetType(), m_func));
- break;
- case Js::OpCode::Simd128_Not_I4:
- case Js::OpCode::Simd128_Not_I16:
- case Js::OpCode::Simd128_Not_I8:
- case Js::OpCode::Simd128_Not_U4:
- case Js::OpCode::Simd128_Not_U8:
- case Js::OpCode::Simd128_Not_U16:
- case Js::OpCode::Simd128_Not_B4:
- case Js::OpCode::Simd128_Not_B8:
- case Js::OpCode::Simd128_Not_B16:
- Assert(opcode == Js::OpCode::XORPS);
- instr->SetSrc2(IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), instr->GetSrc1()->GetType(), m_func));
- break;
- case Js::OpCode::Simd128_Gt_F4:
- case Js::OpCode::Simd128_Gt_D2:
- case Js::OpCode::Simd128_GtEq_F4:
- case Js::OpCode::Simd128_GtEq_D2:
- case Js::OpCode::Simd128_Lt_I4:
- case Js::OpCode::Simd128_Lt_I8:
- case Js::OpCode::Simd128_Lt_I16:
- {
- Assert(opcode == Js::OpCode::CMPLTPS || opcode == Js::OpCode::CMPLTPD || opcode == Js::OpCode::CMPLEPS
- || opcode == Js::OpCode::CMPLEPD || opcode == Js::OpCode::PCMPGTD || opcode == Js::OpCode::PCMPGTB
- || opcode == Js::OpCode::PCMPGTW );
- // swap operands
- auto *src1 = instr->UnlinkSrc1();
- auto *src2 = instr->UnlinkSrc2();
- instr->SetSrc1(src2);
- instr->SetSrc2(src1);
- break;
- }
- }
- instr->m_opcode = opcode;
- if (legalize)
- {
- //MakeDstEquSrc1(instr);
- Legalize(instr);
- }
- return true;
- }
- IR::MemRefOpnd *
- LowererMD::LoadSimdHelperArgument(IR::Instr * instr, uint8 index)
- {
- //the most reliable way to pass a simd value on x86/x64 win/lnx across calls
- //is to pass a pointer to a SIMD value in the simd temporary area.
- //otherwise we have to use __m128 and msvc intrinsics which may or may not be the same across
- //MSVC and Clang
- IR::MemRefOpnd* srcMemRef = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), TySimd128F4, m_func);
- IR::AddrOpnd* argAddress = IR::AddrOpnd::New(m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(index), IR::AddrOpndKindDynamicMisc, m_func, true /* doesn't come from a user */);
- LoadHelperArgument(instr, argAddress);
- return srcMemRef;
- }
- IR::Instr* LowererMD::Simd128LowerUnMappedInstruction(IR::Instr *instr)
- {
- switch (instr->m_opcode)
- {
- case Js::OpCode::Simd128_LdC:
- return Simd128LoadConst(instr);
- #ifdef ENABLE_SIMD
- case Js::OpCode::Simd128_FloatsToF4:
- case Js::OpCode::Simd128_IntsToI4:
- case Js::OpCode::Simd128_IntsToU4:
- case Js::OpCode::Simd128_IntsToB4:
- return Simd128LowerConstructor_4(instr);
- case Js::OpCode::Simd128_IntsToI8:
- case Js::OpCode::Simd128_IntsToU8:
- case Js::OpCode::Simd128_IntsToB8:
- return Simd128LowerConstructor_8(instr);
- case Js::OpCode::Simd128_IntsToI16:
- case Js::OpCode::Simd128_IntsToU16:
- case Js::OpCode::Simd128_IntsToB16:
- return Simd128LowerConstructor_16(instr);
- case Js::OpCode::Simd128_Rcp_F4:
- //case Js::OpCode::Simd128_Rcp_D2:
- return Simd128LowerRcp(instr);
- //SQRT
- case Js::OpCode::Simd128_RcpSqrt_F4:
- //case Js::OpCode::Simd128_RcpSqrt_D2:
- return Simd128LowerRcpSqrt(instr);
- case Js::OpCode::Simd128_Select_F4:
- case Js::OpCode::Simd128_Select_I4:
- //case Js::OpCode::Simd128_Select_D2:
- case Js::OpCode::Simd128_Select_I8:
- case Js::OpCode::Simd128_Select_I16:
- case Js::OpCode::Simd128_Select_U4:
- case Js::OpCode::Simd128_Select_U8:
- case Js::OpCode::Simd128_Select_U16:
- return Simd128LowerSelect(instr);
- #endif
- #if 0
- case Js::OpCode::Simd128_DoublesToD2:
- return Simd128LowerConstructor_2(instr);
- #endif // 0
- case Js::OpCode::Simd128_ExtractLane_I2:
- case Js::OpCode::Simd128_ExtractLane_I4:
- case Js::OpCode::Simd128_ExtractLane_I8:
- case Js::OpCode::Simd128_ExtractLane_I16:
- case Js::OpCode::Simd128_ExtractLane_U4:
- case Js::OpCode::Simd128_ExtractLane_U8:
- case Js::OpCode::Simd128_ExtractLane_U16:
- case Js::OpCode::Simd128_ExtractLane_B4:
- case Js::OpCode::Simd128_ExtractLane_B8:
- case Js::OpCode::Simd128_ExtractLane_B16:
- case Js::OpCode::Simd128_ExtractLane_F4:
- return Simd128LowerLdLane(instr);
- case Js::OpCode::Simd128_ReplaceLane_I2:
- case Js::OpCode::Simd128_ReplaceLane_D2:
- return SIMD128LowerReplaceLane_2(instr);
- case Js::OpCode::Simd128_ReplaceLane_I4:
- case Js::OpCode::Simd128_ReplaceLane_F4:
- case Js::OpCode::Simd128_ReplaceLane_U4:
- case Js::OpCode::Simd128_ReplaceLane_B4:
- return SIMD128LowerReplaceLane_4(instr);
- case Js::OpCode::Simd128_ReplaceLane_I8:
- case Js::OpCode::Simd128_ReplaceLane_U8:
- case Js::OpCode::Simd128_ReplaceLane_B8:
- return SIMD128LowerReplaceLane_8(instr);
- case Js::OpCode::Simd128_ReplaceLane_I16:
- case Js::OpCode::Simd128_ReplaceLane_U16:
- case Js::OpCode::Simd128_ReplaceLane_B16:
- return SIMD128LowerReplaceLane_16(instr);
- case Js::OpCode::Simd128_Splat_F4:
- case Js::OpCode::Simd128_Splat_I4:
- case Js::OpCode::Simd128_Splat_I2:
- case Js::OpCode::Simd128_Splat_D2:
- case Js::OpCode::Simd128_Splat_I8:
- case Js::OpCode::Simd128_Splat_I16:
- case Js::OpCode::Simd128_Splat_U4:
- case Js::OpCode::Simd128_Splat_U8:
- case Js::OpCode::Simd128_Splat_U16:
- case Js::OpCode::Simd128_Splat_B4:
- case Js::OpCode::Simd128_Splat_B8:
- case Js::OpCode::Simd128_Splat_B16:
- return Simd128LowerSplat(instr);
- case Js::OpCode::Simd128_Sqrt_F4:
- //case Js::OpCode::Simd128_Sqrt_D2:
- return Simd128LowerSqrt(instr);
- case Js::OpCode::Simd128_Neg_I4:
- case Js::OpCode::Simd128_Neg_I8:
- case Js::OpCode::Simd128_Neg_I16:
- case Js::OpCode::Simd128_Neg_U4:
- case Js::OpCode::Simd128_Neg_U8:
- case Js::OpCode::Simd128_Neg_U16:
- return Simd128LowerNeg(instr);
- case Js::OpCode::Simd128_Mul_I4:
- case Js::OpCode::Simd128_Mul_U4:
- return Simd128LowerMulI4(instr);
- case Js::OpCode::Simd128_Mul_I16:
- case Js::OpCode::Simd128_Mul_U16:
- return Simd128LowerMulI16(instr);
- case Js::OpCode::Simd128_ShRtByScalar_I4:
- case Js::OpCode::Simd128_ShLtByScalar_I4:
- case Js::OpCode::Simd128_ShRtByScalar_I8:
- case Js::OpCode::Simd128_ShLtByScalar_I8:
- case Js::OpCode::Simd128_ShLtByScalar_I16:
- case Js::OpCode::Simd128_ShRtByScalar_I16:
- case Js::OpCode::Simd128_ShRtByScalar_U4:
- case Js::OpCode::Simd128_ShLtByScalar_U4:
- case Js::OpCode::Simd128_ShRtByScalar_U8:
- case Js::OpCode::Simd128_ShLtByScalar_U8:
- case Js::OpCode::Simd128_ShRtByScalar_U16:
- case Js::OpCode::Simd128_ShLtByScalar_U16:
- case Js::OpCode::Simd128_ShLtByScalar_I2:
- case Js::OpCode::Simd128_ShRtByScalar_U2:
- case Js::OpCode::Simd128_ShRtByScalar_I2:
- return Simd128LowerShift(instr);
- case Js::OpCode::Simd128_LdArr_I4:
- case Js::OpCode::Simd128_LdArr_I8:
- case Js::OpCode::Simd128_LdArr_I16:
- case Js::OpCode::Simd128_LdArr_U4:
- case Js::OpCode::Simd128_LdArr_U8:
- case Js::OpCode::Simd128_LdArr_U16:
- case Js::OpCode::Simd128_LdArr_F4:
- //case Js::OpCode::Simd128_LdArr_D2:
- case Js::OpCode::Simd128_LdArrConst_I4:
- case Js::OpCode::Simd128_LdArrConst_I8:
- case Js::OpCode::Simd128_LdArrConst_I16:
- case Js::OpCode::Simd128_LdArrConst_U4:
- case Js::OpCode::Simd128_LdArrConst_U8:
- case Js::OpCode::Simd128_LdArrConst_U16:
- case Js::OpCode::Simd128_LdArrConst_F4:
- //case Js::OpCode::Simd128_LdArrConst_D2:
- if (m_func->GetJITFunctionBody()->IsAsmJsMode())
- {
- // with bound checks
- return Simd128AsmJsLowerLoadElem(instr);
- }
- else
- {
- // non-AsmJs, boundChecks are extracted from instr
- return Simd128LowerLoadElem(instr);
- }
- case Js::OpCode::Simd128_StArr_I4:
- case Js::OpCode::Simd128_StArr_I8:
- case Js::OpCode::Simd128_StArr_I16:
- case Js::OpCode::Simd128_StArr_U4:
- case Js::OpCode::Simd128_StArr_U8:
- case Js::OpCode::Simd128_StArr_U16:
- case Js::OpCode::Simd128_StArr_F4:
- //case Js::OpCode::Simd128_StArr_D2:
- case Js::OpCode::Simd128_StArrConst_I4:
- case Js::OpCode::Simd128_StArrConst_I8:
- case Js::OpCode::Simd128_StArrConst_I16:
- case Js::OpCode::Simd128_StArrConst_U4:
- case Js::OpCode::Simd128_StArrConst_U8:
- case Js::OpCode::Simd128_StArrConst_U16:
- case Js::OpCode::Simd128_StArrConst_F4:
- //case Js::OpCode::Simd128_StArrConst_D2:
- if (m_func->GetJITFunctionBody()->IsAsmJsMode())
- {
- return Simd128AsmJsLowerStoreElem(instr);
- }
- else
- {
- return Simd128LowerStoreElem(instr);
- }
- case Js::OpCode::Simd128_Swizzle_U4:
- case Js::OpCode::Simd128_Swizzle_I4:
- case Js::OpCode::Simd128_Swizzle_F4:
- //case Js::OpCode::Simd128_Swizzle_D2:
- return Simd128LowerSwizzle_4(instr);
- case Js::OpCode::Simd128_Shuffle_U4:
- case Js::OpCode::Simd128_Shuffle_I4:
- case Js::OpCode::Simd128_Shuffle_F4:
- //case Js::OpCode::Simd128_Shuffle_D2:
- return Simd128LowerShuffle_4(instr);
- case Js::OpCode::Simd128_Swizzle_I8:
- case Js::OpCode::Simd128_Swizzle_I16:
- case Js::OpCode::Simd128_Swizzle_U8:
- case Js::OpCode::Simd128_Swizzle_U16:
- case Js::OpCode::Simd128_Shuffle_I8:
- case Js::OpCode::Simd128_Shuffle_I16:
- case Js::OpCode::Simd128_Shuffle_U8:
- case Js::OpCode::Simd128_Shuffle_U16:
- return Simd128LowerShuffle(instr);
- case Js::OpCode::Simd128_FromUint32x4_F4:
- return Simd128LowerFloat32x4FromUint32x4(instr);
- case Js::OpCode::Simd128_FromFloat32x4_I4:
- return Simd128LowerInt32x4FromFloat32x4(instr);
- case Js::OpCode::Simd128_FromFloat32x4_U4:
- return Simd128LowerUint32x4FromFloat32x4(instr);
- case Js::OpCode::Simd128_FromInt64x2_D2:
- return EmitSimdConversion(instr, IR::HelperSimd128ConvertSD2);
- case Js::OpCode::Simd128_FromUint64x2_D2:
- return EmitSimdConversion(instr, IR::HelperSimd128ConvertUD2);
- case Js::OpCode::Simd128_FromFloat64x2_I2:
- return EmitSimdConversion(instr, IR::HelperSimd128TruncateI2);
- case Js::OpCode::Simd128_FromFloat64x2_U2:
- return EmitSimdConversion(instr, IR::HelperSimd128TruncateU2);
- case Js::OpCode::Simd128_Neq_I4:
- case Js::OpCode::Simd128_Neq_I8:
- case Js::OpCode::Simd128_Neq_I16:
- case Js::OpCode::Simd128_Neq_U4:
- case Js::OpCode::Simd128_Neq_U8:
- case Js::OpCode::Simd128_Neq_U16:
- return Simd128LowerNotEqual(instr);
- case Js::OpCode::Simd128_Lt_U4:
- case Js::OpCode::Simd128_Lt_U8:
- case Js::OpCode::Simd128_Lt_U16:
- case Js::OpCode::Simd128_GtEq_U4:
- case Js::OpCode::Simd128_GtEq_U8:
- case Js::OpCode::Simd128_GtEq_U16:
- return Simd128LowerLessThan(instr);
- case Js::OpCode::Simd128_LtEq_I4:
- case Js::OpCode::Simd128_LtEq_I8:
- case Js::OpCode::Simd128_LtEq_I16:
- case Js::OpCode::Simd128_LtEq_U4:
- case Js::OpCode::Simd128_LtEq_U8:
- case Js::OpCode::Simd128_LtEq_U16:
- case Js::OpCode::Simd128_Gt_U4:
- case Js::OpCode::Simd128_Gt_U8:
- case Js::OpCode::Simd128_Gt_U16:
- return Simd128LowerLessThanOrEqual(instr);
- case Js::OpCode::Simd128_GtEq_I4:
- case Js::OpCode::Simd128_GtEq_I8:
- case Js::OpCode::Simd128_GtEq_I16:
- return Simd128LowerGreaterThanOrEqual(instr);
- case Js::OpCode::Simd128_Min_F4:
- case Js::OpCode::Simd128_Max_F4:
- return Simd128LowerMinMax_F4(instr);
- case Js::OpCode::Simd128_AnyTrue_B2:
- case Js::OpCode::Simd128_AnyTrue_B4:
- case Js::OpCode::Simd128_AnyTrue_B8:
- case Js::OpCode::Simd128_AnyTrue_B16:
- return Simd128LowerAnyTrue(instr);
- case Js::OpCode::Simd128_AllTrue_B2:
- case Js::OpCode::Simd128_AllTrue_B4:
- case Js::OpCode::Simd128_AllTrue_B8:
- case Js::OpCode::Simd128_AllTrue_B16:
- return Simd128LowerAllTrue(instr);
- case Js::OpCode::Simd128_BitSelect_I4:
- return LowerSimd128BitSelect(instr);
- default:
- AssertMsg(UNREACHED, "Unsupported Simd128 instruction");
- }
- return nullptr;
- }
- IR::Instr* LowererMD::LowerSimd128BitSelect(IR::Instr* instr)
- {
- SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
- IR::Opnd *dst = args->Pop();
- IR::Opnd *src1 = args->Pop();
- IR::Opnd *src2 = args->Pop();
- IR::Opnd *mask = args->Pop();
- IR::Instr* pInstr = IR::Instr::New(Js::OpCode::PXOR, dst, src1, src2, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PXOR, dst, dst, src2, m_func));
- return removeInstr(instr);
- }
- IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
- {
- Assert(instr->GetDst() && instr->m_opcode == Js::OpCode::Simd128_LdC);
- Assert(instr->GetDst()->IsSimd128());
- Assert(instr->GetSrc1()->IsSimd128());
- Assert(instr->GetSrc1()->IsSimd128ConstOpnd());
- Assert(instr->GetSrc2() == nullptr);
- AsmJsSIMDValue value = instr->GetSrc1()->AsSimd128ConstOpnd()->m_value;
- // MOVUPS dst, [const]
- void *pValue = NativeCodeDataNewNoFixup(this->m_func->GetNativeCodeDataAllocator(), SIMDType<DataDesc_LowererMD_Simd128LoadConst>, value);
- IR::Opnd * simdRef;
- if (!m_func->IsOOPJIT())
- {
- simdRef = IR::MemRefOpnd::New((void *)pValue, instr->GetDst()->GetType(), instr->m_func);
- }
- else
- {
- int offset = NativeCodeData::GetDataTotalOffset(pValue);
- simdRef = IR::IndirOpnd::New(IR::RegOpnd::New(m_func->GetTopFunc()->GetNativeCodeDataSym(), TyVar, m_func), offset, instr->GetDst()->GetType(),
- #if DBG
- NativeCodeData::GetDataDescription(pValue, m_func->m_alloc),
- #endif
- m_func, true);
- GetLowerer()->addToLiveOnBackEdgeSyms->Set(m_func->GetTopFunc()->GetNativeCodeDataSym()->m_id);
- }
- instr->ReplaceSrc1(simdRef);
- instr->m_opcode = LowererMDArch::GetAssignOp(instr->GetDst()->GetType());
- Legalize(instr);
- return instr->m_prev;
- }
- IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd)
- {
- Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 ||
- instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16 ||
- instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 ||
- instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16
- );
- IR::Instr *pInstr;
- //dst = cmpOpcode dst, X86_ALL_ZEROS
- pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // dst = PANDN dst, X86_ALL_NEG_ONES
- pInstr = IR::Instr::New(Js::OpCode::PANDN, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- return instr;
- }
- IR::Instr* LowererMD::EmitSimdConversion(IR::Instr *instr, IR::JnHelperMethod helper)
- {
- IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
- IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
- m_lowerer->InsertMove(srcMemRef, instr->UnlinkSrc1(), instr);
- IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
- instr->InsertBefore(helperCall);
- this->ChangeToHelperCall(helperCall, helper);
- m_lowerer->InsertMove(instr->UnlinkDst(), dstMemRef, instr);
- return removeInstr(instr);
- }
- void LowererMD::EmitShiftByScalarI2(IR::Instr *instr, IR::JnHelperMethod helper)
- {
- IR::Opnd* src2 = instr->GetSrc2();
- IR::Opnd* dst = instr->GetDst();
- LoadHelperArgument(instr, src2);
- IR::MemRefOpnd* srcMemRef = LoadSimdHelperArgument(instr, 0);
- m_lowerer->InsertMove(srcMemRef, instr->GetSrc1(), instr);
- IR::MemRefOpnd* dstMemRef = LoadSimdHelperArgument(instr, 1);
- IR::Instr * helperCall = IR::Instr::New(Js::OpCode::CALL, this->m_func);
- instr->InsertBefore(helperCall);
- this->ChangeToHelperCall(helperCall, helper);
- m_lowerer->InsertMove(dst, dstMemRef, instr);
- }
- // Lowers a two-lane replace-lane: dst = src1 with the lane selected by the constant src2 replaced by src3.
- // Simd128_ReplaceLane_D2 takes a float64 src3; any other opcode reaching here takes an int64 src3.
- // Extended args are popped in the order dst, src1, lane index, replacement value.
- // Returns whatever removeInstr(instr) returns.
- IR::Instr * LowererMD::SIMD128LowerReplaceLane_2(IR::Instr *instr)
- {
- SList<IR::Opnd*> *extendedArgs = Simd128GetExtendedArgs(instr);
- IR::Opnd *dst = extendedArgs->Pop();
- IR::Opnd *src1 = extendedArgs->Pop();
- IR::Opnd *laneOpnd = extendedArgs->Pop();
- IR::Opnd *src3 = extendedArgs->Pop();
- const int laneIndex = laneOpnd->AsIntConstOpnd()->AsInt32();
- Assert(dst->IsSimd128() && src1->IsSimd128());
- if (instr->m_opcode != Js::OpCode::Simd128_ReplaceLane_D2)
- {
- // 64-bit integer lanes.
- Assert(src3->IsInt64());
- if (!AutoSystemInfo::Data.SSE4_1Available())
- {
- // Without SSE4.1 the replacement is done by the HelperSimd128ReplaceLaneI2 runtime helper:
- // lane index and value are plain arguments, vectors go through the SIMD helper-argument slots.
- LoadHelperArgument(instr, laneOpnd);
- LoadInt64HelperArgument(instr, src3);
- IR::MemRefOpnd* inputSlot = LoadSimdHelperArgument(instr, 0);
- m_lowerer->InsertMove(inputSlot, src1, instr);
- IR::MemRefOpnd* outputSlot = LoadSimdHelperArgument(instr, 1);
- IR::Instr * call = IR::Instr::New(Js::OpCode::CALL, m_func);
- instr->InsertBefore(call);
- ChangeToHelperCall(call, IR::HelperSimd128ReplaceLaneI2);
- m_lowerer->InsertMove(dst, outputSlot, instr);
- return removeInstr(instr);
- }
- // With SSE4.1: copy the source vector, then let EmitInsertInt64 write the lane into instr's dst.
- m_lowerer->InsertMove(dst, src1, instr);
- instr->SetDst(dst);
- EmitInsertInt64(src3, laneIndex, instr);
- return removeInstr(instr);
- }
- // Float64 lanes.
- AssertMsg(AutoSystemInfo::Data.SSE2Available(), "SSE2 not supported");
- Assert(src3->IsFloat64());
- m_lowerer->InsertMove(dst, src1, instr);
- if (laneIndex == 0)
- {
- // Lane 0: MOVSD dst, src3
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src3, m_func));
- }
- else
- {
- // Non-zero lane: SHUFPD dst, src3, 0
- instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPD, dst, src3, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
- }
- return removeInstr(instr);
- }
- // Inserts the 64-bit integer operand `src` into lane `index` of instr's destination (a Simd128 operand).
- // Every emitted instruction is placed before `instr`; `instr` itself is neither modified nor removed here.
- // NOTE(review): unlike EmitExtractInt64, `index` is not asserted to be 0 or 1. The non-SSE4.1 path
- // treats any non-zero index as the high lane -- confirm callers only pass 0 or 1.
- void LowererMD::EmitInsertInt64(IR::Opnd* src, uint index, IR::Instr *instr)
- {
- IR::Opnd* dst = instr->GetDst();
- Assert(dst->IsSimd128() && src->IsInt64());
- if (AutoSystemInfo::Data.SSE4_1Available())
- {
- // SSE4.1: insert directly from the integer operand.
- #ifdef _M_IX86
- // x86: the value is a low/high pair, so it is written as two dwords at dword lanes 2*index and 2*index+1.
- index *= 2;
- Int64RegPair srcPair = m_func->FindOrCreateInt64Pair(src);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.low, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRD, dst, srcPair.high, IR::IntConstOpnd::New(index + 1, TyInt8, m_func, true), m_func));
- #else
- // Otherwise a single PINSRQ dst, src, index.
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PINSRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
- #endif
- }
- else
- {
- // No SSE4.1: store the value to the SIMD temp area, then load it from there into one half of dst.
- intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
- #ifdef _M_IX86
- // x86: store the low half at the temp address and the high half 4 bytes after it.
- Int64RegPair src1Pair = m_func->FindOrCreateInt64Pair(src);
- IR::Opnd* lower = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
- m_lowerer->InsertMove(lower, src1Pair.low, instr);
- IR::Opnd* higher = IR::MemRefOpnd::New(tempSIMD + 4, TyMachPtr, m_func);
- m_lowerer->InsertMove(higher, src1Pair.high, instr);
- #else
- IR::Opnd* mem = IR::MemRefOpnd::New(tempSIMD, TyMachPtr, m_func);
- m_lowerer->InsertMove(mem, src, instr);
- #endif
- // Refer to the same temp address as a float64 operand so it can feed MOVLPD/MOVHPD.
- IR::MemRefOpnd* tmp = IR::MemRefOpnd::New(tempSIMD, TyFloat64, m_func);
- // Non-zero index selects MOVHPD, zero selects MOVLPD.
- Js::OpCode opcode = (index) ? Js::OpCode::MOVHPD : Js::OpCode::MOVLPD;
- IR::Instr* newInstr = IR::Instr::New(opcode, dst, tmp, m_func);
- instr->InsertBefore(newInstr);
- newInstr->HoistMemRefAddress(tmp, Js::OpCode::MOV);
- Legalize(newInstr);
- }
- }
- // Extracts 64-bit lane `index` (0 or 1) of the Simd128 operand `src` into the int64 operand `dst`.
- // All emitted instructions are inserted before `instr`; `instr` itself is left for the caller.
- void LowererMD::EmitExtractInt64(IR::Opnd* dst, IR::Opnd* src, uint index, IR::Instr *instr)
- {
- Assert(index == 0 || index == 1);
- Assert(dst->IsInt64() && src->IsSimd128());
- if (!AutoSystemInfo::Data.SSE4_1Available())
- {
- // No SSE4.1: for lane 1, first shuffle dwords 2 and 3 down into positions 0 and 1 of a fresh register.
- IR::Opnd* laneSource = src;
- if (index != 0)
- {
- laneSource = IR::RegOpnd::New(TySimd128F4, m_func);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, laneSource, src, IR::IntConstOpnd::New(2 | 3 << 2, TyInt8, m_func, true), m_func));
- }
- // A separate instruction is created for LowerReinterpretPrimitive to transform, so that
- // `instr` stays available as the dummy the caller removes.
- IR::Instr* extractInstr = IR::Instr::New(Js::OpCode::Simd128_ExtractLane_I2, dst, laneSource->UseWithNewType(TyFloat64, m_func), m_func);
- instr->InsertBefore(extractInstr);
- m_lowerer->LowerReinterpretPrimitive(extractInstr);
- return;
- }
- // SSE4.1: extract directly into the integer destination.
- #ifdef _M_IX86
- // x86: the destination is a low/high pair, read from dword lanes 2*index and 2*index+1.
- const uint lowDword = index * 2;
- Int64RegPair dstPair = m_func->FindOrCreateInt64Pair(dst);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.low, src, IR::IntConstOpnd::New(lowDword, TyInt8, m_func, true), m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRD, dstPair.high, src, IR::IntConstOpnd::New(lowDword + 1, TyInt8, m_func, true), m_func));
- #else
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PEXTRQ, dst, src, IR::IntConstOpnd::New(index, TyInt8, m_func, true), m_func));
- #endif
- }
- // Lowers the Simd128_ExtractLane_* opcodes: reads the lane selected by the int constant src2 out of
- // the Simd128 register src1 into the scalar register dst.
- // Lanes narrower than 4 bytes are extracted by first fetching the containing dword and then
- // shifting/masking/sign-extending inside dst; bool lane types are additionally normalized with CMP/SETNE.
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
- {
- IR::Opnd* dst, *src1, *src2;
- Js::OpCode movOpcode = Js::OpCode::MOVSS;
- uint laneWidth = 0, laneIndex = 0, shamt = 0, mask = 0;
- IRType laneType = TyInt32;
- dst = instr->GetDst();
- src1 = instr->GetSrc1();
- src2 = instr->GetSrc2();
- Assert(dst && dst->IsRegOpnd() && (dst->GetType() == TyFloat32 || dst->GetType() == TyInt32 || dst->GetType() == TyUint32 || dst->GetType() == TyFloat64 || dst->IsInt64()));
- Assert(src1 && src1->IsRegOpnd() && src1->IsSimd128());
- Assert(src2 && src2->IsIntConstOpnd());
- laneIndex = (uint)src2->AsIntConstOpnd()->AsUint32();
- // Default granularity is one dword; only the 64-bit lane opcode overrides it below.
- laneWidth = 4;
- switch (instr->m_opcode)
- {
- case Js::OpCode::Simd128_ExtractLane_I2:
- laneWidth = 8;
- break;
- case Js::OpCode::Simd128_ExtractLane_F4:
- movOpcode = Js::OpCode::MOVSS;
- Assert(laneIndex < 4);
- break;
- case Js::OpCode::Simd128_ExtractLane_I8:
- case Js::OpCode::Simd128_ExtractLane_U8:
- case Js::OpCode::Simd128_ExtractLane_B8:
- // 16-bit lanes: two per dword. shamt is the bit offset inside the dword, laneIndex becomes the dword index.
- movOpcode = Js::OpCode::MOVD;
- Assert(laneIndex < 8);
- shamt = (laneIndex % 2) * 16;
- laneIndex = laneIndex / 2;
- laneType = TyInt16;
- mask = 0x0000ffff;
- break;
- case Js::OpCode::Simd128_ExtractLane_I16:
- case Js::OpCode::Simd128_ExtractLane_U16:
- case Js::OpCode::Simd128_ExtractLane_B16:
- // 8-bit lanes: four per dword. shamt is the bit offset inside the dword, laneIndex becomes the dword index.
- movOpcode = Js::OpCode::MOVD;
- Assert(laneIndex < 16);
- shamt = (laneIndex % 4) * 8;
- laneIndex = laneIndex / 4;
- laneType = TyInt8;
- mask = 0x000000ff;
- break;
- case Js::OpCode::Simd128_ExtractLane_U4:
- case Js::OpCode::Simd128_ExtractLane_I4:
- case Js::OpCode::Simd128_ExtractLane_B4:
- movOpcode = Js::OpCode::MOVD;
- Assert(laneIndex < 4);
- break;
- default:
- Assert(UNREACHED);
- }
- if (laneWidth == 8) //Simd128_ExtractLane_I2
- {
- EmitExtractInt64(dst, instr->GetSrc1(), laneIndex, instr);
- }
- else
- {
- IR::Opnd* tmp = src1;
- if (laneIndex != 0)
- {
- // Byte-shift the wanted dword down to position 0 in a fresh register.
- // tmp = PSRLDQ src1, laneWidth * laneIndex
- tmp = IR::RegOpnd::New(src1->GetType(), m_func);
- IR::Instr *shiftInstr = IR::Instr::New(Js::OpCode::PSRLDQ, tmp, src1, IR::IntConstOpnd::New(laneWidth * laneIndex, TyInt8, m_func, true), m_func);
- instr->InsertBefore(shiftInstr);
- Legalize(shiftInstr);
- }
- // MOVSS/MOVSD/MOVD dst, tmp
- // For MOVD dst is used as-is; otherwise dst is re-typed to tmp's (vector) type for the move.
- instr->InsertBefore(IR::Instr::New(movOpcode, movOpcode == Js::OpCode::MOVD ? dst : dst->UseWithNewType(tmp->GetType(), m_func), tmp, m_func));
- }
- // dst has the 4-byte lane
- if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
- instr->m_opcode == Js::OpCode::Simd128_ExtractLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
- {
- // extract the 1/2 bytes sublane
- IR::Instr *newInstr = nullptr;
- if (shamt != 0)
- {
- // SHR dst, dst, shamt
- newInstr = IR::Instr::New(Js::OpCode::SHR, dst, dst, IR::IntConstOpnd::New((IntConstType)shamt, TyInt8, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- }
- Assert(laneType == TyInt8 || laneType == TyInt16);
- // zero or sign-extend upper bits
- // Only the signed I8/I16 opcodes sign-extend; the U* and B* opcodes take the AND-mask branch.
- if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_I16)
- {
- if (laneType == TyInt8)
- {
- // Byte lane: go through a TyInt8 temp, then MOVSX back into dst.
- IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
- newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func);
- }
- else
- {
- // Word lane: MOVSXW dst, dst viewed as TyInt16.
- newInstr = IR::Instr::New(Js::OpCode::MOVSXW, dst, dst->UseWithNewType(laneType, m_func), m_func);
- }
- }
- else
- {
- newInstr = IR::Instr::New(Js::OpCode::AND, dst, dst, IR::IntConstOpnd::New(mask, TyInt32, m_func), m_func);
- }
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- }
- // Bool lane types: compare the extracted lane with 0 and materialize the SETNE result in dst.
- if (instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B8 ||
- instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
- {
- IR::Instr* pInstr = nullptr;
- IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
- // cmp dst, 0
- pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
- pInstr->SetSrc1(dst->UseWithNewType(laneType, m_func));
- pInstr->SetSrc2(IR::IntConstOpnd::New(0, laneType, m_func, true));
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // mov tmp(TyInt8), dst
- pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // setne tmp(TyInt8)
- pInstr = IR::Instr::New(Js::OpCode::SETNE, tmp, tmp, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // movsx dst, tmp(TyInt8)
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
- }
- IR::Instr* prevInstr = instr->m_prev;
- instr->Remove();
- return prevInstr;
- }
- // Lowers the Simd128_Splat_* opcodes: broadcasts the scalar register src1 into the Simd128 register dst.
- // F4/I4/U4/D2 use a scalar move followed by a shuffle with immediate 0 (emitted after the switch);
- // every other opcode emits its complete sequence inside the switch and sets bSkip.
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerSplat(IR::Instr *instr)
- {
- Js::OpCode shufOpCode = Js::OpCode::SHUFPS, movOpCode = Js::OpCode::MOVSS;
- IR::Opnd *dst, *src1;
- IR::Instr *pInstr = nullptr;
- dst = instr->GetDst();
- src1 = instr->GetSrc1();
- Assert(dst && dst->IsRegOpnd() && dst->IsSimd128());
- Assert(src1 && src1->IsRegOpnd() && (src1->GetType() == TyFloat32 || src1->GetType() == TyInt32 || src1->GetType() == TyFloat64 ||
- src1->GetType() == TyInt16 || src1->GetType() == TyInt8 || src1->GetType() == TyUint16 ||
- src1->GetType() == TyUint8 || src1->GetType() == TyUint32 || src1->IsInt64()));
- Assert(!instr->GetSrc2());
- IR::Opnd* tempTruncate = nullptr;
- // bSkip == true means the switch already emitted everything and the generic mov+shuffle tail must not run.
- bool bSkip = false;
- // NOTE(review): both labels are created for every opcode but are only inserted by the B4/B8/B16 case.
- IR::LabelInstr *labelZero = IR::LabelInstr::New(Js::OpCode::Label, m_func);
- IR::LabelInstr *labelDone = IR::LabelInstr::New(Js::OpCode::Label, m_func);
- switch (instr->m_opcode)
- {
- case Js::OpCode::Simd128_Splat_F4:
- shufOpCode = Js::OpCode::SHUFPS;
- movOpCode = Js::OpCode::MOVSS;
- break;
- case Js::OpCode::Simd128_Splat_I4:
- case Js::OpCode::Simd128_Splat_U4:
- shufOpCode = Js::OpCode::PSHUFD;
- movOpCode = Js::OpCode::MOVD;
- break;
- case Js::OpCode::Simd128_Splat_D2:
- shufOpCode = Js::OpCode::SHUFPD;
- movOpCode = Js::OpCode::MOVSD;
- break;
- case Js::OpCode::Simd128_Splat_I2:
- {
- // Insert the 64-bit value into lane 0 of instr's dst, then duplicate it:
- // PSHUFD immediate 68 == 0b01000100 selects dwords {0,1,0,1}.
- EmitInsertInt64(src1, 0, instr);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(68, TyInt8, m_func, true), m_func));
- bSkip = true;
- break;
- }
- case Js::OpCode::Simd128_Splat_I8:
- case Js::OpCode::Simd128_Splat_U8:
- // MOV tempTruncate(bx), src1: truncate the value to 16bit int
- // MOVD dst, tempTruncate(bx)
- // PUNPCKLWD dst, dst
- // PSHUFD dst, dst, 0
- tempTruncate = EnregisterIntConst(instr, src1, TyInt16);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
- bSkip = true;
- break;
- case Js::OpCode::Simd128_Splat_I16:
- case Js::OpCode::Simd128_Splat_U16:
- // MOV tempTruncate(bx), src1: truncate the value to 8bit int
- // MOVD dst, tempTruncate(bx)
- // PUNPCKLBW dst, dst
- // PUNPCKLWD dst, dst
- // PSHUFD dst, dst, 0
- tempTruncate = EnregisterIntConst(instr, src1, TyInt8);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, dst, tempTruncate, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLBW, dst, dst, dst, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PUNPCKLWD, dst, dst, dst, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
- bSkip = true;
- break;
- case Js::OpCode::Simd128_Splat_B4:
- case Js::OpCode::Simd128_Splat_B8:
- case Js::OpCode::Simd128_Splat_B16:
- // Bool splat: dst is loaded from the X86AllNegOnes constant when src1 != 0, and zeroed otherwise.
- // CMP src1, 0
- // JEQ $labelZero
- // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
- // JMP $labelDone
- // $labelZero:
- // XORPS dst, dst
- // $labelDone:
- //pInstr = IR::Instr::New(Js::OpCode::CMP, src1, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func);
- //instr->InsertBefore(pInstr);
- //Legalize(pInstr);
- // cmp src1, 0000h
- pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
- pInstr->SetSrc1(src1);
- pInstr->SetSrc2(IR::IntConstOpnd::New(0x0000, TyInt32, m_func, true));
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //JEQ $labelZero
- instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, labelZero, m_func));
- // MOVAPS dst, xmmword ptr[X86_ALL_NEG_ONES]
- pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // JMP $labelDone
- instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, labelDone, m_func));
- // $labelZero:
- instr->InsertBefore(labelZero);
- // XORPS dst, dst
- instr->InsertBefore(IR::Instr::New(Js::OpCode::XORPS, dst, dst, dst, m_func)); // make dst to be 0
- // $labelDone:
- instr->InsertBefore(labelDone);
- bSkip = true;
- break;
- default:
- Assert(UNREACHED);
- }
- // A float32x4 splat may be handed a float64 scalar; narrow it to float32 first.
- if (instr->m_opcode == Js::OpCode::Simd128_Splat_F4 && instr->GetSrc1()->IsFloat64())
- {
- IR::RegOpnd *regOpnd32 = IR::RegOpnd::New(TyFloat32, this->m_func);
- // CVTSD2SS regOpnd32.f32, src.f64 -- Convert regOpnd from f64 to f32
- instr->InsertBefore(IR::Instr::New(Js::OpCode::CVTSD2SS, regOpnd32, src1, this->m_func));
- src1 = regOpnd32;
- }
- if (!bSkip)
- {
- // Generic tail: movOpCode dst, src1 ; shufOpCode dst, dst, 0
- instr->InsertBefore(IR::Instr::New(movOpCode, dst, src1, m_func));
- instr->InsertBefore(IR::Instr::New(shufOpCode, dst, dst, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
- }
- IR::Instr* prevInstr = instr->m_prev;
- instr->Remove();
- return prevInstr;
- }
- // Lowers a SIMD square root by emitting `SQRTPS dst, src1` in front of `instr`.
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerSqrt(IR::Instr *instr)
- {
- IR::Opnd *dst = instr->GetDst();
- IR::Opnd *src1 = instr->GetSrc1();
- Assert(dst && dst->IsRegOpnd());
- Assert(src1 && src1->IsRegOpnd());
- Assert(instr->GetSrc2() == nullptr);
- // Only the packed single-precision form is emitted; the SQRTPD variant below is compiled out.
- Js::OpCode opcode = Js::OpCode::SQRTPS;
- #if 0
- {
- Assert(instr->m_opcode == Js::OpCode::Simd128_Sqrt_D2);
- opcode = Js::OpCode::SQRTPD;
- }
- #endif // 0
- IR::Instr* sqrtInstr = IR::Instr::New(opcode, dst, src1, m_func);
- instr->InsertBefore(sqrtInstr);
- IR::Instr* resumeAt = instr->m_prev;
- instr->Remove();
- return resumeAt;
- }
- // Lowers integer SIMD negation (Simd128_Neg_{I4,U4,I8,U8,I16,U16}).
- // Emitted sequence: MOVAPS dst, src1 ; PANDN dst, dst, [all-neg-ones constant] ; PADDx dst, dst, [per-lane "all ones" constant],
- // where PADDx and the constant are chosen by lane width (PADDD / PADDW / PADDB).
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerNeg(IR::Instr *instr)
- {
- IR::Opnd* dst = instr->GetDst();
- IR::Opnd* src1 = instr->GetSrc1();
- Assert(dst->IsRegOpnd() && dst->IsSimd128());
- Assert(src1->IsRegOpnd() && src1->IsSimd128());
- Assert(instr->GetSrc2() == nullptr);
- // Defaults match the 32-bit lane opcodes; narrower lanes override both the add and the constant.
- Js::OpCode laneAdd = Js::OpCode::PADDD;
- void * onesConstant = (void*)&X86_ALL_ONES_I4;
- const Js::OpCode irOpcode = instr->m_opcode;
- if (irOpcode == Js::OpCode::Simd128_Neg_I8 || irOpcode == Js::OpCode::Simd128_Neg_U8)
- {
- laneAdd = Js::OpCode::PADDW;
- onesConstant = (void*)&X86_ALL_ONES_I8;
- }
- else if (irOpcode == Js::OpCode::Simd128_Neg_I16 || irOpcode == Js::OpCode::Simd128_Neg_U16)
- {
- laneAdd = Js::OpCode::PADDB;
- onesConstant = (void*)&X86_ALL_ONES_I16;
- }
- else if (irOpcode != Js::OpCode::Simd128_Neg_I4 && irOpcode != Js::OpCode::Simd128_Neg_U4)
- {
- Assert(UNREACHED);
- }
- // MOVAPS dst, src1
- IR::Instr *copyInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
- instr->InsertBefore(copyInstr);
- // PANDN dst, dst, 0xfff...f
- IR::Instr *invertInstr = IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), src1->GetType(), m_func), m_func);
- instr->InsertBefore(invertInstr);
- Legalize(invertInstr);
- // laneAdd dst, dst, {onesConstant}
- IR::Instr *addInstr = IR::Instr::New(laneAdd, dst, dst, IR::MemRefOpnd::New(onesConstant, src1->GetType(), m_func), m_func);
- instr->InsertBefore(addInstr);
- Legalize(addInstr);
- IR::Instr *resumeAt = instr->m_prev;
- instr->Remove();
- return resumeAt;
- }
- // Lowers Simd128_Mul_I4 / Simd128_Mul_U4 (32-bit lane multiply) using PMULUDQ.
- // PMULUDQ is applied once to src1/src2 as given and once to copies byte-shifted right by one
- // 32-bit lane; the low dwords of the products are then gathered with PSHUFD and interleaved
- // into dst with PUNPCKLDQ.
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerMulI4(IR::Instr *instr)
- {
- Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I4 || instr->m_opcode == Js::OpCode::Simd128_Mul_U4);
- IR::Instr *pInstr;
- IR::Opnd* dst = instr->GetDst();
- IR::Opnd* src1 = instr->GetSrc1();
- IR::Opnd* src2 = instr->GetSrc2();
- IR::Opnd* temp1, *temp2, *temp3;
- Assert(dst->IsRegOpnd() && dst->IsSimd128());
- Assert(src1->IsRegOpnd() && src1->IsSimd128());
- Assert(src2->IsRegOpnd() && src2->IsSimd128());
- temp1 = IR::RegOpnd::New(src1->GetType(), m_func);
- temp2 = IR::RegOpnd::New(src1->GetType(), m_func);
- temp3 = IR::RegOpnd::New(src1->GetType(), m_func);
- // temp1 = PMULUDQ src1, src2
- pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp1, src1, src2, m_func);
- instr->InsertBefore(pInstr);
- //MakeDstEquSrc1(pInstr);
- Legalize(pInstr);
- // temp2 = PSRLDQ src1, TySize[TyInt32] (byte shift right by one 32-bit lane)
- pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp2, src1, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
- instr->InsertBefore(pInstr);
- //MakeDstEquSrc1(pInstr);
- Legalize(pInstr);
- // temp3 = PSRLDQ src2, TySize[TyInt32] (byte shift right by one 32-bit lane)
- pInstr = IR::Instr::New(Js::OpCode::PSRLDQ, temp3, src2, IR::IntConstOpnd::New(TySize[TyInt32], TyInt8, m_func, true), m_func);
- instr->InsertBefore(pInstr);
- //MakeDstEquSrc1(pInstr);
- Legalize(pInstr);
- // temp2 = PMULUDQ temp2, temp3
- pInstr = IR::Instr::New(Js::OpCode::PMULUDQ, temp2, temp2, temp3, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //PSHUFD temp1, temp1, 0x8
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp1, temp1, IR::IntConstOpnd::New( 8 /*b00001000*/, TyInt8, m_func, true), m_func));
- //PSHUFD temp2, temp2, 0x8
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, temp2, temp2, IR::IntConstOpnd::New(8 /*b00001000*/, TyInt8, m_func, true), m_func));
- // PUNPCKLDQ dst, temp1, temp2
- pInstr = IR::Instr::New(Js::OpCode::PUNPCKLDQ, dst, temp1, temp2, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- pInstr = instr->m_prev;
- instr->Remove();
- return pInstr;
- }
- // Lowers Simd128_Mul_I16 / Simd128_Mul_U16 (8-bit lane multiply) using 16-bit PMULLW.
- // temp1 receives PMULLW(src1, src2) masked with the low-bytes mask; temp2 receives the product of
- // both sources shifted right by 8 within each word, shifted back left by 8; dst = temp1 | temp2.
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerMulI16(IR::Instr *instr)
- {
- Assert(instr->m_opcode == Js::OpCode::Simd128_Mul_I16 || instr->m_opcode == Js::OpCode::Simd128_Mul_U16);
- IR::Instr *pInstr = nullptr;
- IR::Opnd* dst = instr->GetDst();
- IR::Opnd* src1 = instr->GetSrc1();
- IR::Opnd* src2 = instr->GetSrc2();
- IR::Opnd* temp1, *temp2, *temp3;
- IRType simdType, laneType;
- // The signed and unsigned opcodes differ only in the operand types used below.
- if (instr->m_opcode == Js::OpCode::Simd128_Mul_I16)
- {
- simdType = TySimd128I16;
- laneType = TyInt8;
- }
- else
- {
- simdType = TySimd128U16;
- laneType = TyUint8;
- }
- Assert(dst->IsRegOpnd() && dst->GetType() == simdType);
- Assert(src1->IsRegOpnd() && src1->GetType() == simdType);
- Assert(src2->IsRegOpnd() && src2->GetType() == simdType);
- temp1 = IR::RegOpnd::New(simdType, m_func);
- temp2 = IR::RegOpnd::New(simdType, m_func);
- temp3 = IR::RegOpnd::New(simdType, m_func);
- // MOVAPS temp1, src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp1, src1, m_func));
- //PMULLW temp1, src2
- pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp1, temp1, src2, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //PAND temp1 {0x00ff00ff00ff00ff00ff00ff00ff00ff} :To zero out bytes 1,3,5...
- pInstr = IR::Instr::New(Js::OpCode::PAND, temp1, temp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), simdType, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //PSRLW temp2, src2, 8 :upper 8 bits of each word of src2
- pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp2, src2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //PSRLW temp3, src1, 8 :upper 8 bits of each word of src1
- pInstr = IR::Instr::New(Js::OpCode::PSRLW, temp3, src1, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //PMULLW temp2, temp3
- pInstr = IR::Instr::New(Js::OpCode::PMULLW, temp2, temp2, temp3, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //PSLLW temp2, 8 :sets the results bytes 1,3,5..
- pInstr = IR::Instr::New(Js::OpCode::PSLLW, temp2, temp2, IR::IntConstOpnd::New(8, laneType, m_func, true), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- //POR dst, temp1, temp2 :OR bytes 0,2,4.. (temp1) with bytes 1,3,5.. (temp2) for the final result
- pInstr = IR::Instr::New(Js::OpCode::POR, dst, temp1, temp2, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- pInstr = instr->m_prev;
- instr->Remove();
- return pInstr;
- }
- // Lowers the Simd128_ShLtByScalar_* / Simd128_ShRtByScalar_* opcodes.
- // src1 is the Simd128 register to shift, src2 the int32 shift count.
- //  - Simd128_ShRtByScalar_I2 is handed to EmitShiftByScalarI2 (runtime helper call) and returns early.
- //  - 2-, 4- and 8-lane opcodes map to one packed shift whose count is held in an XMM temp (tmp0).
- //  - 16-lane (byte) opcodes have no packed byte shift here; they are composed from word shifts and byte masks.
- // The count is ANDed with SIMDGetShiftAmountMask(elementSizeInBytes) before use.
- // Removes `instr` and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
- {
- IR::Opnd* dst = instr->GetDst();
- IR::Opnd* src1 = instr->GetSrc1();
- IR::Opnd* src2 = instr->GetSrc2();
- Assert(dst->IsRegOpnd() && dst->IsSimd128());
- Assert(src1->IsRegOpnd() && src1->IsSimd128());
- Assert(src2->IsInt32());
- Js::OpCode opcode = Js::OpCode::PSLLD;
- int elementSizeInBytes = 0;
- // Pick the packed shift opcode (where one exists) and the lane size used to mask the count.
- switch (instr->m_opcode)
- {
- case Js::OpCode::Simd128_ShRtByScalar_I2:
- EmitShiftByScalarI2(instr, IR::HelperSimd128ShRtByScalarI2);
- return removeInstr(instr);
- case Js::OpCode::Simd128_ShLtByScalar_I2:
- opcode = Js::OpCode::PSLLQ;
- elementSizeInBytes = 8;
- break;
- case Js::OpCode::Simd128_ShRtByScalar_U2:
- opcode = Js::OpCode::PSRLQ;
- elementSizeInBytes = 8;
- break;
- case Js::OpCode::Simd128_ShLtByScalar_I4:
- case Js::OpCode::Simd128_ShLtByScalar_U4: // same as int32x4.ShiftLeftScalar
- opcode = Js::OpCode::PSLLD;
- elementSizeInBytes = 4;
- break;
- case Js::OpCode::Simd128_ShRtByScalar_I4:
- opcode = Js::OpCode::PSRAD;
- elementSizeInBytes = 4;
- break;
- case Js::OpCode::Simd128_ShLtByScalar_I8:
- case Js::OpCode::Simd128_ShLtByScalar_U8: // same as int16x8.ShiftLeftScalar
- opcode = Js::OpCode::PSLLW;
- elementSizeInBytes = 2;
- break;
- case Js::OpCode::Simd128_ShRtByScalar_I8:
- opcode = Js::OpCode::PSRAW;
- elementSizeInBytes = 2;
- break;
- case Js::OpCode::Simd128_ShRtByScalar_U4:
- opcode = Js::OpCode::PSRLD;
- elementSizeInBytes = 4;
- break;
- case Js::OpCode::Simd128_ShRtByScalar_U8:
- opcode = Js::OpCode::PSRLW;
- elementSizeInBytes = 2;
- break;
- case Js::OpCode::Simd128_ShLtByScalar_I16: // composite, int8x16.ShiftLeftScalar
- case Js::OpCode::Simd128_ShRtByScalar_I16: // composite, int8x16.ShiftRightScalar
- case Js::OpCode::Simd128_ShLtByScalar_U16: // same as int8x16.ShiftLeftScalar
- case Js::OpCode::Simd128_ShRtByScalar_U16: // composite, uint8x16.ShiftRightScalar
- elementSizeInBytes = 1;
- break;
- default:
- Assert(UNREACHED);
- }
- IR::Instr *pInstr = nullptr;
- IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, m_func);
- IR::RegOpnd *reg2 = IR::RegOpnd::New(TyInt32, m_func);
- IR::RegOpnd *tmp0 = IR::RegOpnd::New(src1->GetType(), m_func);
- IR::RegOpnd *tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
- IR::RegOpnd *tmp2 = IR::RegOpnd::New(src1->GetType(), m_func);
- //Shift amount: The shift amount is masked by [ElementSize] * 8
- //The masked Shift amount is moved to xmm register
- //AND shamt, shmask, shamt
- //MOVD tmp0, shamt
- IR::RegOpnd *shamt = IR::RegOpnd::New(src2->GetType(), m_func);
- // en-register
- IR::Opnd *origShamt = EnregisterIntConst(instr, src2); //unnormalized shift amount
- pInstr = IR::Instr::New(Js::OpCode::AND, shamt, origShamt, IR::IntConstOpnd::New(Js::SIMDUtils::SIMDGetShiftAmountMask(elementSizeInBytes), TyInt32, m_func), m_func); // normalizing by elm width (i.e. shamt % elm_width)
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp0, shamt, m_func);
- instr->InsertBefore(pInstr);
- // Opcodes with a direct packed-shift equivalent.
- if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U4 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U8 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I2 || instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U2)
- {
- // shiftOpCode dst, src1, tmp0
- pInstr = IR::Instr::New(opcode, dst, src1, tmp0, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- }
- else if (instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_I16 || instr->m_opcode == Js::OpCode::Simd128_ShLtByScalar_U16)
- {
- // Byte shift-left: shift the high bytes (tmp1) and the whole words (dst) separately as words,
- // keep only the low bytes of dst, then OR the two halves together.
- // MOVAPS tmp1, src1
- pInstr = IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func);
- instr->InsertBefore(pInstr);
- // MOVAPS dst, src1
- pInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
- instr->InsertBefore(pInstr);
- // PAND tmp1, [X86_HIGHBYTES_MASK]
- pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // PSLLW tmp1, tmp0
- pInstr = IR::Instr::New(Js::OpCode::PSLLW, tmp1, tmp1, tmp0, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // PSLLW dst, tmp0
- pInstr = IR::Instr::New(Js::OpCode::PSLLW, dst, dst, tmp0, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // PAND dst, [X86_LOWBYTES_MASK]
- pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // POR dst, tmp1
- pInstr = IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func);
- instr->InsertBefore(pInstr);
- }
- else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_I16)
- {
- // Signed byte shift-right: the low bytes are first moved into the high byte of each word
- // (PSLLW 8) and then arithmetically shifted by shamt + 8; the high bytes are shifted by shamt.
- // MOVAPS tmp1, src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp1, src1, m_func));
- // MOVAPS dst, src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
- // PSLLW dst, 8
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLW, dst, dst, IR::IntConstOpnd::New(8, TyInt8, m_func), m_func));
- // LEA reg, [shamt + 8]
- IR::IndirOpnd *indirOpnd = IR::IndirOpnd::New(shamt->AsRegOpnd(), +8, TyInt32, m_func);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::LEA, reg, indirOpnd, m_func));
- // MOVD tmp2, reg
- pInstr = IR::Instr::New(Js::OpCode::MOVD, tmp2, reg, m_func);
- instr->InsertBefore(pInstr);
- // PSRAW dst, tmp2
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, dst, dst, tmp2, m_func));
- // PAND dst, [X86_LOWBYTES_MASK]
- pInstr = IR::Instr::New(Js::OpCode::PAND, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86LowBytesMaskAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // PSRAW tmp1, tmp0
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRAW, tmp1, tmp1, tmp0, m_func));
- // PAND tmp1, [X86_HIGHBYTES_MASK]
- pInstr = IR::Instr::New(Js::OpCode::PAND, tmp1, tmp1, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86HighBytesMaskAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // POR dst, tmp1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::POR, dst, dst, tmp1, m_func));
- }
- else if (instr->m_opcode == Js::OpCode::Simd128_ShRtByScalar_U16)
- {
- // Unsigned byte shift-right: shift whole words logically, then AND every byte with
- // (0xFF >> shamt), which is computed in a GPR and splatted across a vector mask.
- IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
- // The scalar SHR below takes its count from the architecture's shift-count register.
- shamtReg->SetReg(LowererMDArch::GetRegShiftCount());
- IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
- // MOVAPS dst, src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
- // MOV reg2, 0FFh
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, reg2, IR::IntConstOpnd::New(0xFF, TyInt32, m_func), m_func));
- // MOV shamtReg, shamt
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, shamtReg, shamt, m_func));
- // SHR reg2, shamtReg (lower 8 bit)
- instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));
- // MOV tmp, reg2
- // MOVSX reg2, tmp(TyInt8)
- pInstr = IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
- IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
- // PSRLW dst, tmp0
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
- // splat (0xFF >> shamt) into mask
- // MOVD mask, reg2
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, mask, reg2, m_func));
- // PUNPCKLBW mask, mask
- pInstr = IR::Instr::New(Js::OpCode::PUNPCKLBW, mask, mask, mask, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // PUNPCKLWD mask, mask
- pInstr = IR::Instr::New(Js::OpCode::PUNPCKLWD, mask, mask, mask, m_func);
- instr->InsertBefore(pInstr);
- Legalize(pInstr);
- // PSHUFD mask, mask, 0
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSHUFD, mask, mask, IR::IntConstOpnd::New(0, TyInt8, m_func, true), m_func));
- // PAND dst, mask
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PAND, dst, dst, mask, m_func));
- }
- else
- {
- Assert(UNREACHED);
- }
- pInstr = instr->m_prev;
- instr->Remove();
- return pInstr;
- }
- // Lowers Simd128_ReplaceLane_{I8,U8,B8}: dst = src1 with the 16-bit lane selected by the constant
- // lane operand overwritten via PINSRW. Extended args are popped as dst, src1, lane index, value.
- // For the bool variant the result is additionally passed through Simd128CanonicalizeToBools.
- // Removes the (possibly updated) instr and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::SIMD128LowerReplaceLane_8(IR::Instr* instr)
- {
- SList<IR::Opnd*> *extendedArgs = Simd128GetExtendedArgs(instr);
- IR::Opnd *dst = extendedArgs->Pop();
- IR::Opnd *src1 = extendedArgs->Pop();
- IR::Opnd *laneOpnd = extendedArgs->Pop();
- IR::Opnd *replacement = extendedArgs->Pop();
- Assert(dst->IsSimd128() && src1->IsSimd128());
- const int laneIndex = laneOpnd->AsIntConstOpnd()->AsInt32();
- // The replacement value is enregistered as a 16-bit operand for PINSRW.
- IR::Opnd* value16 = EnregisterIntConst(instr, replacement, TyInt16);
- Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8);
- const bool isBoolVector = (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8);
- // MOVAPS dst, src1
- IR::Instr * copyInstr = IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func);
- instr->InsertBefore(copyInstr);
- Legalize(copyInstr);
- // PINSRW dst, value, index
- IR::Instr * insertInstr = IR::Instr::New(Js::OpCode::PINSRW, dst, value16, IR::IntConstOpnd::New(laneIndex, TyInt8, m_func), m_func);
- instr->InsertBefore(insertInstr);
- Legalize(insertInstr);
- if (isBoolVector)
- {
- // canonicalizing lanes
- instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQW, *dst);
- }
- IR::Instr* resumeAt = instr->m_prev;
- instr->Remove();
- return resumeAt;
- }
- // Lowers Simd128_ReplaceLane_{I16,U16,B16}: dst = src1 with the 8-bit lane selected by the constant
- // lane operand replaced. The replacement goes through memory: src1 is stored to the SIMD temp area,
- // the single byte at offset `lane` is overwritten, and the 16 bytes are loaded back into dst.
- // Extended args are popped as dst, src1, lane index, value.
- // For the bool variant the result is additionally passed through Simd128CanonicalizeToBools.
- // Removes the (possibly updated) instr and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::SIMD128LowerReplaceLane_16(IR::Instr* instr)
- {
- SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
- int lane = 0;
- IR::Opnd *dst = args->Pop();
- IR::Opnd *src1 = args->Pop();
- IR::Opnd *src2 = args->Pop();
- IR::Opnd *src3 = args->Pop();
- IR::Instr * newInstr = nullptr;
- Assert(dst->IsSimd128() && src1->IsSimd128());
- lane = src2->AsIntConstOpnd()->AsInt32();
- Assert(lane >= 0 && lane < 16);
- IR::Opnd* laneValue = EnregisterIntConst(instr, src3, TyInt8);
- intptr_t tempSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
- #if DBG
- // using only one SIMD temp
- // endAddrSIMD exists only in DBG builds; it is referenced solely from the Assert below.
- intptr_t endAddrSIMD = tempSIMD + sizeof(X86SIMDValue);
- #endif
- Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U16 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
- // MOVUPS [temp], src1
- intptr_t address = tempSIMD;
- newInstr = IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New(address, TySimd128I16, m_func), src1, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // MOV [temp+offset], laneValue
- // Lanes are one byte wide, so the lane index is used directly as the byte offset.
- address = tempSIMD + lane;
- // check for buffer overrun
- Assert((intptr_t)address < endAddrSIMD);
- newInstr = IR::Instr::New(Js::OpCode::MOV, IR::MemRefOpnd::New(address, TyInt8, m_func), laneValue, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // MOVUPS dst, [temp]
- address = tempSIMD;
- newInstr = IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New(address, TySimd128I16, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16) //canonicalizing lanes.
- {
- instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQB, *dst);
- }
- IR::Instr* prevInstr = instr->m_prev;
- instr->Remove();
- return prevInstr;
- }
- // Lowers Simd128_ReplaceLane_{I4,U4,B4,F4}: dst = src1 with the 32-bit lane selected by the constant
- // lane operand replaced by src3. Extended args are popped as dst, src1, lane index, value.
- // Integer values are first moved into an XMM register so that every lane kind can be written with MOVSS.
- // For the bool variant the result is additionally passed through Simd128CanonicalizeToBools.
- // Removes the (possibly updated) instr and returns the instruction that preceded it at removal time.
- IR::Instr* LowererMD::SIMD128LowerReplaceLane_4(IR::Instr* instr)
- {
- SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
- IR::Opnd *dst = args->Pop();
- IR::Opnd *src1 = args->Pop();
- IR::Opnd *src2 = args->Pop();
- IR::Opnd *src3 = args->Pop();
- Assert(dst->IsSimd128() && src1->IsSimd128());
- IRType type = dst->GetType();
- int lane = src2->AsIntConstOpnd()->AsInt32();
- IR::Opnd* laneValue = EnregisterIntConst(instr, src3);
- // Opcode validation, in the same form as the 8- and 16-lane replace-lane lowerings.
- // (This replaces a switch whose only other effect was setting a byte width that was never read.)
- Assert(instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_I4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_F4);
- // MOVAPS dst, src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
- if (laneValue->GetType() == TyInt32 || laneValue->GetType() == TyUint32)
- {
- IR::RegOpnd *tempReg = IR::RegOpnd::New(TyFloat32, m_func);//mov intval to xmm
- //MOVD
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVD, tempReg, laneValue, m_func));
- laneValue = tempReg;
- }
- Assert(laneValue->GetType() == TyFloat32);
- if (lane == 0)
- {
- // MOVSS for both TyFloat32 and TyInt32. MOVD zeroes upper bits.
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
- }
- else if (lane == 2)
- {
- // Lane 2 is the low lane of the upper half: move the upper half down into tmp (MOVHLPS),
- // replace tmp's lane 0 (MOVSS), then move tmp's low half back into dst's upper half (MOVLHPS).
- IR::RegOpnd *tmp = IR::RegOpnd::New(type, m_func);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVHLPS, tmp, dst, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, tmp, laneValue, m_func));
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVLHPS, dst, tmp, m_func));
- }
- else
- {
- Assert(lane == 1 || lane == 3);
- // Build a shuffle mask that swaps lane 0 with the target lane (0xE1 for lane 1, 0x27 for lane 3).
- // Applying it, writing lane 0 with MOVSS, and applying it again puts the value in the target lane.
- uint8 shufMask = 0xE4; // 11 10 01 00
- shufMask |= lane; // 11 10 01 id
- shufMask &= ~(0x03 << (lane << 1)); // set 2 bits corresponding to lane index to 00
- // SHUFPS dst, dst, shufMask
- instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
- // MOVSS dst, value
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, laneValue, m_func));
- // SHUFPS dst, dst, shufMask
- instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, dst, IR::IntConstOpnd::New(shufMask, TyInt8, m_func, true), m_func));
- }
- if (instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4) //Canonicalizing lanes
- {
- instr = Simd128CanonicalizeToBools(instr, Js::OpCode::PCMPEQD, *dst);
- }
- IR::Instr* prevInstr = instr->m_prev;
- instr->Remove();
- return prevInstr;
- }
- /*
- 4 lane Swizzle with constant lane indices (the 2-lane D2 variant is compiled out below).
- Rewrites `instr` in place into "MOVAPS dst, src; SHUFPS/PSHUFD dst, dst, imm8" and
- returns the instruction that preceded `instr` on entry.
- */
- IR::Instr* LowererMD::Simd128LowerSwizzle_4(IR::Instr* instr)
- {
-     Js::OpCode shufOpcode = Js::OpCode::SHUFPS;
-     Js::OpCode irOpcode = instr->m_opcode;
-     SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
-     IR::Opnd *dst = args->Pop();
-     // srcs[0] is the SIMD source, srcs[1..4] are the lane indices.
-     IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
-     for (int i = 0; !args->Empty() && i < 6; i++)
-     {
-         srcs[i] = args->Pop();
-     }
-     int8 shufMask = 0;
-     int lane0 = 0, lane1 = 0, lane2 = 0, lane3 = 0;
-     IR::Instr *pInstr = instr->m_prev;
-     Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128());
-     // globOpt will type-spec if all lane indices are constants, and within range constraints to match a single SSE instruction
-     Assert(irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4 || irOpcode == Js::OpCode::Simd128_Swizzle_F4 /*|| irOpcode == Js::OpCode::Simd128_Swizzle_D2*/);
-     AssertMsg(srcs[1] && srcs[1]->IsIntConstOpnd() &&
-         srcs[2] && srcs[2]->IsIntConstOpnd() &&
-         (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[3] && srcs[3]->IsIntConstOpnd())) &&
-         (/*irOpcode == Js::OpCode::Simd128_Swizzle_D2 || */(srcs[4] && srcs[4]->IsIntConstOpnd())), "Type-specialized swizzle is supported only with constant lane indices");
- #if 0
-     if (irOpcode == Js::OpCode::Simd128_Swizzle_D2)
-     {
-         lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
-         lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
-         Assert(lane0 >= 0 && lane0 < 2);
-         Assert(lane1 >= 0 && lane1 < 2);
-         shufMask = (int8)((lane1 << 1) | lane0);
-         shufOpcode = Js::OpCode::SHUFPD;
-     }
- #endif // 0
-     if (irOpcode == Js::OpCode::Simd128_Swizzle_I4 || irOpcode == Js::OpCode::Simd128_Swizzle_U4)
-     {
-         // Integer vectors use the integer-domain shuffle.
-         shufOpcode = Js::OpCode::PSHUFD;
-     }
-     AnalysisAssert(srcs[3] != nullptr && srcs[4] != nullptr);
-     lane0 = srcs[1]->AsIntConstOpnd()->AsInt32();
-     lane1 = srcs[2]->AsIntConstOpnd()->AsInt32();
-     lane2 = srcs[3]->AsIntConstOpnd()->AsInt32();
-     lane3 = srcs[4]->AsIntConstOpnd()->AsInt32();
-     // Every index must select one of the four source lanes, since each gets 2 bits of the imm8.
-     Assert(lane0 >= 0 && lane0 < 4);
-     Assert(lane1 >= 0 && lane1 < 4);
-     Assert(lane2 >= 0 && lane2 < 4);
-     Assert(lane3 >= 0 && lane3 < 4);
-     shufMask = (int8)((lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0);
-     instr->m_opcode = shufOpcode;
-     instr->SetDst(dst);
-     // MOVAPS dst, src1
-     instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, srcs[0], m_func));
-     // SHUF dst, dst, imm8
-     instr->SetSrc1(dst);
-     instr->SetSrc2(IR::IntConstOpnd::New((IntConstType)shufMask, TyInt8, m_func, true));
-     return pInstr;
- }
- /*
- 4 lane shuffle. Handles arbitrary lane values.
- Lane indices 0..3 select from the first source and 4..7 from the second. The original
- instruction is removed and the instruction that preceded it on entry is returned.
- */
- IR::Instr* LowererMD::Simd128LowerShuffle_4(IR::Instr* instr)
- {
-     const Js::OpCode irOpcode = instr->m_opcode;
-     SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
-     IR::Opnd *dst = args->Pop();
-     // srcs[0], srcs[1]: SIMD sources; srcs[2..5]: constant lane indices.
-     IR::Opnd *srcs[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
-     for (int n = 0; !args->Empty() && n < 6; n++)
-     {
-         srcs[n] = args->Pop();
-     }
-     uint8 lanes[4], lanesSrc[4];
-     uint fromSrc1, fromSrc2;
-     IR::Instr * const prevInstr = instr->m_prev;
-     Assert(dst->IsSimd128() && srcs[0] && srcs[0]->IsSimd128() && srcs[1] && srcs[1]->IsSimd128());
-     Assert(irOpcode == Js::OpCode::Simd128_Shuffle_I4 || irOpcode == Js::OpCode::Simd128_Shuffle_U4 || irOpcode == Js::OpCode::Simd128_Shuffle_F4);
-     // globOpt only type-specializes the shuffle when every lane index is a constant.
-     AssertMsg(srcs[2] && srcs[2]->IsIntConstOpnd() &&
-         srcs[3] && srcs[3]->IsIntConstOpnd() &&
-         srcs[4] && srcs[4]->IsIntConstOpnd() &&
-         srcs[5] && srcs[5]->IsIntConstOpnd(), "Type-specialized shuffle is supported only with constant lane indices");
-     for (uint8 k = 0; k < 4; k++)
-     {
-         lanes[k] = (uint8) srcs[k + 2]->AsIntConstOpnd()->AsInt32();
-     }
-     for (uint8 k = 0; k < 4; k++)
-     {
-         Assert(lanes[k] < 8);
-     }
-     CheckShuffleLanes_4(lanes, lanesSrc, &fromSrc1, &fromSrc2);
-     Assert(fromSrc1 + fromSrc2 == 4);
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     if (fromSrc1 == 4 || fromSrc2 == 4)
-     {
-         // All lanes come from a single source: equivalent to a swizzle of that source.
-         IR::Opnd *onlySrc = (fromSrc1 == 4) ? srcs[0] : srcs[1];
-         InsertShufps(lanes, dst, onlySrc, onlySrc, instr);
-     }
-     else if (fromSrc1 == 2)
-     {
-         if (lanes[0] < 4 && lanes[1] < 4)
-         {
-             // Low two lanes from src1, high two from src2: maps directly onto SHUFPS.
-             Assert(lanes[2] >= 4 && lanes[3] >= 4);
-             InsertShufps(lanes, dst, srcs[0], srcs[1], instr);
-         }
-         else
-         {
-             // Two lanes from each source in arbitrary positions.
-             // First gather the src1 lanes into slots 0..1 and the src2 lanes into slots 2..3
-             // (ordLanes), then permute them into their final positions (reArrLanes).
-             uint8 ordLanes[4], reArrLanes[4];
-             uint8 nextSrc1Slot = 0;
-             uint8 nextSrc2Slot = 2;
-             for (uint8 i = 0; i < 4; i++)
-             {
-                 const bool takeFromSrc1 = (lanesSrc[i] == 1 && nextSrc1Slot < 4);
-                 if (!takeFromSrc1 && nextSrc2Slot >= 4)
-                 {
-                     continue;
-                 }
-                 Assert(takeFromSrc1 || lanesSrc[i] == 2);
-                 uint8 &slot = takeFromSrc1 ? nextSrc1Slot : nextSrc2Slot;
-                 ordLanes[slot] = lanes[i];
-                 reArrLanes[i] = slot;
-                 slot++;
-             }
-             IR::RegOpnd *gathered = IR::RegOpnd::New(dst->GetType(), m_func);
-             InsertShufps(ordLanes, gathered, srcs[0], srcs[1], instr);
-             InsertShufps(reArrLanes, dst, gathered, gathered, instr);
-         }
-     }
-     else if (fromSrc1 == 3 || fromSrc2 == 3)
-     {
-         // Three lanes from one source (majority), one lane from the other (minority).
-         // Sequence:
-         //   SHUFPS temp1, majSrc, lanes
-         //   SHUFPS temp2, minSrc, lanes
-         //   MOVUPS temp3, [minorityLane mask]
-         //   ANDPS  temp2, temp3      // keep only the minority lane
-         //   ANDNPS temp3, temp1      // zero the minority lane of the majority result
-         //   ORPS   dst, temp2, temp3
-         IR::RegOpnd *temp1 = IR::RegOpnd::New(dst->GetType(), m_func);
-         IR::RegOpnd *temp2 = IR::RegOpnd::New(dst->GetType(), m_func);
-         IR::RegOpnd *temp3 = IR::RegOpnd::New(dst->GetType(), m_func);
-         const bool src1IsMajority = (fromSrc1 == 3);
-         IR::Opnd *majSrc = src1IsMajority ? srcs[0] : srcs[1];
-         IR::Opnd *minSrc = src1IsMajority ? srcs[1] : srcs[0];
-         Assert(majSrc != minSrc);
-         // Locate the output position whose index falls in the minority source's range.
-         const uint8 minRangeEnd = (minSrc == srcs[0]) ? 4 : 8;
-         uint8 minorityLane = 0;
-         for (uint8 i = 0; i < 4; i++)
-         {
-             if (lanes[i] >= (minRangeEnd - 4) && lanes[i] < minRangeEnd)
-             {
-                 minorityLane = i;
-                 break;
-             }
-         }
-         IR::MemRefOpnd * laneMask = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86FourLanesMaskAddr(minorityLane), dst->GetType(), m_func);
-         InsertShufps(lanes, temp1, majSrc, majSrc, instr);
-         InsertShufps(lanes, temp2, minSrc, minSrc, instr);
-         emit(IR::Instr::New(Js::OpCode::MOVUPS, temp3, laneMask, m_func));
-         emit(IR::Instr::New(Js::OpCode::ANDPS, temp2, temp2, temp3, m_func));
-         emit(IR::Instr::New(Js::OpCode::ANDNPS, temp3, temp3, temp1, m_func));
-         emit(IR::Instr::New(Js::OpCode::ORPS, dst, temp2, temp3, m_func));
-     }
-     instr->Remove();
-     return prevInstr;
- }
- // 8 and 16 lane swizzle/shuffle with memory temps.
- // The source vector(s) are spilled to the SIMD temp area, each requested lane is copied
- // with scalar MOVs into a destination temp slot, and the result is reloaded into dst.
- // The original instruction is removed; the instruction that preceded it on entry is returned.
- IR::Instr* LowererMD::Simd128LowerShuffle(IR::Instr* instr)
- {
-     const Js::OpCode irOpcode = instr->m_opcode;
-     IR::Instr * const prevInstr = instr->m_prev;
-     uint8 lanes[16], laneCount = 0, scale = 1; // scale = bytes per lane
-     bool isShuffle = false;
-     IRType laneType = TyInt16;
-     intptr_t temp1SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(0);
-     intptr_t temp2SIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(1);
-     intptr_t dstSIMD = m_func->GetThreadContextInfo()->GetSimdTempAreaAddr(2);
- #if DBG
-     intptr_t endAddrSIMD = (intptr_t)(temp1SIMD + sizeof(X86SIMDValue) * SIMD_TEMP_SIZE);
- #endif
-     SList<IR::Opnd*> *args = Simd128GetExtendedArgs(instr);
-     switch (irOpcode)
-     {
-     case Js::OpCode::Simd128_Swizzle_I8:
-     case Js::OpCode::Simd128_Swizzle_U8:
-         Assert(args->Count() == 10);
-         isShuffle = false;
-         laneCount = 8;
-         scale = 2;
-         laneType = TyInt16;
-         break;
-     case Js::OpCode::Simd128_Swizzle_I16:
-     case Js::OpCode::Simd128_Swizzle_U16:
-         Assert(args->Count() == 18);
-         isShuffle = false;
-         laneCount = 16;
-         scale = 1;
-         laneType = TyInt8;
-         break;
-     case Js::OpCode::Simd128_Shuffle_I8:
-     case Js::OpCode::Simd128_Shuffle_U8:
-         Assert(args->Count() == 11);
-         isShuffle = true;
-         laneCount = 8;
-         scale = 2;
-         laneType = TyUint16;
-         break;
-     case Js::OpCode::Simd128_Shuffle_I16:
-     case Js::OpCode::Simd128_Shuffle_U16:
-         Assert(args->Count() == 19);
-         isShuffle = true;
-         laneCount = 16;
-         scale = 1;
-         laneType = TyUint8;
-         break;
-     default:
-         Assert(UNREACHED);
-     }
-     // Extended args: dst, src1, [src2 for shuffles], then one constant index per lane.
-     IR::Opnd *dst = args->Pop();
-     IR::Opnd *src1 = args->Pop();
-     IR::Opnd *src2 = nullptr;
-     if (isShuffle)
-     {
-         src2 = args->Pop();
-     }
-     Assert(dst->IsSimd128() && src1 && src1->IsSimd128() && (!isShuffle|| src2->IsSimd128()));
-     for (uint idx = 0; idx < laneCount; idx++)
-     {
-         IR::Opnd * laneOpnd = args->Pop();
-         Assert(laneOpnd->IsIntConstOpnd());
-         lanes[idx] = (uint8)laneOpnd->AsIntConstOpnd()->AsInt32();
-     }
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     // MOVUPS [temp1SIMD], src1
-     emit(IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)temp1SIMD, TySimd128I16, m_func), src1, m_func));
-     if (isShuffle)
-     {
-         // MOVUPS [temp2SIMD], src2
-         emit(IR::Instr::New(Js::OpCode::MOVUPS, IR::MemRefOpnd::New((void*)(temp2SIMD), TySimd128I16, m_func), src2, m_func));
-     }
-     for (uint idx = 0; idx < laneCount; idx++)
-     {
-         // MOV tmp, [temp1SIMD + laneValue*scale]
-         IR::RegOpnd *tmp = IR::RegOpnd::New(laneType, m_func);
-         void *address = (void*)(temp1SIMD + lanes[idx] * scale);
-         // The read must stay within the source slots, i.e. below the destination slot.
-         Assert((intptr_t)address + (intptr_t)scale <= (intptr_t)dstSIMD);
-         emit(IR::Instr::New(Js::OpCode::MOV, tmp, IR::MemRefOpnd::New(address, laneType, m_func), m_func));
-         // MOV [dstSIMD + idx*scale], tmp
-         address = (void*)(dstSIMD + idx * scale);
-         Assert((intptr_t)address + (intptr_t) scale <= endAddrSIMD);
-         emit(IR::Instr::New(Js::OpCode::MOV,IR::MemRefOpnd::New(address, laneType, m_func), tmp, m_func));
-     }
-     // MOVUPS dst, [dstSIMD]
-     emit(IR::Instr::New(Js::OpCode::MOVUPS, dst, IR::MemRefOpnd::New((void*)dstSIMD, TySimd128I16, m_func), m_func));
-     instr->Remove();
-     return prevInstr;
- }
- // Lowers integer SIMD "not equal" (I4/I8/I16/U4/U8/U16) as an equality compare
- // followed by a bitwise NOT (PANDN against all-ones).
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerNotEqual(IR::Instr* instr)
- {
-     const Js::OpCode op = instr->m_opcode;
-     Assert(op == Js::OpCode::Simd128_Neq_I4 || op == Js::OpCode::Simd128_Neq_I8 ||
-         op == Js::OpCode::Simd128_Neq_I16 || op == Js::OpCode::Simd128_Neq_U4 ||
-         op == Js::OpCode::Simd128_Neq_U8 || op == Js::OpCode::Simd128_Neq_U16);
-     IR::Opnd* dst = instr->GetDst();
-     IR::Opnd* src1 = instr->GetSrc1();
-     IR::Opnd* src2 = instr->GetSrc2();
-     Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     Assert(src2->IsRegOpnd() && src2->IsSimd128());
-     // Pick the equality compare matching the lane width.
-     Js::OpCode eqOpcode;
-     switch (op)
-     {
-     case Js::OpCode::Simd128_Neq_I8:
-     case Js::OpCode::Simd128_Neq_U8:
-         eqOpcode = Js::OpCode::PCMPEQW;
-         break;
-     case Js::OpCode::Simd128_Neq_I16:
-     case Js::OpCode::Simd128_Neq_U16:
-         eqOpcode = Js::OpCode::PCMPEQB;
-         break;
-     default:
-         eqOpcode = Js::OpCode::PCMPEQD;
-         break;
-     }
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     // dst = PCMPEQ{D,W,B} src1, src2
-     emit(IR::Instr::New(eqOpcode, dst, src1, src2, m_func));
-     // dst = PANDN dst, X86_ALL_NEG_ONES   (i.e. dst = ~dst)
-     emit(IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func));
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Lowers unsigned SIMD lessThan (Lt_U4/U8/U16) and greaterThanOrEqual (GtEq_U4/U8/U16).
- // Both operands are XORed with the per-lane sign bit so the signed PCMPGT can be used;
- // greaterThanOrEqual is produced as NOT(lessThan).
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerLessThan(IR::Instr* instr)
- {
-     const Js::OpCode op = instr->m_opcode;
-     Assert(op == Js::OpCode::Simd128_Lt_U4 || op == Js::OpCode::Simd128_Lt_U8 || op == Js::OpCode::Simd128_Lt_U16 ||
-         op == Js::OpCode::Simd128_GtEq_U4 || op == Js::OpCode::Simd128_GtEq_U8 || op == Js::OpCode::Simd128_GtEq_U16);
-     IR::Opnd* dst = instr->GetDst();
-     IR::Opnd* src1 = instr->GetSrc1();
-     IR::Opnd* src2 = instr->GetSrc2();
-     Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     Assert(src2->IsRegOpnd() && src2->IsSimd128());
-     IR::RegOpnd* flipped1 = IR::RegOpnd::New(src1->GetType(), m_func);
-     IR::RegOpnd* flipped2 = IR::RegOpnd::New(src1->GetType(), m_func);
-     // Defaults are for 32-bit lanes; narrower lane widths override them below.
-     IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
-     IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
-     Js::OpCode gtOpcode = Js::OpCode::PCMPGTD;
-     switch (op)
-     {
-     case Js::OpCode::Simd128_Lt_U8:
-     case Js::OpCode::Simd128_GtEq_U8:
-         gtOpcode = Js::OpCode::PCMPGTW;
-         signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
-         break;
-     case Js::OpCode::Simd128_Lt_U16:
-     case Js::OpCode::Simd128_GtEq_U16:
-         gtOpcode = Js::OpCode::PCMPGTB;
-         signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
-         break;
-     default:
-         break;
-     }
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     // MOVUPS mask, [signBits]
-     emit(IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func));
-     // flipped1 = PXOR src1, mask
-     emit(IR::Instr::New(Js::OpCode::PXOR, flipped1, src1, mask, m_func));
-     // flipped2 = PXOR src2, mask
-     emit(IR::Instr::New(Js::OpCode::PXOR, flipped2, src2, mask, m_func));
-     // dst = PCMPGT flipped2, flipped1   (lessThan with the operands swapped)
-     emit(IR::Instr::New(gtOpcode, dst, flipped2, flipped1, m_func));
-     const bool negateResult = (op == Js::OpCode::Simd128_GtEq_U4 || op == Js::OpCode::Simd128_GtEq_U8 || op == Js::OpCode::Simd128_GtEq_U16);
-     if (negateResult)
-     {
-         // For SIMD unsigned ints, greaterThanOrEqual == NOT(lessThan).
-         // dst = PANDN dst, X86_ALL_NEG_ONES
-         emit(IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func));
-     }
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Lowers SIMD lessThanOrEqual for signed (I4/I8/I16) and unsigned (U4/U8/U16) lanes,
- // plus unsigned greaterThan (Gt_U4/U8/U16), which is produced as NOT(lessThanOrEqual).
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerLessThanOrEqual(IR::Instr* instr)
- {
-     const Js::OpCode op = instr->m_opcode;
-     Assert(op == Js::OpCode::Simd128_LtEq_I4 || op == Js::OpCode::Simd128_LtEq_I8 || op == Js::OpCode::Simd128_LtEq_I16 ||
-         op == Js::OpCode::Simd128_LtEq_U4 || op == Js::OpCode::Simd128_LtEq_U8 || op == Js::OpCode::Simd128_LtEq_U16 ||
-         op == Js::OpCode::Simd128_Gt_U4 || op == Js::OpCode::Simd128_Gt_U8 || op == Js::OpCode::Simd128_Gt_U16);
-     IR::Opnd* dst = instr->GetDst();
-     IR::Opnd* src1 = instr->GetSrc1();
-     IR::Opnd* src2 = instr->GetSrc2();
-     Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     Assert(src2->IsRegOpnd() && src2->IsSimd128());
-     IR::RegOpnd* tmpa = IR::RegOpnd::New(src1->GetType(), m_func);
-     IR::RegOpnd* tmpb = IR::RegOpnd::New(src1->GetType(), m_func);
-     // Compare opcodes matching the lane width (32-bit by default).
-     Js::OpCode gtOpcode = Js::OpCode::PCMPGTD;
-     Js::OpCode eqOpcode = Js::OpCode::PCMPEQD;
-     switch (op)
-     {
-     case Js::OpCode::Simd128_LtEq_I8:
-     case Js::OpCode::Simd128_LtEq_U8:
-     case Js::OpCode::Simd128_Gt_U8:
-         gtOpcode = Js::OpCode::PCMPGTW;
-         eqOpcode = Js::OpCode::PCMPEQW;
-         break;
-     case Js::OpCode::Simd128_LtEq_I16:
-     case Js::OpCode::Simd128_LtEq_U16:
-     case Js::OpCode::Simd128_Gt_U16:
-         gtOpcode = Js::OpCode::PCMPGTB;
-         eqOpcode = Js::OpCode::PCMPEQB;
-         break;
-     default:
-         break;
-     }
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     switch (op)
-     {
-     case Js::OpCode::Simd128_LtEq_I4:
-         // Signed 32-bit: (src1 <= src2) == NOT(src1 > src2).
-         // dst = pcmpgtd src1, src2
-         emit(IR::Instr::New(Js::OpCode::PCMPGTD, dst, src1, src2, m_func));
-         // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
-         emit(IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func));
-         break;
-     case Js::OpCode::Simd128_LtEq_I8:
-     case Js::OpCode::Simd128_LtEq_I16:
-         // Signed 16/8-bit: (src1 < src2) OR (src1 == src2).
-         // tmpa = pcmpgt{w,b} src2, src1
-         emit(IR::Instr::New(gtOpcode, tmpa, src2, src1, m_func));
-         // tmpb = pcmpeq{w,b} src1, src2
-         emit(IR::Instr::New(eqOpcode, tmpb, src1, src2, m_func));
-         // dst = por tmpa, tmpb
-         emit(IR::Instr::New(Js::OpCode::POR, dst, tmpa, tmpb, m_func));
-         break;
-     case Js::OpCode::Simd128_LtEq_U4:
-     case Js::OpCode::Simd128_LtEq_U8:
-     case Js::OpCode::Simd128_LtEq_U16:
-     case Js::OpCode::Simd128_Gt_U4:
-     case Js::OpCode::Simd128_Gt_U8:
-     case Js::OpCode::Simd128_Gt_U16:
-     {
-         // Unsigned: flip the sign bit of every lane so the signed compares can be used.
-         IR::MemRefOpnd* signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86DoubleWordSignBitsAddr(), TySimd128I4, m_func);
-         IR::RegOpnd * mask = IR::RegOpnd::New(TySimd128I4, m_func);
-         if (op == Js::OpCode::Simd128_LtEq_U8 || op == Js::OpCode::Simd128_Gt_U8)
-         {
-             signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86WordSignBitsAddr(), TySimd128I4, m_func);
-         }
-         else if (op == Js::OpCode::Simd128_LtEq_U16 || op == Js::OpCode::Simd128_Gt_U16)
-         {
-             signBits = IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86ByteSignBitsAddr(), TySimd128I4, m_func);
-         }
-         // MOVUPS mask, [signBits]
-         emit(IR::Instr::New(Js::OpCode::MOVUPS, mask, signBits, m_func));
-         // tmpa = PXOR src1, mask
-         emit(IR::Instr::New(Js::OpCode::PXOR, tmpa, src1, mask, m_func));
-         // tmpb = PXOR src2, mask
-         emit(IR::Instr::New(Js::OpCode::PXOR, tmpb, src2, mask, m_func));
-         // dst = pcmpgt tmpb, tmpa   (src1 < src2)
-         emit(IR::Instr::New(gtOpcode, dst, tmpb, tmpa, m_func));
-         // tmpa = pcmpeq tmpa, tmpb  (src1 == src2)
-         emit(IR::Instr::New(eqOpcode, tmpa, tmpa, tmpb, m_func));
-         // dst = por dst, tmpa
-         emit(IR::Instr::New(Js::OpCode::POR, dst, dst, tmpa, m_func));
-         if (op == Js::OpCode::Simd128_Gt_U4 || op == Js::OpCode::Simd128_Gt_U8 || op == Js::OpCode::Simd128_Gt_U16)
-         {
-             // For SIMD unsigned ints, greaterThan == NOT(lessThanOrEqual).
-             // dst = PANDN dst, X86_ALL_NEG_ONES
-             emit(IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func));
-         }
-         break;
-     }
-     default:
-         break;
-     }
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Lowers signed SIMD greaterThanOrEqual (GtEq_I4/I8/I16).
- // I4 is computed as NOT(src2 > src1); I8/I16 as (src1 > src2) OR (src1 == src2).
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerGreaterThanOrEqual(IR::Instr* instr)
- {
-     const Js::OpCode op = instr->m_opcode;
-     Assert(op == Js::OpCode::Simd128_GtEq_I4 || op == Js::OpCode::Simd128_GtEq_I8 || op == Js::OpCode::Simd128_GtEq_I16);
-     IR::Opnd* dst = instr->GetDst();
-     IR::Opnd* src1 = instr->GetSrc1();
-     IR::Opnd* src2 = instr->GetSrc2();
-     Assert(dst->IsRegOpnd() && (dst->IsSimd128B4() || dst->IsSimd128B8() || dst->IsSimd128B16()));
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     Assert(src2->IsRegOpnd() && src2->IsSimd128());
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     switch (op)
-     {
-     case Js::OpCode::Simd128_GtEq_I4:
-         // dst = pcmpgtd src2, src1
-         emit(IR::Instr::New(Js::OpCode::PCMPGTD, dst, src2, src1, m_func));
-         // dst = pandn dst, xmmword ptr[X86_ALL_NEG_ONES]
-         emit(IR::Instr::New(Js::OpCode::PANDN, dst, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesAddr(), TySimd128I4, m_func), m_func));
-         break;
-     case Js::OpCode::Simd128_GtEq_I8:
-     case Js::OpCode::Simd128_GtEq_I16:
-     {
-         IR::RegOpnd* greater = IR::RegOpnd::New(src1->GetType(), m_func);
-         IR::RegOpnd* equal = IR::RegOpnd::New(src1->GetType(), m_func);
-         const bool byteLanes = (op == Js::OpCode::Simd128_GtEq_I16);
-         const Js::OpCode gtOpcode = byteLanes ? Js::OpCode::PCMPGTB : Js::OpCode::PCMPGTW;
-         const Js::OpCode eqOpcode = byteLanes ? Js::OpCode::PCMPEQB : Js::OpCode::PCMPEQW;
-         // greater = pcmpgt{w,b} src1, src2
-         emit(IR::Instr::New(gtOpcode, greater, src1, src2, m_func));
-         // equal = pcmpeq{w,b} src1, src2
-         emit(IR::Instr::New(eqOpcode, equal, src1, src2, m_func));
-         // dst = por greater, equal
-         emit(IR::Instr::New(Js::OpCode::POR, dst, greater, equal, m_func));
-         break;
-     }
-     default:
-         break;
-     }
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Lowers Simd128_Min_F4 / Simd128_Max_F4.
- // MINPS/MAXPS are evaluated with both operand orders and the two results are combined,
- // so the outcome does not depend on which operand the hardware favors.
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerMinMax_F4(IR::Instr* instr)
- {
-     IR::Opnd* dst = instr->GetDst();
-     IR::Opnd* src1 = instr->GetSrc1();
-     IR::Opnd* src2 = instr->GetSrc2();
-     Assert(dst->IsRegOpnd() && dst->IsSimd128());
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     Assert(src2->IsRegOpnd() && src2->IsSimd128());
-     Assert(instr->m_opcode == Js::OpCode::Simd128_Min_F4 || instr->m_opcode == Js::OpCode::Simd128_Max_F4);
-     IR::RegOpnd* tmp1 = IR::RegOpnd::New(src1->GetType(), m_func);
-     IR::RegOpnd* tmp2 = IR::RegOpnd::New(src2->GetType(), m_func);
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     const bool isMin = (instr->m_opcode == Js::OpCode::Simd128_Min_F4);
-     if (isMin)
-     {
-         // tmp1 = MINPS src1, src2
-         emit(IR::Instr::New(Js::OpCode::MINPS, tmp1, src1, src2, m_func));
-         // tmp2 = MINPS src2, src1
-         emit(IR::Instr::New(Js::OpCode::MINPS, tmp2, src2, src1, m_func));
-         // dst = ORPS tmp1, tmp2
-         emit(IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func));
-     }
-     else
-     {
-         // This sequence closely mirrors SIMDFloat32x4Operation::OpMax, except that
-         // tmp2 is reused to reduce the number of registers the sequence needs.
-         // tmp1 = MAXPS src1, src2
-         emit(IR::Instr::New(Js::OpCode::MAXPS, tmp1, src1, src2, m_func));
-         // tmp2 = MAXPS src2, src1
-         emit(IR::Instr::New(Js::OpCode::MAXPS, tmp2, src2, src1, m_func));
-         // tmp1 = ANDPS tmp1, tmp2
-         emit(IR::Instr::New(Js::OpCode::ANDPS, tmp1, tmp1, tmp2, m_func));
-         // tmp2 = CMPUNORDPS src1, src2   (mask of lanes where either input is NaN)
-         emit(IR::Instr::New(Js::OpCode::CMPUNORDPS, tmp2, src1, src2, m_func));
-         // dst = ORPS tmp1, tmp2
-         emit(IR::Instr::New(Js::OpCode::ORPS, dst, tmp1, tmp2, m_func));
-     }
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Returns the operand an AnyTrue/AllTrue reduction should read.
- // For non-wasm functions this is simply src1 of `instr`. For wasm functions src1 is
- // copied into a fresh register, Simd128CanonicalizeToBools is applied to that copy with
- // the compare opcode matching the lane width, and the copy is returned.
- IR::Opnd* LowererMD::Simd128CanonicalizeToBoolsBeforeReduction(IR::Instr* instr)
- {
-     IR::Opnd* src1 = instr->GetSrc1();
-     if (!m_func->GetJITFunctionBody()->IsWasmFunction())
-     {
-         return src1;
-     }
-     Js::OpCode eqOpcode = Js::OpCode::InvalidOpCode;
-     switch (instr->m_opcode)
-     {
-     case Js::OpCode::Simd128_AnyTrue_B4:
-     case Js::OpCode::Simd128_AnyTrue_B2:
-     case Js::OpCode::Simd128_AllTrue_B4:
-     case Js::OpCode::Simd128_AllTrue_B2:
-         eqOpcode = Js::OpCode::PCMPEQD;
-         break;
-     case Js::OpCode::Simd128_AnyTrue_B8:
-     case Js::OpCode::Simd128_AllTrue_B8:
-         eqOpcode = Js::OpCode::PCMPEQW;
-         break;
-     case Js::OpCode::Simd128_AnyTrue_B16:
-     case Js::OpCode::Simd128_AllTrue_B16:
-         eqOpcode = Js::OpCode::PCMPEQB;
-         break;
-     default:
-         Assert(UNREACHED);
-     }
-     // Work on a copy so the original source operand is left untouched.
-     IR::RegOpnd * canonical = IR::RegOpnd::New(src1->GetType(), m_func);
-     m_lowerer->InsertMove(canonical, src1, instr);
-     Simd128CanonicalizeToBools(instr, eqOpcode, *canonical);
-     return canonical;
- }
- // Lowers Simd128_AnyTrue_{B2,B4,B8,B16} into an int32 result:
- //     pmovmskb dst, src1
- //     neg      dst
- //     sbb      dst, dst
- //     neg      dst
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerAnyTrue(IR::Instr* instr)
- {
-     Assert(instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B8 ||
-         instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AnyTrue_B2);
-     IR::Opnd* dst = instr->GetDst();
- #ifdef ENABLE_WASM_SIMD
-     IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
- #else
-     IR::Opnd* src1 = instr->GetSrc1();
- #endif
-     Assert(dst->IsRegOpnd() && dst->IsInt32());
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     // pmovmskb dst, src1   (collect the top bit of every byte)
-     emit(IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func));
-     // neg dst              (carry is set iff the mask was non-zero)
-     emit(IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func));
-     // sbb dst, dst         (dst = -carry)
-     emit(IR::Instr::New(Js::OpCode::SBB, dst, dst, dst, m_func));
-     // neg dst              (dst = 1 if any bit was set, else 0)
-     emit(IR::Instr::New(Js::OpCode::NEG, dst, dst, m_func));
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Lowers Simd128_AllTrue_{B2,B4,B8,B16} into an int32 result.
- // The byte mask from PMOVMSKB is compared against the expected all-set pattern and the
- // equality flag is materialized with SETE / MOVSX. For B2 the mask is first reduced
- // (shift right by 4, OR, AND 0x0F0F) and compared against 0x0F0F instead of 0xFFFF.
- // Removes `instr` and returns the last instruction emitted in its place.
- IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
- {
-     Assert(instr->m_opcode == Js::OpCode::Simd128_AllTrue_B4 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B8 ||
-         instr->m_opcode == Js::OpCode::Simd128_AllTrue_B16 || instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2);
-     IR::Opnd* dst = instr->GetDst();
- #ifdef ENABLE_WASM_SIMD
-     IR::Opnd* src1 = Simd128CanonicalizeToBoolsBeforeReduction(instr);
- #else
-     IR::Opnd* src1 = instr->GetSrc1();
- #endif
-     Assert(dst->IsRegOpnd() && dst->IsInt32());
-     Assert(src1->IsRegOpnd() && src1->IsSimd128());
-     IR::RegOpnd * flagByte = IR::RegOpnd::New(TyInt8, m_func);
-     // Inserts an instruction before `instr` and legalizes it.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         instr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     // pmovmskb dst, src1   (inserted without legalization)
-     instr->InsertBefore(IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func));
-     // TODO nikolayk revisit the sequence for int64x2.alltrue
-     IR::Opnd* maskOpnd = dst;
-     uint cmpMask = 0xFFFF;
-     if (instr->m_opcode == Js::OpCode::Simd128_AllTrue_B2)
-     {
-         // Horizontally OR into the 0th and 2nd positions:
-         //   reduceReg = ((dst >> 4) | dst) & 0x0F0F
-         cmpMask = 0x0F0F;
-         IR::RegOpnd* reduceReg = IR::RegOpnd::New(TyInt32, m_func);
-         emit(IR::Instr::New(Js::OpCode::SHR, reduceReg, dst, (IR::IntConstOpnd::New(4, TyInt32, m_func, true)), m_func));
-         emit(IR::Instr::New(Js::OpCode::OR, reduceReg, reduceReg, dst, m_func));
-         emit(IR::Instr::New(Js::OpCode::AND, reduceReg, reduceReg, (IR::IntConstOpnd::New(0x0F0F, TyInt32, m_func, true)), m_func));
-         maskOpnd = reduceReg;
-     }
-     // cmp maskOpnd, cmpMask
-     IR::Instr *cmpInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
-     cmpInstr->SetSrc1(maskOpnd);
-     cmpInstr->SetSrc2(IR::IntConstOpnd::New(cmpMask, TyInt32, m_func, true));
-     emit(cmpInstr);
-     // mov flagByte(TyInt8), maskOpnd
-     emit(IR::Instr::New(Js::OpCode::MOV, flagByte, maskOpnd, m_func));
-     // sete flagByte(TyInt8)
-     emit(IR::Instr::New(Js::OpCode::SETE, flagByte, flagByte, m_func));
-     // movsx dst, flagByte(TyInt8)   (inserted without legalization)
-     instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, flagByte, m_func));
-     IR::Instr * const lastEmitted = instr->m_prev;
-     instr->Remove();
-     return lastEmitted;
- }
- // Lowers the Float32x4 -> Int32x4 conversion.
- // `instr` itself becomes CVTTPS2DQ; the range checks are inserted after it and end in a
- // runtime range error when a lane is out of range (or, for wasm functions, NaN).
- // Returns the instruction that preceded `instr` on entry.
- IR::Instr* LowererMD::Simd128LowerInt32x4FromFloat32x4(IR::Instr *instr)
- {
-     IR::Opnd *dst = instr->GetDst();
-     IR::Opnd *src = instr->GetSrc1();
-     Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
-     // CVTTPS2DQ dst, src
-     instr->m_opcode = Js::OpCode::CVTTPS2DQ;
-     IR::Instr *insertInstr = instr->m_next;
-     IR::Instr *prevInstr = instr->m_prev;
-     IR::LabelInstr *doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
-     IR::Opnd *mask1 = IR::RegOpnd::New(TyInt32, m_func);
-     IR::Opnd *mask2 = IR::RegOpnd::New(TyInt32, m_func);
-     IR::Opnd *tmp = IR::RegOpnd::New(TySimd128I4, m_func);
-     IR::Opnd *tmp2 = IR::RegOpnd::New(TySimd128I4, m_func);
-     // Inserts after the conversion (before insertInstr) and legalizes.
-     auto emit = [&](IR::Instr *newInstr)
-     {
-         insertInstr->InsertBefore(newInstr);
-         Legalize(newInstr);
-     };
-     // Inserts after the conversion without legalizing.
-     auto emitRaw = [&](IR::Instr *newInstr)
-     {
-         insertInstr->InsertBefore(newInstr);
-     };
-     // Emits "CMP mask1, 0; JEQ $doneLabel".
-     auto emitJumpToDoneIfMask1Zero = [&]()
-     {
-         IR::Instr *cmpInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
-         cmpInstr->SetSrc1(mask1);
-         cmpInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
-         insertInstr->InsertBefore(cmpInstr);
-         insertInstr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JEQ, doneLabel, m_func));
-     };
-     // Fast check: is any output lane 0x80000000 (potentially out of range)?
-     //   MOVAPS   tmp2, X86_NEG_MASK (0x80000000)
-     //   PCMPEQD  tmp, dst, tmp2
-     //   MOVMSKPS mask1, tmp
-     //   CMP      mask1, 0
-     //   JEQ      $doneLabel
-     emit(IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func));
-     emit(IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, tmp2, m_func));
-     emitRaw(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
-     emitJumpToDoneIfMask1Zero();
-     // Slow path: check the source lanes against the int32 bounds.
-     //   MOVAPS   tmp2, X86_TWO_31_F4 (0x4f000000)
-     //   CMPLEPS  tmp, tmp2, src
-     //   MOVMSKPS mask1, tmp
-     //   MOVAPS   tmp2, X86_NEG_TWO_31_F4 (0xcf000000)
-     //   CMPLTPS  tmp, src, tmp2
-     //   MOVMSKPS mask2, tmp
-     //   OR       mask1, mask1, mask2
-     emit(IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128I4, m_func), m_func));
-     emit(IR::Instr::New(Js::OpCode::CMPLEPS, tmp, tmp2, src, m_func));
-     emitRaw(IR::Instr::New(Js::OpCode::MOVMSKPS, mask1, tmp, m_func));
-     emit(IR::Instr::New(Js::OpCode::MOVAPS, tmp2, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegTwoPower31F4Addr(), TySimd128I4, m_func), m_func));
-     emit(IR::Instr::New(Js::OpCode::CMPLTPS, tmp, src, tmp2, m_func));
-     emitRaw(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
-     emitRaw(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
- #ifdef ENABLE_WASM_SIMD
-     if (m_func->GetJITFunctionBody()->IsWasmFunction())
-     {
-         // NaN check (wasm only): a lane that does not compare equal to itself is NaN.
-         //   CMPEQPS  tmp, src, src
-         //   MOVMSKPS mask2, tmp
-         //   NOT      mask2
-         //   AND      mask2, 0x00000F
-         //   OR       mask1, mask2
-         emit(IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func));
-         emitRaw(IR::Instr::New(Js::OpCode::MOVMSKPS, mask2, tmp, m_func));
-         emitRaw(IR::Instr::New(Js::OpCode::NOT, mask2, mask2, m_func));
-         emit(IR::Instr::New(Js::OpCode::AND, mask2, mask2, IR::IntConstOpnd::New(0x00000F, TyInt32, m_func), m_func));
-         emitRaw(IR::Instr::New(Js::OpCode::OR, mask1, mask1, mask2, m_func));
-     }
- #endif
-     //   CMP mask1, 0
-     //   JEQ $doneLabel
-     emitJumpToDoneIfMask1Zero();
-     // throw range error
-     m_lowerer->GenerateRuntimeError(insertInstr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
-     insertInstr->InsertBefore(doneLabel);
-     return prevInstr;
- }
- IR::Instr* LowererMD::Simd128LowerUint32x4FromFloat32x4(IR::Instr *instr)
- {
- IR::Opnd *dst, *src, *tmp, *tmp2, *two_31_f4_mask, *two_31_i4_mask, *mask;
- IR::Instr *pInstr, *newInstr;
- IR::LabelInstr *doneLabel, *throwLabel;
- dst = instr->GetDst();
- src = instr->GetSrc1();
- Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
- doneLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func);
- throwLabel = IR::LabelInstr::New(Js::OpCode::Label, this->m_func, true);
- pInstr = instr->m_prev;
- mask = IR::RegOpnd::New(TyInt32, m_func);
- two_31_f4_mask = IR::RegOpnd::New(TySimd128F4, m_func);
- two_31_i4_mask = IR::RegOpnd::New(TySimd128I4, m_func);
- tmp = IR::RegOpnd::New(TySimd128F4, m_func);
- tmp2 = IR::RegOpnd::New(TySimd128F4, m_func);
- // check for NaNs
- // CMPEQPS tmp, src
- // MOVMSKPS mask2, tmp
- // AND mask2, 0x00000F
- // JNE throw
- #ifdef ENABLE_WASM_SIMD
- if (m_func->GetJITFunctionBody()->IsWasmFunction())
- {
- newInstr = IR::Instr::New(Js::OpCode::CMPEQPS, tmp, src, src, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func));
- newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
- newInstr->SetSrc1(mask);
- newInstr->SetSrc2(IR::IntConstOpnd::New(0x0000000F, TyInt32, m_func));
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
- }
- #endif
- // any lanes <= -1.0 ?
- // CMPLEPS tmp, src, [X86_ALL_FLOAT32_NEG_ONES]
- // MOVMSKPS mask, tmp
- // CMP mask, 0
- // JNE $throwLabel
- newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp, src, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllNegOnesF4Addr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
- newInstr->SetSrc1(mask);
- newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
- // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
- // To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
- // MOVAPS two_31_f4_mask, [X86_TWO_31]
- // CMPLEPS tmp2, two_31_mask, src
- // ANDPS two_31_f4_mask, tmp2 // tmp has f32(2^31) for lanes >= 2^31, 0 otherwise
- // SUBPS tmp2, two_31_f4_mask // subtract 2^31 from lanes >= 2^31, unchanged otherwise.
- // CVTTPS2DQ dst, tmp2
- newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_f4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31F4Addr(), TySimd128F4, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::CMPLEPS, tmp2, two_31_f4_mask, src, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_f4_mask, two_31_f4_mask, tmp2, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::SUBPS, tmp2, src, two_31_f4_mask, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::CVTTPS2DQ, dst, tmp2, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
- // PCMPEQD tmp, dst, [X86_NEG_MASK]
- // MOVMSKPS mask, tmp
- // CMP mask, 0
- // JNE $throwLabel
- newInstr = IR::Instr::New(Js::OpCode::PCMPEQD, tmp, dst, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86NegMaskF4Addr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::MOVMSKPS, mask, tmp, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
- newInstr->SetSrc1(mask);
- newInstr->SetSrc2(IR::IntConstOpnd::New(0, TyInt32, m_func));
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JNE, throwLabel, m_func));
- // we pass range checks
- // add i4(2^31) values back to adjusted values.
- // Use first bit from the 2^31 float mask (0x4f000...0 << 1)
- // and AND with 2^31 int mask (0x8000..0) setting first bit to zero if lane hasn't been adjusted
- // MOVAPS two_31_i4_mask, [X86_TWO_31_I4]
- // PSLLD two_31_f4_mask, 1
- // ANDPS two_31_i4_mask, two_31_f4_mask
- // PADDD dst, dst, two_31_i4_mask
- // JMP $doneLabel
- newInstr = IR::Instr::New(Js::OpCode::MOVAPS, two_31_i4_mask, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower31I4Addr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::PSLLD, two_31_f4_mask, two_31_f4_mask, IR::IntConstOpnd::New(1, TyInt8, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::ANDPS, two_31_i4_mask, two_31_i4_mask, two_31_f4_mask, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- newInstr = IR::Instr::New(Js::OpCode::PADDD, dst, dst, two_31_i4_mask, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- instr->InsertBefore(IR::BranchInstr::New(Js::OpCode::JMP, doneLabel, m_func));
- // throwLabel:
- // Throw Range Error
- instr->InsertBefore(throwLabel);
- m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
- // doneLabe:
- instr->InsertBefore(doneLabel);
- instr->Remove();
- return pInstr;
- }
- IR::Instr* LowererMD::Simd128LowerFloat32x4FromUint32x4(IR::Instr *instr)
- {
- IR::Opnd *dst, *src, *tmp, *zero;
- IR::Instr *pInstr, *newInstr;
- dst = instr->GetDst();
- src = instr->GetSrc1();
- Assert(dst != nullptr && src != nullptr && dst->IsSimd128() && src->IsSimd128());
- pInstr = instr->m_prev;
- zero = IR::RegOpnd::New(TySimd128I4, m_func);
- tmp = IR::RegOpnd::New(TySimd128I4, m_func);
- // find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
- // MOVAPS zero, [X86_ALL_ZEROS]
- newInstr = IR::Instr::New(Js::OpCode::MOVAPS, zero, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86AllZerosAddr(), TySimd128I4, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // tmp = PCMPGTD zero, src
- newInstr = IR::Instr::New(Js::OpCode::PCMPGTD, tmp, zero, src, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // temp1 has f32(2^32) for unsigned values above 2^31, 0 otherwise
- // ANDPS tmp, tmp, [X86_TWO_32_F4]
- newInstr = IR::Instr::New(Js::OpCode::ANDPS, tmp, tmp, IR::MemRefOpnd::New(m_func->GetThreadContextInfo()->GetX86TwoPower32F4Addr(), TySimd128F4, m_func), m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // convert
- // dst = CVTDQ2PS src
- newInstr = IR::Instr::New(Js::OpCode::CVTDQ2PS, dst, src, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // Add f32(2^32) to negative values
- // ADDPS dst, dst, tmp
- newInstr = IR::Instr::New(Js::OpCode::ADDPS, dst, dst, tmp, m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- instr->Remove();
- return pInstr;
- }
- IR::Instr* LowererMD::Simd128AsmJsLowerLoadElem(IR::Instr *instr)
- {
- Assert(instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_F4 ||
- //instr->m_opcode == Js::OpCode::Simd128_LdArr_D2 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_I16 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_U16 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArrConst_F4
- //instr->m_opcode == Js::OpCode::Simd128_LdArrConst_D2
- );
- IR::Instr * instrPrev = instr->m_prev;
- IR::RegOpnd * indexOpnd = instr->GetSrc1()->AsIndirOpnd()->GetIndexOpnd();
- IR::RegOpnd * baseOpnd = instr->GetSrc1()->AsIndirOpnd()->GetBaseOpnd();
- IR::Opnd * dst = instr->GetDst();
- IR::Opnd * src1 = instr->GetSrc1();
- IR::Opnd * src2 = instr->GetSrc2();
- ValueType arrType = baseOpnd->GetValueType();
- uint8 dataWidth = instr->dataWidth;
- // Type-specialized.
- Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
- IR::Instr * done;
- if (indexOpnd || (((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth) > 0x1000000 /* 16 MB */))
- {
- uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
- // bound check and helper
- done = this->lowererMDArch.LowerAsmJsLdElemHelper(instr, true, bpe != dataWidth);
- }
- else
- {
- // Reaching here means:
- // We have a constant index, and either
- // (1) constant heap or (2) variable heap with constant index < 16MB.
- // Case (1) requires static bound check. Case (2) means we are always in bound.
- // this can happen in cases where globopt props a constant access which was not known at bytecodegen time or when heap is non-constant
- if (src2->IsIntConstOpnd() && ((uint32)src1->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
- {
- m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
- instr->Remove();
- return instrPrev;
- }
- done = instr;
- }
- return Simd128ConvertToLoad(dst, src1, dataWidth, instr);
- }
- IR::Instr* LowererMD::Simd128LowerLoadElem(IR::Instr *instr)
- {
- Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
- Assert(
- instr->m_opcode == Js::OpCode::Simd128_LdArr_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_I16 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_U16 ||
- instr->m_opcode == Js::OpCode::Simd128_LdArr_F4
- );
- IR::Opnd * src = instr->GetSrc1();
- IR::RegOpnd * indexOpnd =src->AsIndirOpnd()->GetIndexOpnd();
- IR::Opnd * dst = instr->GetDst();
- ValueType arrType = src->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
- // If we type-specialized, then array is a definite typed-array.
- Assert(arrType.IsObject() && arrType.IsTypedArray());
- Simd128GenerateUpperBoundCheck(indexOpnd, src->AsIndirOpnd(), arrType, instr);
- Simd128LoadHeadSegment(src->AsIndirOpnd(), arrType, instr);
- return Simd128ConvertToLoad(dst, src, instr->dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /* scale factor */);
- }
- IR::Instr *
- LowererMD::Simd128ConvertToLoad(IR::Opnd *dst, IR::Opnd *src, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0*/)
- {
- IR::Instr *newInstr = nullptr;
- IR::Instr * instrPrev = instr->m_prev;
- // Type-specialized.
- Assert(dst && dst->IsSimd128());
- Assert(src->IsIndirOpnd());
- if (scaleFactor > 0)
- {
- // needed only for non-Asmjs code
- Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
- src->AsIndirOpnd()->SetScale(scaleFactor);
- }
- switch (dataWidth)
- {
- case 16:
- // MOVUPS dst, src1([arrayBuffer + indexOpnd])
- newInstr = IR::Instr::New(LowererMDArch::GetAssignOp(src->GetType()), dst, src, instr->m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- break;
- case 12:
- {
- IR::RegOpnd *temp = IR::RegOpnd::New(src->GetType(), instr->m_func);
- // MOVSD dst, src1([arrayBuffer + indexOpnd])
- newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- // MOVSS temp, src1([arrayBuffer + indexOpnd + 8])
- newInstr = IR::Instr::New(Js::OpCode::MOVSS, temp, src, instr->m_func);
- instr->InsertBefore(newInstr);
- newInstr->GetSrc1()->AsIndirOpnd()->SetOffset(src->AsIndirOpnd()->GetOffset() + 8, true);
- Legalize(newInstr);
- // PSLLDQ temp, 0x08
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSLLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, instr->m_func, true), instr->m_func));
- // ORPS dst, temp
- newInstr = IR::Instr::New(Js::OpCode::ORPS, dst, dst, temp, instr->m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- break;
- }
- case 8:
- // MOVSD dst, src1([arrayBuffer + indexOpnd])
- newInstr = IR::Instr::New(Js::OpCode::MOVSD, dst, src, instr->m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- break;
- case 4:
- // MOVSS dst, src1([arrayBuffer + indexOpnd])
- newInstr = IR::Instr::New(Js::OpCode::MOVSS, dst, src, instr->m_func);
- instr->InsertBefore(newInstr);
- Legalize(newInstr);
- break;
- default:
- Assume(UNREACHED);
- }
- instr->Remove();
- return instrPrev;
- }
- IR::Instr*
- LowererMD::Simd128AsmJsLowerStoreElem(IR::Instr *instr)
- {
- Assert(
- instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_F4 ||
- //instr->m_opcode == Js::OpCode::Simd128_StArr_D2 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_I16 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_U16 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArrConst_F4
- //instr->m_opcode == Js::OpCode::Simd128_StArrConst_D2
- );
- IR::Instr * instrPrev = instr->m_prev;
- IR::RegOpnd * indexOpnd = instr->GetDst()->AsIndirOpnd()->GetIndexOpnd();
- IR::RegOpnd * baseOpnd = instr->GetDst()->AsIndirOpnd()->GetBaseOpnd();
- IR::Opnd * dst = instr->GetDst();
- IR::Opnd * src1 = instr->GetSrc1();
- IR::Opnd * src2 = instr->GetSrc2();
- ValueType arrType = baseOpnd->GetValueType();
- uint8 dataWidth = instr->dataWidth;
- // Type-specialized.
- Assert(dst->IsSimd128() && src1->IsSimd128() && src2->GetType() == TyUint32);
- IR::Instr * done;
- if (indexOpnd || ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > 0x1000000))
- {
- // CMP indexOpnd, src2(arrSize)
- // JA $helper
- // JMP $store
- // $helper:
- // Throw RangeError
- // JMP $done
- // $store:
- // MOV dst([arrayBuffer + indexOpnd]), src1
- // $done:
- uint32 bpe = Simd128GetTypedArrBytesPerElem(arrType);
- done = this->lowererMDArch.LowerAsmJsStElemHelper(instr, true, bpe != dataWidth);
- }
- else
- {
- // we might have a constant index if globopt propped a constant store. we can ahead of time check if it is in-bounds
- if (src2->IsIntConstOpnd() && ((uint32)dst->AsIndirOpnd()->GetOffset() + dataWidth > src2->AsIntConstOpnd()->AsUint32()))
- {
- m_lowerer->GenerateRuntimeError(instr, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
- instr->Remove();
- return instrPrev;
- }
- done = instr;
- }
- return Simd128ConvertToStore(dst, src1, dataWidth, instr);
- }
- IR::Instr*
- LowererMD::Simd128LowerStoreElem(IR::Instr *instr)
- {
- Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
- Assert(
- instr->m_opcode == Js::OpCode::Simd128_StArr_I4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_I8 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_I16 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_U4 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_U8 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_U16 ||
- instr->m_opcode == Js::OpCode::Simd128_StArr_F4
- );
- IR::Opnd * dst = instr->GetDst();
- IR::RegOpnd * indexOpnd = dst->AsIndirOpnd()->GetIndexOpnd();
- IR::Opnd * src1 = instr->GetSrc1();
- uint8 dataWidth = instr->dataWidth;
- ValueType arrType = dst->AsIndirOpnd()->GetBaseOpnd()->GetValueType();
- // If we type-specialized, then array is a definite type-array.
- Assert(arrType.IsObject() && arrType.IsTypedArray());
- Simd128GenerateUpperBoundCheck(indexOpnd, dst->AsIndirOpnd(), arrType, instr);
- Simd128LoadHeadSegment(dst->AsIndirOpnd(), arrType, instr);
- return Simd128ConvertToStore(dst, src1, dataWidth, instr, m_lowerer->GetArrayIndirScale(arrType) /*scale factor*/);
- }
- IR::Instr *
- LowererMD::Simd128ConvertToStore(IR::Opnd *dst, IR::Opnd *src1, uint8 dataWidth, IR::Instr* instr, byte scaleFactor /* = 0 */)
- {
- IR::Instr * instrPrev = instr->m_prev;
- Assert(src1 && src1->IsSimd128());
- Assert(dst->IsIndirOpnd());
- if (scaleFactor > 0)
- {
- // needed only for non-Asmjs code
- Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
- dst->AsIndirOpnd()->SetScale(scaleFactor);
- }
- switch (dataWidth)
- {
- case 16:
- // MOVUPS dst([arrayBuffer + indexOpnd]), src1
- instr->InsertBefore(IR::Instr::New(LowererMDArch::GetAssignOp(src1->GetType()), dst, src1, instr->m_func));
- break;
- case 12:
- {
- IR::RegOpnd *temp = IR::RegOpnd::New(src1->GetType(), instr->m_func);
- IR::Instr *movss;
- // MOVAPS temp, src
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, temp, src1, instr->m_func));
- // MOVSD dst([arrayBuffer + indexOpnd]), temp
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, temp, instr->m_func));
- // PSRLDQ temp, 0x08
- instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLDQ, temp, temp, IR::IntConstOpnd::New(8, TyInt8, m_func, true), instr->m_func));
- // MOVSS dst([arrayBuffer + indexOpnd + 8]), temp
- movss = IR::Instr::New(Js::OpCode::MOVSS, dst, temp, instr->m_func);
- instr->InsertBefore(movss);
- movss->GetDst()->AsIndirOpnd()->SetOffset(dst->AsIndirOpnd()->GetOffset() + 8, true);
- break;
- }
- case 8:
- // MOVSD dst([arrayBuffer + indexOpnd]), src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSD, dst, src1, instr->m_func));
- break;
- case 4:
- // MOVSS dst([arrayBuffer + indexOpnd]), src1
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSS, dst, src1, instr->m_func));
- break;
- default:;
- Assume(UNREACHED);
- }
- instr->Remove();
- return instrPrev;
- }
- void
- LowererMD::Simd128GenerateUpperBoundCheck(IR::RegOpnd *indexOpnd, IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
- {
- Assert(!m_func->GetJITFunctionBody()->IsAsmJsMode());
- IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
- IR::Opnd* headSegmentLengthOpnd;
- if (arrayRegOpnd->EliminatedUpperBoundCheck())
- {
- // already eliminated or extracted by globOpt (OptArraySrc). Nothing to do.
- return;
- }
- if (arrayRegOpnd->HeadSegmentLengthSym())
- {
- headSegmentLengthOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentLengthSym(), TyUint32, m_func);
- }
- else
- {
- // (headSegmentLength = [base + offset(length)])
- int lengthOffset;
- lengthOffset = m_lowerer->GetArrayOffsetOfLength(arrType);
- headSegmentLengthOpnd = IR::IndirOpnd::New(arrayRegOpnd, lengthOffset, TyUint32, m_func);
- }
- IR::LabelInstr * skipLabel = Lowerer::InsertLabel(false, instr);
- int32 elemCount = Lowerer::SimdGetElementCountFromBytes(arrayRegOpnd->GetValueType(), instr->dataWidth);
- if (indexOpnd)
- {
- // MOV tmp, elemCount
- // ADD tmp, index
- // CMP tmp, Length -- upper bound check
- // JBE $storeLabel
- // Throw RuntimeError
- // skipLabel:
- IR::RegOpnd *tmp = IR::RegOpnd::New(indexOpnd->GetType(), m_func);
- IR::IntConstOpnd *elemCountOpnd = IR::IntConstOpnd::New(elemCount, TyInt8, m_func, true);
- m_lowerer->InsertMove(tmp, elemCountOpnd, skipLabel);
- Lowerer::InsertAdd(false, tmp, tmp, indexOpnd, skipLabel);
- m_lowerer->InsertCompareBranch(tmp, headSegmentLengthOpnd, Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
- }
- else
- {
- // CMP Length, (offset + elemCount)
- // JA $storeLabel
- int32 offset = indirOpnd->GetOffset();
- int32 index = offset + elemCount;
- m_lowerer->InsertCompareBranch(headSegmentLengthOpnd, IR::IntConstOpnd::New(index, TyInt32, m_func, true), Js::OpCode::BrLe_A, true, skipLabel, skipLabel);
- }
- m_lowerer->GenerateRuntimeError(skipLabel, JSERR_ArgumentOutOfRange, IR::HelperOp_RuntimeRangeError);
- return;
- }
- void
- LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, IR::Instr *instr)
- {
- // For non-asm.js we check if headSeg symbol exists, else load it.
- IR::ArrayRegOpnd *arrayRegOpnd = indirOpnd->GetBaseOpnd()->AsArrayRegOpnd();
- IR::RegOpnd *headSegmentOpnd;
- if (arrayRegOpnd->HeadSegmentSym())
- {
- headSegmentOpnd = IR::RegOpnd::New(arrayRegOpnd->HeadSegmentSym(), TyMachPtr, m_func);
- }
- else
- {
- // MOV headSegment, [base + offset(head)]
- int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
- IR::IndirOpnd * newIndirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
- headSegmentOpnd = IR::RegOpnd::New(TyMachPtr, this->m_func);
- m_lowerer->InsertMove(headSegmentOpnd, newIndirOpnd, instr);
- }
- // change base to be the head segment instead of the array object
- indirOpnd->SetBaseOpnd(headSegmentOpnd);
- }
- // Builds args list <dst, src1, src2, src3 ..>
- SList<IR::Opnd*> * LowererMD::Simd128GetExtendedArgs(IR::Instr *instr)
- {
- SList<IR::Opnd*> * args = JitAnew(m_lowerer->m_alloc, SList<IR::Opnd*>, m_lowerer->m_alloc);
- IR::Instr *pInstr = instr;
- IR::Opnd *dst, *src1, *src2;
- dst = src1 = src2 = nullptr;
- if (pInstr->GetDst())
- {
- dst = pInstr->UnlinkDst();
- }
- src1 = pInstr->UnlinkSrc1();
- Assert(src1->GetStackSym()->IsSingleDef());
- pInstr = src1->GetStackSym()->GetInstrDef();
- while (pInstr && pInstr->m_opcode == Js::OpCode::ExtendArg_A)
- {
- Assert(pInstr->GetSrc1());
- src1 = pInstr->GetSrc1()->Copy(this->m_func);
- if (src1->IsRegOpnd())
- {
- this->m_lowerer->addToLiveOnBackEdgeSyms->Set(src1->AsRegOpnd()->m_sym->m_id);
- }
- args->Push(src1);
- if (pInstr->GetSrc2())
- {
- src2 = pInstr->GetSrc2();
- Assert(src2->GetStackSym()->IsSingleDef());
- pInstr = src2->GetStackSym()->GetInstrDef();
- }
- else
- {
- pInstr = nullptr;
- }
- }
- args->Push(dst);
- Assert(args->Count() > 3);
- return args;
- }
- IR::Opnd*
- LowererMD::EnregisterBoolConst(IR::Instr* instr, IR::Opnd *opnd, IRType type)
- {
- if (opnd->IsIntConstOpnd() || opnd->IsInt64ConstOpnd())
- {
- bool isSet = opnd->GetImmediateValue(instr->m_func) != 0;
- IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(isSet ? -1 : 0, type, m_func, true), m_func));
- return tempReg;
- }
- IRType origType = opnd->GetType();
- IR::RegOpnd *tempReg = IR::RegOpnd::New(origType, m_func);
- IR::Instr* cmovInstr = IR::Instr::New(Js::OpCode::MOV, tempReg, IR::IntConstOpnd::New(0, origType, m_func, true), m_func);
- instr->InsertBefore(cmovInstr);
- Legalize(cmovInstr);
- cmovInstr = IR::Instr::New(Js::OpCode::SUB, tempReg, tempReg, opnd->UseWithNewType(origType, m_func), m_func);
- instr->InsertBefore(cmovInstr);
- Legalize(cmovInstr);
- cmovInstr = IR::Instr::New(Js::OpCode::CMOVS, tempReg, tempReg, IR::IntConstOpnd::New(-1, origType, m_func, true), m_func);
- instr->InsertBefore(cmovInstr);
- Legalize(cmovInstr);
- return tempReg->UseWithNewType(type, m_func);
- }
- IR::Opnd*
- LowererMD::EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type /* = TyInt32*/)
- {
- IRType constType = constOpnd->GetType();
- if (!IRType_IsNativeInt(constType))
- {
- // not int opnd, nothing to do
- return constOpnd;
- }
- Assert(type == TyInt32 || type == TyInt16 || type == TyInt8);
- Assert(constType == TyInt32 || constType == TyInt16 || constType == TyInt8);
- if (constOpnd->IsRegOpnd())
- {
- // already a register, just cast
- constOpnd->SetType(type);
- return constOpnd;
- }
- // en-register
- IR::RegOpnd *tempReg = IR::RegOpnd::New(type, m_func);
- // MOV tempReg, constOpnd
- instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tempReg, constOpnd, m_func));
- return tempReg;
- }
- void LowererMD::Simd128InitOpcodeMap()
- {
- m_simd128OpCodesMap = JitAnewArrayZ(m_lowerer->m_alloc, Js::OpCode, Js::Simd128OpcodeCount());
- // All simd ops should be contiguous for this mapping to work
- Assert(Js::OpCode::Simd128_End + (Js::OpCode) 1 == Js::OpCode::Simd128_Start_Extend);
- //SET_SIMDOPCODE(Simd128_FromFloat64x2_I4 , CVTTPD2DQ);
- //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_Add_I4 , PADDD);
- SET_SIMDOPCODE(Simd128_Sub_I4 , PSUBD);
- SET_SIMDOPCODE(Simd128_Lt_I4 , PCMPGTD);
- SET_SIMDOPCODE(Simd128_Gt_I4 , PCMPGTD);
- SET_SIMDOPCODE(Simd128_Eq_I4 , PCMPEQD);
- SET_SIMDOPCODE(Simd128_And_I4 , PAND);
- SET_SIMDOPCODE(Simd128_Or_I4 , POR);
- SET_SIMDOPCODE(Simd128_Xor_I4 , PXOR);
- SET_SIMDOPCODE(Simd128_Not_I4 , XORPS);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt8x16Bits_I8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_Or_I16 , POR);
- SET_SIMDOPCODE(Simd128_Xor_I16 , PXOR);
- SET_SIMDOPCODE(Simd128_Not_I16 , XORPS);
- SET_SIMDOPCODE(Simd128_And_I16 , PAND);
- SET_SIMDOPCODE(Simd128_Add_I16 , PADDB);
- SET_SIMDOPCODE(Simd128_Sub_I16 , PSUBB);
- SET_SIMDOPCODE(Simd128_Lt_I16 , PCMPGTB);
- SET_SIMDOPCODE(Simd128_Gt_I16 , PCMPGTB);
- SET_SIMDOPCODE(Simd128_Eq_I16 , PCMPEQB);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_I16, MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_I16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt16x8Bits_I16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint32x4Bits_I16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint16x8Bits_I16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint8x16Bits_I16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint8x16Bits_U8 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_U16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_U16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt16x8Bits_U16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt8x16Bits_U16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint32x4Bits_U16 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint16x8Bits_U16 , MOVAPS);
- //SET_SIMDOPCODE(Simd128_FromFloat64x2_F4 , CVTPD2PS);
- //SET_SIMDOPCODE(Simd128_FromFloat64x2Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4_F4 , CVTDQ2PS);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt16x8Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt8x16Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint32x4Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint16x8Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_FromUint8x16Bits_F4 , MOVAPS);
- SET_SIMDOPCODE(Simd128_Abs_F4 , ANDPS);
- SET_SIMDOPCODE(Simd128_Neg_F4 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_F4 , ADDPS);
- SET_SIMDOPCODE(Simd128_Sub_F4 , SUBPS);
- SET_SIMDOPCODE(Simd128_Mul_F4 , MULPS);
- SET_SIMDOPCODE(Simd128_Div_F4 , DIVPS);
- SET_SIMDOPCODE(Simd128_Sqrt_F4 , SQRTPS);
- SET_SIMDOPCODE(Simd128_Lt_F4 , CMPLTPS); // CMPLTPS
- SET_SIMDOPCODE(Simd128_LtEq_F4 , CMPLEPS); // CMPLEPS
- SET_SIMDOPCODE(Simd128_Eq_F4 , CMPEQPS); // CMPEQPS
- SET_SIMDOPCODE(Simd128_Neq_F4 , CMPNEQPS); // CMPNEQPS
- SET_SIMDOPCODE(Simd128_Gt_F4 , CMPLTPS); // CMPLTPS (swap srcs)
- SET_SIMDOPCODE(Simd128_GtEq_F4 , CMPLEPS); // CMPLEPS (swap srcs)
- SET_SIMDOPCODE(Simd128_Neg_D2 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_D2 , ADDPD);
- SET_SIMDOPCODE(Simd128_Abs_D2 , ANDPD);
- SET_SIMDOPCODE(Simd128_Sub_D2 , SUBPD);
- SET_SIMDOPCODE(Simd128_Mul_D2 , MULPD);
- SET_SIMDOPCODE(Simd128_Div_D2 , DIVPD);
- SET_SIMDOPCODE(Simd128_Min_D2 , MINPD);
- SET_SIMDOPCODE(Simd128_Max_D2 , MAXPD);
- SET_SIMDOPCODE(Simd128_Sqrt_D2 , SQRTPD);
- SET_SIMDOPCODE(Simd128_Lt_D2 , CMPLTPD); // CMPLTPD
- SET_SIMDOPCODE(Simd128_LtEq_D2 , CMPLEPD); // CMPLEPD
- SET_SIMDOPCODE(Simd128_Eq_D2 , CMPEQPD); // CMPEQPD
- SET_SIMDOPCODE(Simd128_Neq_D2 , CMPNEQPD); // CMPNEQPD
- SET_SIMDOPCODE(Simd128_Gt_D2 , CMPLTPD); // CMPLTPD (swap srcs)
- SET_SIMDOPCODE(Simd128_GtEq_D2 , CMPLEPD); // CMPLEPD (swap srcs)
- #if 0
- SET_SIMDOPCODE(Simd128_FromFloat32x4_D2, CVTPS2PD);
- SET_SIMDOPCODE(Simd128_FromFloat32x4Bits_D2, MOVAPS);
- SET_SIMDOPCODE(Simd128_FromInt32x4_D2, CVTDQ2PD);
- SET_SIMDOPCODE(Simd128_FromInt32x4Bits_D2, MOVAPS);
- #endif // 0
- SET_SIMDOPCODE(Simd128_And_I8 , PAND);
- SET_SIMDOPCODE(Simd128_Or_I8 , POR);
- SET_SIMDOPCODE(Simd128_Xor_I8 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_I8 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_I8 , PADDW);
- SET_SIMDOPCODE(Simd128_Sub_I8 , PSUBW);
- SET_SIMDOPCODE(Simd128_Mul_I8 , PMULLW);
- SET_SIMDOPCODE(Simd128_Eq_I8 , PCMPEQW);
- SET_SIMDOPCODE(Simd128_Lt_I8 , PCMPGTW); // (swap srcs)
- SET_SIMDOPCODE(Simd128_Gt_I8 , PCMPGTW);
- SET_SIMDOPCODE(Simd128_AddSaturate_I8 , PADDSW);
- SET_SIMDOPCODE(Simd128_SubSaturate_I8 , PSUBSW);
- SET_SIMDOPCODE(Simd128_AddSaturate_I16 , PADDSB);
- SET_SIMDOPCODE(Simd128_SubSaturate_I16 , PSUBSB);
- SET_SIMDOPCODE(Simd128_And_U4 , PAND);
- SET_SIMDOPCODE(Simd128_Or_U4 , POR);
- SET_SIMDOPCODE(Simd128_Xor_U4 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_U4 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_U4 , PADDD);
- SET_SIMDOPCODE(Simd128_Sub_U4 , PSUBD);
- SET_SIMDOPCODE(Simd128_Eq_U4 , PCMPEQD); // same as int32x4.equal
- SET_SIMDOPCODE(Simd128_And_U8 , PAND);
- SET_SIMDOPCODE(Simd128_Or_U8 , POR);
- SET_SIMDOPCODE(Simd128_Xor_U8 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_U8 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_U8 , PADDW);
- SET_SIMDOPCODE(Simd128_Sub_U8 , PSUBW);
- SET_SIMDOPCODE(Simd128_Mul_U8 , PMULLW);
- SET_SIMDOPCODE(Simd128_Eq_U8 , PCMPEQW); // same as int16X8.equal
- SET_SIMDOPCODE(Simd128_AddSaturate_U8 , PADDUSW);
- SET_SIMDOPCODE(Simd128_SubSaturate_U8 , PSUBUSW);
- SET_SIMDOPCODE(Simd128_And_U16 , PAND);
- SET_SIMDOPCODE(Simd128_Or_U16 , POR);
- SET_SIMDOPCODE(Simd128_Xor_U16 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_U16 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_U16 , PADDB);
- SET_SIMDOPCODE(Simd128_Sub_U16 , PSUBB);
- SET_SIMDOPCODE(Simd128_Eq_U16 , PCMPEQB); // same as int8x16.equal
- SET_SIMDOPCODE(Simd128_AddSaturate_U16 , PADDUSB);
- SET_SIMDOPCODE(Simd128_SubSaturate_U16 , PSUBUSB);
- SET_SIMDOPCODE(Simd128_And_B4 , PAND);
- SET_SIMDOPCODE(Simd128_Or_B4 , POR);
- SET_SIMDOPCODE(Simd128_Xor_B4 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_B4 , XORPS);
- SET_SIMDOPCODE(Simd128_And_B8 , PAND);
- SET_SIMDOPCODE(Simd128_Or_B8 , POR);
- SET_SIMDOPCODE(Simd128_Xor_B8 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_B8 , XORPS);
- SET_SIMDOPCODE(Simd128_And_B16 , PAND);
- SET_SIMDOPCODE(Simd128_Or_B16 , POR);
- SET_SIMDOPCODE(Simd128_Xor_B16 , XORPS);
- SET_SIMDOPCODE(Simd128_Not_B16 , XORPS);
- SET_SIMDOPCODE(Simd128_Add_I2 , PADDQ);
- SET_SIMDOPCODE(Simd128_Sub_I2 , PSUBQ);
- }
- #undef SIMD_SETOPCODE
- #undef SIMD_GETOPCODE
- // Classifies each of the four shuffle lane selectors in lanes[] by range.
- //
- //   lanes    - in:  four lane selectors, each expected to be in 0..7.
- //   lanesSrc - out: lanesSrc[i] is set to 1 when lanes[i] is in 0..3 and to 2
- //                   when lanes[i] is in 4..7.
- //   fromSrc1 - out: number of selectors found in 0..3.
- //   fromSrc2 - out: number of selectors found in 4..7.
- //
- // A selector outside 0..7 only trips the assert below: lanesSrc[i] is left
- // unwritten and neither counter is incremented for that lane.
- void LowererMD::CheckShuffleLanes_4(uint8 lanes[], uint8 lanesSrc[], uint *fromSrc1, uint *fromSrc2)
- {
-     Assert(lanes);
-     Assert(lanesSrc);
-     Assert(fromSrc1 && fromSrc2);
-     *fromSrc1 = 0;
-     *fromSrc2 = 0;
-     for (uint i = 0; i < 4; i++)
-     {
-         // Selectors are unsigned, so a lower-bound test against 0 is always true;
-         // only the upper bounds need to be checked.
-         if (lanes[i] < 4)
-         {
-             (*fromSrc1)++;
-             lanesSrc[i] = 1;
-         }
-         else if (lanes[i] < 8)
-         {
-             // Not below 4 (previous branch), so this is the 4..7 range.
-             (*fromSrc2)++;
-             lanesSrc[i] = 2;
-         }
-         else
-         {
-             Assert(UNREACHED);
-         }
-     }
- }
- // Inserts, immediately before instr, the instructions that shuffle src1 and
- // src2 into dst with SHUFPS.
- //
- //   lanes - four lane selectors; a selector of 4 or more is reduced by 4
- //           before being packed, two bits per lane, into the immediate
- //           (lane 0 in the lowest bits).
- //   dst   - destination operand; it is also the first SHUFPS source, so src1 is
- //           copied into it first unless it already equals src1.
- //   src1, src2 - the two shuffle inputs.
- //   instr - insertion point; the new instructions are placed before it.
- void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::Opnd *src2, IR::Instr *instr)
- {
-     // Scratch register, only consumed when dst aliases src2.
-     IR::RegOpnd * scratch = IR::RegOpnd::New(TySimd128I4, m_func);
-     uint8 laneBits[4];
-     for (uint idx = 0; idx < 4; idx++)
-     {
-         laneBits[idx] = (lanes[idx] >= 4) ? (lanes[idx] - 4) : lanes[idx];
-     }
-     const int8 shufImm = (int8)((laneBits[3] << 6) | (laneBits[2] << 4) | (laneBits[1] << 2) | laneBits[0]);
-     // Operand SHUFPS reads as its second source; src2 unless it had to be saved.
-     IR::Opnd * shufSrc = src2;
-     // ToDo: Move this to legalization code
-     if (!dst->IsEqual(src1))
-     {
-         if (dst->IsEqual(src2))
-         {
-             // dst is about to be overwritten with src1, so keep its value first.
-             // MOVAPS scratch, dst
-             instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, scratch, dst, m_func));
-             shufSrc = scratch;
-         }
-         // MOVAPS dst, src1
-         instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
-     }
-     // else: dst already equals src1, so the instruction is legal as is.
-     // SHUFPS dst, shufSrc, imm8
-     instr->InsertBefore(IR::Instr::New(Js::OpCode::SHUFPS, dst, shufSrc, IR::IntConstOpnd::New((IntConstType)shufImm, TyInt8, m_func, true), m_func));
- }
- // Returns 1 shifted left by the indirection scale that
- // Lowerer::GetArrayIndirScale reports for arrType.
- // NOTE(review): presumably the scale is log2 of the element size, making the
- // result the element size in bytes - confirm against GetArrayIndirScale.
- BYTE LowererMD::Simd128GetTypedArrBytesPerElem(ValueType arrType)
- {
-     const auto indirScale = Lowerer::GetArrayIndirScale(arrType);
-     return (1 << indirScale);
- }
- #endif
|