ソースを参照

Replace high-latency i32 division and remainder operations with multiply, shift, and add sequences.

Looking for early feedback. I plan to submit another PR that extends this to the mod and rem ops
and enables the optimization for wasm and JS, with tests.
Arun 8 年 前
コミット
29d150da60

+ 19 - 4
lib/Backend/Lower.cpp

@@ -24677,6 +24677,11 @@ Lowerer::LowerRemI4(IR::Instr * instr)
 {
     Assert(instr);
     Assert(instr->m_opcode == Js::OpCode::Rem_I4 || instr->m_opcode == Js::OpCode::RemU_I4);
+    //Generate fast path for const divisors
+    if (m_lowererMD.GenerateFastDiv(instr))
+    {
+        return;
+    }
 
     if (m_func->GetJITFunctionBody()->IsAsmJsMode())
     {
@@ -24793,21 +24798,31 @@ Lowerer::LowerDivI4(IR::Instr * instr)
     }
 #endif
 
+    Assert(instr->GetSrc2());
     if (m_func->GetJITFunctionBody()->IsWasmFunction())
     {
-        m_lowererMD.EmitInt4Instr(instr);
+        if (!m_lowererMD.GenerateFastDiv(instr))
+        {
+            m_lowererMD.EmitInt4Instr(instr);
+        }
         return;
     }
 
     if (m_func->GetJITFunctionBody()->IsAsmJsMode())
     {
-        LowerDivI4Common(instr);
+        if (!m_lowererMD.GenerateFastDiv(instr))
+        {
+            LowerDivI4Common(instr);
+        }
         return;
     }
 
     if(!instr->HasBailOutInfo())
     {
-        m_lowererMD.EmitInt4Instr(instr);
+        if (!m_lowererMD.GenerateFastDiv(instr))
+        {
+            m_lowererMD.EmitInt4Instr(instr);
+        }
         return;
     }
 
@@ -24836,9 +24851,9 @@ Lowerer::LowerDivI4(IR::Instr * instr)
         // before bailing out, but does not seem worth the extra code..)
         InsertCompareBranch(nominatorOpnd, IR::IntConstOpnd::New(INT32_MIN, TyInt32, this->m_func, true), Js::OpCode::BrEq_A, bailOutLabel, nonBailOutInstr);
     }
-
     if (denominatorOpnd->IsIntConstOpnd() && Math::IsPow2(denominatorOpnd->AsIntConstOpnd()->AsInt32()))
     {
+        //ToDo enable fast divs here with tests.
         Assert((bailOutKind & (IR::BailOutOnNegativeZero | IR::BailOutOnDivByZero)) == 0);
         int pow2 = denominatorOpnd->AsIntConstOpnd()->AsInt32();
         InsertTestBranch(nominatorOpnd, IR::IntConstOpnd::New(pow2 - 1, TyInt32, this->m_func, true),

+ 6 - 1
lib/Backend/LowerMDShared.cpp

@@ -1183,7 +1183,7 @@ void LowererMD::ChangeToShift(IR::Instr *const instr, const bool needFlags)
             __assume(false);
     }
 
-    if(instr->GetSrc2()->IsIntConstOpnd())
+    if(instr->GetSrc2()->IsIntConstOpnd() && !instr->GetSrc1()->IsInt64())
     {
         // Only values between 0-31 mean anything
         IntConstType value = instr->GetSrc2()->AsIntConstOpnd()->GetValue();
@@ -7618,6 +7618,11 @@ bool LowererMD::GenerateFastAnd(IR::Instr * instrAnd)
     return this->lowererMDArch.GenerateFastAnd(instrAnd);
 }
 
+bool LowererMD::GenerateFastDiv(IR::Instr* instrDiv)
+{
+    return this->lowererMDArch.GenerateFastDiv(instrDiv); // Forwards to the architecture-specific lowerer and returns its result unchanged.
+}
+
 bool LowererMD::GenerateFastXor(IR::Instr * instrXor)
 {
     return this->lowererMDArch.GenerateFastXor(instrXor);

+ 2 - 1
lib/Backend/LowerMDShared.h

@@ -134,8 +134,9 @@ public:
             void            GenerateFastCmXxR8(IR::Instr *instr);
             void            GenerateFastCmXx(IR::Instr *instr);
             IR::Instr *     GenerateConvBool(IR::Instr *instr);
-            void            GenerateFastDivByPow2(IR::Instr *instr);
+            void            GenerateFastDivByPow2(IR::Instr *instrDiv);
             bool            GenerateFastAdd(IR::Instr * instrAdd);
+            bool            GenerateFastDiv(IR::Instr* instr);
 #if DBG
             static void     GenerateDebugBreak( IR::Instr * insertInstr );
 #endif

+ 102 - 0
lib/Backend/amd64/LowererMDArch.cpp

@@ -2888,6 +2888,108 @@ bool LowererMDArch::GenerateFastAnd(IR::Instr * instrAnd)
     return true;
 }
 
+// Replaces a signed 32-bit division or remainder by a constant divisor with a shift/add
+// sequence (power-of-two divisors) or a multiply/shift/add sequence (all other divisors).
+//
+// instrDiv: the Div_I4 or Rem_I4 instruction; src1 is the dividend and src2 the divisor.
+// Returns true when the replacement sequence was inserted before instrDiv and instrDiv was
+// removed. Returns false, leaving instrDiv untouched, when the fast path does not apply:
+// the phase is off, the divisor is not an integer constant, the opcode is neither Div_I4
+// nor Rem_I4, or |divisor| is outside the handled range.
+//
+// The quotient is built in a fresh temporary and dst is written only by the last inserted
+// instruction, so the sequence does not rely on dst and the dividend being distinct operands.
+bool LowererMDArch::GenerateFastDiv(IR::Instr * instrDiv)
+{
+    Assert(instrDiv);
+    IR::Opnd * divisor = instrDiv->GetSrc2(); // denominator
+
+    if (PHASE_OFF(Js::BitopsFastPathPhase, this->m_func) || !divisor->IsIntConstOpnd() ||
+        !(instrDiv->m_opcode == Js::OpCode::Div_I4 || instrDiv->m_opcode == Js::OpCode::Rem_I4)) //ToDo Optimize unsigned division
+    {
+        return false;
+    }
+
+    IR::Opnd* dividend      = instrDiv->GetSrc1(); // numerator
+    IR::Opnd* dst           = instrDiv->GetDst();
+    const int signedDivisor = divisor->AsIntConstOpnd()->AsInt32();
+
+    // INT32_MIN has no positive counterpart, so negating it would overflow.
+    if (signedDivisor == INT32_MIN)
+    {
+        return false;
+    }
+
+    const bool isNegDivisor = signedDivisor < 0;
+    const int constDivisor  = isNegDivisor ? -signedDivisor : signedDivisor;
+
+    // 0 and +/-1 are left to the regular lowering. The upper bound matches the range asserted
+    // by Js::NumberUtilities::GenerateDivMagicNumber (divisor < INT32_MAX - 1).
+    if (constDivisor < 2 || constDivisor >= INT32_MAX - 1)
+    {
+        return false;
+    }
+
+    // Working register for q = n / |d|; dst is only written at the very end.
+    IR::Opnd* quotient = IR::RegOpnd::New(TyInt32, this->m_func);
+
+    if (Math::IsPow2(constDivisor)) // Power of two
+    {
+        // A plain arithmetic shift rounds towards negative infinity, so negative dividends
+        // need a bias of 2^k - 1 added first. The following sequence avoids a branch.
+        // For q = n/d and d = 2^k
+
+        //      sar q n k-1
+        //      shr q q 32-k  // 2^k - 1 if n < 0 else 0
+        //      add q q n
+        //      sar q q k
+
+        int k = Math::Log2(constDivisor);
+        Lowerer::InsertShift(Js::OpCode::Shr_A, false, quotient, dividend, IR::IntConstOpnd::New(k - 1, TyInt8, this->m_func), instrDiv);
+        Lowerer::InsertShift(Js::OpCode::ShrU_A, false, quotient, quotient, IR::IntConstOpnd::New(32 - k, TyInt8, this->m_func), instrDiv);
+        Lowerer::InsertAdd(false, quotient, quotient, dividend, instrDiv);
+        Lowerer::InsertShift(Js::OpCode::Shr_A, false, quotient, quotient, IR::IntConstOpnd::New(k, TyInt8, this->m_func), instrDiv);
+    }
+    else
+    {
+        // For q = n/d where d is a signed constant
+        // Calculate magic_number (multiplier) and shift amounts (shiftAmt) and replace div with mul and shift
+        // Ref: Warren's Hacker's Delight, Chapter 10
+
+        Js::NumberUtilities::DivMagicNumber magic_number(Js::NumberUtilities::GenerateDivMagicNumber(constDivisor));
+        int32 multiplier = magic_number.multiplier;
+
+        // Compute mulhs(dividend, multiplier): 64-bit product, then keep the upper 32 bits.
+        IR::Opnd* product64  = IR::RegOpnd::New(TyInt64, this->m_func);
+        IR::Opnd* dividend64 = IR::RegOpnd::New(TyInt64, this->m_func);
+
+        Lowerer::InsertMove(dividend64, dividend, instrDiv);
+        IR::Instr* imul = IR::Instr::New(LowererMD::MDImulOpcode, product64, IR::IntConstOpnd::New(multiplier, TyInt32, this->m_func), dividend64, this->m_func);
+        instrDiv->InsertBefore(imul);
+        LowererMD::Legalize(imul);
+
+        Lowerer::InsertShift(Js::OpCode::Shr_A, false, product64, product64, IR::IntConstOpnd::New(32, TyInt8, this->m_func), instrDiv);
+        Lowerer::InsertMove(quotient, product64, instrDiv);
+
+        // A negative multiplier (e.g. for divisors such as 7) needs the dividend added back.
+        if (multiplier < 0)
+        {
+            Lowerer::InsertAdd(false, quotient, quotient, dividend, instrDiv);
+        }
+        if (magic_number.shiftAmt > 0)
+        {
+            Lowerer::InsertShift(Js::OpCode::Shr_A, false, quotient, quotient, IR::IntConstOpnd::New(magic_number.shiftAmt, TyInt8, this->m_func), instrDiv);
+        }
+        IR::Opnd* signBit = IR::RegOpnd::New(TyInt32, this->m_func);
+        Lowerer::InsertMove(signBit, dividend, instrDiv);
+
+        // Add 1 if the dividend is less than 0, so the quotient rounds towards zero
+        Lowerer::InsertShift(Js::OpCode::ShrU_A, false, signBit, signBit, IR::IntConstOpnd::New(31, TyInt8, this->m_func), instrDiv); // 1 if dividend < 0, 0 otherwise
+        Lowerer::InsertAdd(false, quotient, quotient, signBit, instrDiv);
+    }
+
+    // Negate the quotient if the divisor is less than zero
+    if (isNegDivisor)
+    {
+        Lowerer::InsertSub(false, quotient, IR::IntConstOpnd::New(0, TyInt8, this->m_func), quotient, instrDiv);
+    }
+
+    if (instrDiv->m_opcode == Js::OpCode::Rem_I4)
+    {
+        // Remainder: r = n - q * d, using the original signed divisor
+        // mul q, q, divisor
+        // sub dst, dividend, q
+        IR::Instr* imul = IR::Instr::New(LowererMD::MDImulOpcode, quotient, quotient, divisor, instrDiv->m_func);
+        instrDiv->InsertBefore(imul);
+        LowererMD::Legalize(imul);
+        Lowerer::InsertSub(false, dst, dividend, quotient, instrDiv);
+    }
+    else
+    {
+        Lowerer::InsertMove(dst, quotient, instrDiv);
+    }
+    instrDiv->Remove();
+    return true;
+}
+
 bool LowererMDArch::GenerateFastXor(IR::Instr * instrXor)
 {
     return true;

+ 2 - 1
lib/Backend/amd64/LowererMDArch.h

@@ -1,5 +1,5 @@
 //-------------------------------------------------------------------------------------------------------
-// Copyright (C) Microsoft. All rights reserved.
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #pragma once
@@ -139,6 +139,7 @@ public:
     bool                GenerateFastNot(IR::Instr * instrNot);
     bool                GenerateFastShiftLeft(IR::Instr * instrShift);
     bool                GenerateFastShiftRight(IR::Instr * instrShift);
+    bool                GenerateFastDiv(IR::Instr * divInstr);
 
     IR::Opnd*           GenerateArgOutForStackArgs(IR::Instr* callInstr, IR::Instr* stackArgsInstr);
     void                GenerateFunctionObjectTest(IR::Instr * callInstr, IR::RegOpnd  *functionObjOpnd, bool isHelper, IR::LabelInstr* afterCallLabel = nullptr);

+ 6 - 1
lib/Backend/arm/LowerMD.cpp

@@ -1,5 +1,5 @@
 //-------------------------------------------------------------------------------------------------------
-// Copyright (C) Microsoft. All rights reserved.
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 
@@ -5801,6 +5801,11 @@ bool LowererMD::GenerateFastCharAt(Js::BuiltinFunction index, IR::Opnd *dst, IR:
     return true;
 }
 
+bool LowererMD::GenerateFastDiv(IR::Instr* instrDiv)
+{
+    return false; // No fast division sequence is generated in this file; always reports "not handled" and leaves instrDiv untouched.
+}
+
 void
 LowererMD::EmitInt4Instr(IR::Instr *instr)
 {

+ 1 - 0
lib/Backend/arm/LowerMD.h

@@ -111,6 +111,7 @@ public:
             bool            GenerateFastOr(IR::Instr * instrOr);
             bool            GenerateFastNot(IR::Instr * instrNot);
             bool            GenerateFastNeg(IR::Instr * instrNeg);
+            bool            GenerateFastDiv(IR::Instr* instrDiv);
             bool            GenerateFastShiftLeft(IR::Instr * instrShift);
             bool            GenerateFastShiftRight(IR::Instr * instrShift);
             void            GenerateFastBrS(IR::BranchInstr *brInstr);

+ 5 - 1
lib/Backend/arm64/LowerMD.cpp

@@ -2474,7 +2474,11 @@ LowererMD::LoadFunctionObjectOpnd(IR::Instr *instr, IR::Opnd *&functionObjOpnd)
 
     return instrPrev;
 }
-
+bool
+LowererMD::GenerateFastDiv(IR::Instr *instrDiv)
+{
+    return false; // No fast division sequence is generated in this file; always reports "not handled" and leaves instrDiv untouched.
+}
 void
 LowererMD::GenerateFastDivByPow2(IR::Instr *instrDiv)
 {

+ 1 - 0
lib/Backend/arm64/LowerMD.h

@@ -101,6 +101,7 @@ public:
             void            GeneratePopCnt(IR::Instr * instr) { Assert(UNREACHED); }
             void            GenerateTruncWithCheck(IR::Instr * instr) { Assert(UNREACHED); }
             void            GenerateFastDivByPow2(IR::Instr *instr);
+            bool            GenerateFastDiv(IR::Instr* instrDiv);
             bool            GenerateFastAdd(IR::Instr * instrAdd);
             bool            GenerateFastSub(IR::Instr * instrSub);
             bool            GenerateFastMul(IR::Instr * instrMul);

+ 5 - 0
lib/Backend/i386/LowererMDArch.cpp

@@ -3456,6 +3456,11 @@ bool
     return false;
 }
 
+bool
+    LowererMDArch::GenerateFastDiv(IR::Instr* instrDiv)
+{
+    return false; // No fast division sequence is generated in this file; always reports "not handled" and leaves instrDiv untouched.
+}
 
 ///----------------------------------------------------------------------------
 ///

+ 2 - 1
lib/Backend/i386/LowererMDArch.h

@@ -1,5 +1,5 @@
 //-------------------------------------------------------------------------------------------------------
-// Copyright (C) Microsoft. All rights reserved.
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #pragma once
@@ -107,6 +107,7 @@ public:
             bool                GenerateFastNot(IR::Instr * instrNot);
             bool                GenerateFastShiftLeft(IR::Instr * instrShift);
             bool                GenerateFastShiftRight(IR::Instr * instrShift);
+            bool                GenerateFastDiv(IR::Instr* instrDiv);
 
             IR::LabelInstr *    GetBailOutStackRestoreLabel(BailOutInfo * bailOutInfo, IR::LabelInstr * exitTargetInstr);
             IR::Opnd*           GenerateArgOutForStackArgs(IR::Instr* callInstr, IR::Instr* stackArgsInstr);

+ 56 - 0
lib/Common/Common/NumberUtilities.h

@@ -115,6 +115,62 @@ namespace Js
             FormatPrecision
         };
 
+        // Pair of magic number (multiplier) and shift amount (shiftAmt) used to replace division by a constant with multiplication and shifts.
+        // Ref: Warren's Hacker's Delight, Chapter 10.
+        struct DivMagicNumber
+        {
+            DivMagicNumber(int multiplier, uint shiftAmt) : multiplier(multiplier), shiftAmt(shiftAmt) {}
+            int multiplier; // Signed 32-bit magic multiplier.
+            uint shiftAmt;  // Shift amount paired with the multiplier.
+        };
+
+        // Computes the signed magic multiplier and the shift amount for dividing a 32-bit
+        // integer by the constant `divisor`.
+        // Ref: Warren's Hacker's Delight, Chapter 10.
+        //
+        // divisor: the constant divisor; must satisfy the range asserted below.
+        // Returns: DivMagicNumber(multiplier, shiftAmt), with the multiplier negated for
+        //          negative divisors.
+        DivMagicNumber static GenerateDivMagicNumber(const int divisor)
+        {
+            Assert((1 < divisor && divisor < INT32_MAX - 1) || (-1 > divisor && divisor > INT32_MIN));
+
+            const unsigned two31      = static_cast<unsigned>(1) << (sizeof(int) * 8 - 1); // 2^31
+            const unsigned absDivisor = static_cast<unsigned>((divisor < 0) ? (0 - divisor) : divisor);
+            const unsigned signedTop  = two31 + (static_cast<unsigned>(divisor) >> 31);
+            const unsigned absNc      = signedTop - 1 - signedTop % absDivisor; // abs(nc)
+
+            int exponent     = 31;                        // p
+            unsigned quotNc  = two31 / absNc;             // 2^p / |nc|
+            unsigned remNc   = two31 - quotNc * absNc;    // rem(2^p, |nc|)
+            unsigned quotDiv = two31 / absDivisor;        // 2^p / |d|
+            unsigned remDiv  = two31 - quotDiv * absDivisor; // rem(2^p, |d|)
+            unsigned delta;
+
+            do
+            {
+                ++exponent;
+
+                // Advance quotNc/remNc to the next power of two.
+                quotNc *= 2;
+                remNc *= 2;
+                if (remNc >= absNc) // Unsigned comparison is required here.
+                {
+                    ++quotNc;
+                    remNc -= absNc;
+                }
+
+                // Advance quotDiv/remDiv to the next power of two.
+                quotDiv *= 2;
+                remDiv *= 2;
+                if (remDiv >= absDivisor) // Unsigned comparison is required here.
+                {
+                    ++quotDiv;
+                    remDiv -= absDivisor;
+                }
+
+                delta = absDivisor - remDiv;
+            } while (quotNc < delta || (quotNc == delta && remNc == 0));
+
+            const int positiveMagic = static_cast<int>(quotDiv + 1);
+            const int signedMagic   = (divisor < 0) ? -positiveMagic : positiveMagic;
+            return DivMagicNumber(signedMagic, exponent - 32); // (magic number, shift amount)
+        }
+
         // Implemented in lib\parser\common.  Should move to lib\common
         template<typename EncodedChar>
         static double StrToDbl(const EncodedChar *psz, const EncodedChar **ppchLim, bool& likelyInt);

+ 2 - 0
test/AsmJs/divByConstants.baseline

@@ -0,0 +1,2 @@
+Successfully compiled asm.js code
+PASSED

+ 180 - 0
test/AsmJs/divByConstants.js

@@ -0,0 +1,180 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+var CreateBaseline = false; // Set to true to print freshly computed results instead of running the test; copy the printed array into the results variable below.
+var debug = false; // Set to true to print a debug message per test.
+var debugTestNum = -1; // With debug on: index of the single test to print, or -1 to print every test.
+
+var test_values = [-5, 5, 124, 248, 654, 987, -1026, 98768, -88754, 1<<32, -(1<<32),  (1<<32)-1, 1<<31, -(1<<31), 1<<25, -1<<25, 65536, 46341]; // NOTE: JS shift counts are taken modulo 32, so 1<<32 === 1 and 1<<31 === -2147483648.
+
+var results = [-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,-2147483648,-2147483648,33554432,-33554432,65536,46341,5,-5,-124,-248,-654,-987,1026,-98768,88754,-1,1,0,-2147483648,
+-2147483648,-33554432,33554432,-65536,-46341,-1,1,41,82,218,329,-342,32922,-29584,0,0,0,-715827882,-715827882,11184810,-11184810,21845,15447,-1,1,24,49,130,197,-205,19753,
+-17750,0,0,0,-429496729,-429496729,6710886,-6710886,13107,9268,0,0,17,35,93,141,-146,14109,-12679,0,0,0,-306783378,-306783378,4793490,-4793490,9362,6620,1,-1,-41,-82,-218,
+-329,342,-32922,29584,0,0,0,715827882,715827882,-11184810,11184810,-21845,-15447,1,-1,-24,-49,-130,-197,205,-19753,17750,0,0,0,429496729,429496729,-6710886,6710886,
+-13107,-9268,0,0,-17,-35,-93,-141,146,-14109,12679,0,0,0,306783378,306783378,-4793490,4793490,-9362,-6620,0,0,15,31,81,123,-128,12346,-11094,0,0,0,-268435456,-268435456,
+4194304,-4194304,8192,5792,0,0,13,27,72,109,-114,10974,-9861,0,0,0,-238609294,-238609294,3728270,-3728270,7281,5149,0,0,9,19,50,75,-78,7597,-6827,0,0,0,-165191049,-165191049,
+2581110,-2581110,5041,3564,0,0,3,7,19,29,-31,2992,-2689,0,0,0,-65075262,-65075262,1016800,-1016800,1985,1404,0,0,2,4,10,16,-17,1646,-1479,0,0,0,-35791394,-35791394,
+559240,-559240,1092,772,0,0,1,2,6,9,-10,987,-887,0,0,0,-21474836,-21474836,335544,-335544,655,463,0,0,1,2,5,8,-8,823,-739,0,0,0,-17895697,-17895697,279620,-279620,546,386,0,
+0,0,0,0,0,-1,98,-88,0,0,0,-2145338,-2145338,33520,-33520,65,46,0,0,0,0,0,0,0,9,-8,0,0,0,-214769,-214769,3355,-3355,6,4,0,0,0,0,0,0,0,0,0,0,0,0,-21475,-21475,335,-335,0,0,
+-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,-2147483648,-2147483648,33554432,-33554432,65536,46341,5,-5,-124,-248,-654,-987,1026,-98768,88754,-1,1,0,-2147483648,
+-2147483648,-33554432,33554432,-65536,-46341,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,-2147483648,-2147483648,33554432,-33554432,
+65536,46341,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,-2,2,1,2,0,0,0,2,-2,1,-1,0,-2,-2,2,-2,1,0,-2,2,1,2,0,0,0,2,-2,1,-1,0,-2,-2,2,-2,1,0,0,0,4,3,4,2,-1,3,-4,1,-1,0,-3,-3,2,-2,1,
+1,-5,5,5,3,3,0,-4,5,-1,1,-1,0,-2,-2,2,-2,2,1,-5,5,5,3,3,0,-4,5,-1,1,-1,0,-2,-2,2,-2,2,1,-5,5,7,1,4,12,-12,7,-3,1,-1,0,-11,-11,2,-2,3,9,-5,5,19,3,24,7,-11,33,-29,1,-1,0,
+-23,-23,2,-2,16,1,-5,5,4,8,54,27,-66,8,-74,1,-1,0,-8,-8,32,-32,16,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,0,0,33554432,
+-33554432,65536,46341,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,0,0,33554432,-33554432,65536,46341]
+
+function asmModule() {
+    "use asm";
+
+    function div1(x)  { x = x | 0; return ((x | 0) /  1) | 0; }
+    function divn1(x) { x = x | 0; return ((x | 0) / -1) | 0; }
+    
+    function div3(x)  { x = x | 0; return ((x | 0) /  3) | 0; } // Key Scenario 1
+    function div5(x)  { x = x | 0; return ((x | 0) /  5) | 0; } // Key Scenario 2
+    function div7(x)  { x = x | 0; return ((x | 0) /  7) | 0; } // Key Scenario 3
+    function divn3(x) { x = x | 0; return ((x | 0) / -3) | 0; }
+    function divn5(x) { x = x | 0; return ((x | 0) / -5) | 0; }
+    function divn7(x) { x = x | 0; return ((x | 0) / -7) | 0; }
+    function div8(x)  { x = x | 0; return ((x | 0) /  8) | 0; }
+    function div9(x)  { x = x | 0; return ((x | 0) /  9) | 0; }
+
+    function div13(x) { x = x | 0; return ((x | 0) / 13) | 0; }
+    function div33(x) { x = x | 0; return ((x | 0) / 33) | 0; }
+    function div60(x) { x = x | 0; return ((x | 0) / 60) | 0; }
+
+    function div100(x) { x = x | 0; return ((x | 0) / 100) | 0; }
+    function div120(x) { x = x | 0; return ((x | 0) / 120) | 0; }
+
+    function div1001(x) { x = x | 0; return ((x | 0) / 1001) | 0; }
+    function div9999(x) { x = x | 0; return ((x | 0) / 9999) | 0; }
+    function div99999(x){ x = x | 0; return ((x | 0) / 99999) | 0; }
+    
+
+    function divMax0(x) { x = x | 0; return ((x | 0) / (1<<32)) | 0; } // NOTE(review): 1<<32 === 1 (shift count is taken modulo 32), so this divides by 1.
+    function divMax1(x) { x = x | 0; return ((x | 0) / ((1<<32)|0-1|0)) | 0; } // NOTE(review): the divisor parses as (1<<32)|(0-1)|0 === -1, so this divides by -1.
+    function divMax2(x) { x = x | 0; return ((x | 0) / (1<<31)) | 0; } // 1<<31 === -2147483648.
+    function divMin0(x) { x = x | 0; return ((x | 0) / (1<<32)|0 * -1|0)| 0; } // NOTE(review): '/' and '*' bind tighter than '|': ((x|0)/(1<<32)) | (0*-1) | 0, i.e. x / 1.
+    function divMin1(x) { x = x | 0; return ((x | 0) / (1<<31)|0 * -1|0)| 0; } // NOTE(review): parses as ((x|0)/(1<<31)) | (0*-1) | 0, i.e. x / -2147483648.
+
+    function rem3(x) { x = x|0; return ((x|0) % 3)|0; }
+    function remn3(x) { x = x|0; return ((x|0) % -3)|0; }
+    function rem5(x) { x = x|0; return ((x|0) % 5)|0; }
+    function remn5(x) { x = x|0; return ((x|0) % -5)|0; }
+    function rem7(x) { x = x|0; return ((x|0) % 7)|0; }
+    function remn7(x) { x = x|0; return ((x|0) % -7)|0; }
+
+    function rem15(x) { x = x|0; return ((x|0) % 13)|0; } // NOTE(review): the name says 15 but the divisor is 13.
+    function rem35(x) { x = x|0; return ((x|0) % 35)|0; }
+    function rem120(x) { x = x|0; return ((x|0) % 120)|0; }
+    function remMax0(x) { x = x|0; return ((x|0) % (1<<32)|0)|0; } // NOTE(review): parses as ((x|0) % (1<<32)) | 0 with 1<<32 === 1, i.e. x % 1.
+    function remMax1(x) { x = x|0; return ((x|0) % (1<<31)|0)|0; } // Parses as ((x|0) % (1<<31)) | 0, i.e. x % -2147483648.
+    function remMin0(x) { x = x|0; return ((x|0) % ((1<<32)|0 * -1|0))|0; } // NOTE(review): the divisor parses as (1<<32)|(0*-1)|0 === 1, i.e. x % 1.
+    function remMin1(x) { x = x|0; return ((x|0) % ((1<<31)|0 * -1|0))|0; } // NOTE(review): the divisor parses as (1<<31)|(0*-1)|0 === -2147483648.
+
+    return {
+        div1    : div1,
+        divn1   : divn1,
+        div3    : div3,
+        div5    : div5,
+        div7    : div7,
+        divn3   : divn3,
+        divn5   : divn5,
+        divn7   : divn7,
+        div8    : div8,
+        div9    : div9,
+        div13   : div13,
+        div33   : div33,
+        div60   : div60,
+        div100  : div100,
+        div120  : div120,
+        div1001 : div1001,
+        div9999 : div9999,
+        div99999: div99999,
+        divMax0 : divMax0,
+        divMax1 : divMax1,
+        divMax2 : divMax2,
+        divMin0 : divMin0,
+        divMin1 : divMin1,
+        rem3    : rem3,
+        remn3   : remn3,
+        rem5    : rem5,
+        remn5   : remn5,
+        rem7    : rem7,
+        remn7   : remn7,
+        rem15   : rem15,
+        rem35   : rem35,
+        rem120  : rem120,
+        remMax0 : remMax0,
+        remMax1 : remMax1,
+        remMin0 : remMin0,
+        remMin1 : remMin1
+    };
+}
+var am = asmModule();     // produces AOT-compiled version
+var fns = [am.div1, am.divn1, am.div3, am.div5, am.div7, am.divn3, am.divn5, am.divn7, am.div8, am.div9, am.div13, am.div33, am.div60,
+     am.div100, am.div120, am.div1001, am.div9999, am.div99999, am.divMax0, am.divMax1, am.divMax2, am.divMin0, am.divMin1,
+     am.rem3, am.remn3, am.rem5, am.rem7, am.remn7, am.rem15, am.rem35, am.rem120, am.remMax0, am.remMax1, am.remMin0, am.remMin1];
+
+/*****Generate Baseline*********/
+function GenerateBaseline() {
+    var tmp = [];
+    fns.forEach(function (fn) {
+        test_values.forEach(function (value) {
+            tmp.push(fn(value));
+        }, this);
+    }, this);
+    print("[" + tmp + "]");
+}
+
+/******End Baseline gen */
+
+/*Math test for int div strength reduction*/
+var test_result = "PASSED";
+var total=0,fail=0;
+function testSignedDivStrengthReduction() {
+    var i = 0;
+    total = 0;
+    fail  = 0;
+    fns.forEach(function (fn) {
+        test_values.forEach(function (value) {
+            if(debug && debugTestNum == -1)
+            {
+                print("Test# "+ i + " " + fn.name + "(" + value + ") :\t\tExpected " + results[i] + "\t Found " + fn(value));
+            }
+            else if(debug && i == debugTestNum)
+            {
+                print("Test# "+ i + " " + fn.name + "(" + value + ") :\tExpected " + results[i] + "\t Found " + fn(value));
+            }
+            else if (results[i] != fn(value)) {
+                print();
+                print("TestFail at Test# "+ i + " " + fn.name + "(" + value + ") :\tExpected " + results[i] + "\tFound " + fn(value));
+                test_result = "Fail";
+                ++fail;
+            }
+            ++i;
+            ++total;
+        }, this);
+    }, this);
+}
+
+if( CreateBaseline )
+{
+    GenerateBaseline();
+}
+else
+{
+
+    // var a = new Date().getTime();
+    // for (var i = 0; i < 1; ++i)
+        testSignedDivStrengthReduction();
+    // var b = new Date().getTime() - a;
+    // print("ElapsedTime = " + b);
+
+    if (fail != 0)
+    {
+        print(fail + "/" + total + " tests Failed.");
+    }
+    // print(total + "/" + total + " tests Passed.")
+print(test_result);
+}

+ 7 - 0
test/AsmJs/rlexe.xml

@@ -1050,4 +1050,11 @@
       <compile-flags>-testtrace:asmjs</compile-flags>
     </default>
   </test>
+  <test>
+    <default>
+      <files>divByConstants.js</files>
+      <baseline>divByConstants.baseline</baseline>
+      <compile-flags>-testtrace:asmjs -maic:1</compile-flags>
+    </default>
+  </test>
 </regress-exe>

+ 198 - 0
test/wasm/divByConstants.js

@@ -0,0 +1,198 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+var {fixupI64Return} = WScript.LoadScriptFile("./wasmutils.js"); // NOTE(review): fixupI64Return is never referenced in this file.
+WScript.Flag("-wasmI64");
+// Harness switches.
+var CreateBaseline = false; // Set True to generate Baseline data. Initialize results array with console output.
+var debug = false; // True for printing debug messages
+var debugTestNum = -1; // Set test number to run a specific test. Set to default -1 otherwise.
+// JS shift counts are taken mod 32, so 1<<32 is 1 and the three "1<<32" inputs are really 1, -1 and 0; (1<<31)-1 and -(1<<31)+1 fall outside int32 and reach the i32 parameter as 2147483647 / -2147483647.
+var test_values = [-5, 5, 124, 248, 654, 987, -1026, 98768, -88754, 1<<32, -(1<<32),  (1<<32)-1, (1<<31)-1, -(1<<31)+1, 1<<25, -1<<25, 65536, 46341];
+
+var results = // Expected values, one run of test_values.length entries per function in fns order: results[f * test_values.length + v] belongs to fns[f](test_values[v]).
+    [-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,2147483647,-2147483647,33554432,-33554432,65536,46341,5,-5,-124,-248,-654,-987,1026,-98768,88754,-1,1,0,-2147483647,2147483647,
+    -33554432,33554432,-65536,-46341,-1,1,41,82,218,329,-342,32922,-29584,0,0,0,715827882,-715827882,11184810,-11184810,21845,15447,-1,1,24,49,130,197,-205,19753,-17750,
+    0,0,0,429496729,-429496729,6710886,-6710886,13107,9268,0,0,17,35,93,141,-146,14109,-12679,0,0,0,306783378,-306783378,4793490,-4793490,9362,6620,1,-1,-41,-82,-218,-329,
+    342,-32922,29584,0,0,0,-715827882,715827882,-11184810,11184810,-21845,-15447,1,-1,-24,-49,-130,-197,205,-19753,17750,0,0,0,-429496729,429496729,-6710886,6710886,-13107,-9268,
+    0,0,-17,-35,-93,-141,146,-14109,12679,0,0,0,-306783378,306783378,-4793490,4793490,-9362,-6620,0,0,15,31,81,123,-128,12346,-11094,0,0,0,268435455,-268435455,4194304,
+    -4194304,8192,5792,0,0,13,27,72,109,-114,10974,-9861,0,0,0,238609294,-238609294,3728270,-3728270,7281,5149,0,0,9,19,50,75,-78,7597,-6827,0,0,0,165191049,-165191049,2581110,
+    -2581110,5041,3564,0,0,3,7,19,29,-31,2992,-2689,0,0,0,65075262,-65075262,1016800,-1016800,1985,1404,0,0,2,4,10,16,-17,1646,-1479,0,0,0,35791394,-35791394,559240,-559240,1092,
+    772,0,0,1,2,6,9,-10,987,-887,0,0,0,21474836,-21474836,335544,-335544,655,463,0,0,1,2,5,8,-8,823,-739,0,0,0,17895697,-17895697,279620,-279620,546,386,0,0,0,0,0,0,
+    -1,98,-88,0,0,0,2145338,-2145338,33520,-33520,65,46,0,0,0,0,0,0,0,9,-8,0,0,0,214769,-214769,3355,-3355,6,4,0,0,0,0,0,0,0,0,0,0,0,0,21475,-21475,335,-335,0,0,5,-5,-124,
+    -248,-654,-987,1026,-98768,88754,-1,1,0,-2147483647,2147483647,-33554432,33554432,-65536,-46341,5,-5,-124,-248,-654,-987,1026,-98768,88754,-1,1,0,-2147483647,2147483647,
+    -33554432,33554432,-65536,-46341,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1,1,0,0,0,0,-2,2,1,2,0,0,0,2,-2,1,-1,0,1
+    ,-1,2,-2,1,0,0,0,4,3,4,2,-1,3,-4,1,-1,0,2,-2,2,-2,1,1,-5,5,5,3,3,0,-4,5,-1,1,-1,0,1,-1,2,-2,2,1,-2,2,1,2,0,0,0,2,-2,1,-1,0,1,-1,2,-2,1,0,0,0,4,3,4,2,-1,3,-4,1,-1,0,2,-2,
+    2,-2,1,1,-5,5,5,3,3,0,-4,5,-1,1,-1,0,1,-1,2,-2,2,1,-5,5,12,10,10,7,-4,12,-8,1,-1,0,1,-1,2,-2,2,1,-5,5,13,26,25,25,-27,15,-28,1,-1,0,21,-21,20,-20,9,17,-5,5,4,8,54,27,-66,
+    8,-74,1,-1,0,7,-7,32,-32,16,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,2147483647,-2147483647,33554432,-33554432,65536,46341,
+    -5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,2,-2,33554432,-33554432,65536,46341,-5,5,124,248,654,987,-1026,98768,-88754,1,-1,0,0,0,33554432,-33554432,65536,46341];
+
+let passed = true;
+function check(expected, funName, ...args)
+{
+  let fun = eval(funName);
+  var result;
+  try {
+     result = fun(...args);
+  } catch (e) {
+    result = e.message;
+  }
+
+  if(result != expected) {
+    passed = false;
+    print(`${funName}(${[...args]}) \t produced ${result} \texpected ${expected}`);
+  }
+}
+
+function GenerateBaseline(funName, ...args)
+{
+  let fun = eval(funName);
+  var result;
+  try {
+     result = fun(...args);
+  } catch (e) {
+    result = e.message;
+  }
+return result;
+}
+
+const wasmModuleText = `(module
+  (func (export "i32_div_1")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 1)) )
+  (func (export "i32_div_n1") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const -1)) )
+
+  (func (export "i32_div_3")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 3)) )
+  (func (export "i32_div_5")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 5)) )
+  (func (export "i32_div_7")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 7)) )
+  (func (export "i32_div_n3") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const -3)) )
+  (func (export "i32_div_n5") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const -5)) )
+  (func (export "i32_div_n7") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const -7)) )
+  (func (export "i32_div_8")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 8)) )
+  (func (export "i32_div_9")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 9)) )
+
+  (func (export "i32_div_13") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 13)) )
+  (func (export "i32_div_33") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 33)) )
+  (func (export "i32_div_60") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 60)) )
+  
+  (func (export "i32_div_100")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 100)) )
+  (func (export "i32_div_120")  (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 120)) )
+  
+  (func (export "i32_div_1001") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 1001)) )
+  (func (export "i32_div_9999") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 9999)) )
+  (func (export "i32_div_99999") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 99999)) )
+
+  (func (export "i32_div_max0") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 4294967295)) )
+  (func (export "i32_div_max1") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 4294967295)) )
+  (func (export "i32_div_max2") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const 2147483648)) )
+  (func (export "i32_div_min0") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const -429496729)) )
+  (func (export "i32_div_min1") (param $x i32) (result i32)  (i32.div_s (get_local $x) (i32.const -2147483647)) )
+
+  (func (export "i32_rem_3")  (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 3)) )
+  (func (export "i32_rem_5")  (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 5)) )
+  (func (export "i32_rem_7")  (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 7)) )
+  (func (export "i32_rem_n3") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const -3)) )
+  (func (export "i32_rem_n5") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const -5)) )
+  (func (export "i32_rem_n7") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const -7)) )
+
+  (func (export "i32_rem_14") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 14)) )
+  (func (export "i32_rem_37") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 37)) )
+  (func (export "i32_rem_120") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 120)) )
+  (func (export "i32_rem_Max0") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 4294967295)) )
+  (func (export "i32_rem_Max1") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const 2147483648)) )
+  (func (export "i32_rem_Min0") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const -429496729)) )
+  (func (export "i32_rem_Min1") (param $x i32) (result i32)  (i32.rem_s (get_local $x) (i32.const -2147483647)) )
+)`; // One export per constant divisor (i32.div_s / i32.rem_s of the argument by an i32.const). NOTE(review): i32_div_max0 and i32_div_max1 use the same constant 4294967295, so both exercise one divisor; their expected rows in `results` match the i32_div_n1 row.
+
+const mod = new WebAssembly.Module(WebAssembly.wabt.convertWast2Wasm(wasmModuleText));
+const {exports} = new WebAssembly.Instance(mod);
+
+var fns = [exports.i32_div_1, 
+           exports.i32_div_n1,
+           exports.i32_div_3,
+           exports.i32_div_5,
+           exports.i32_div_7,
+           exports.i32_div_n3,
+           exports.i32_div_n5,
+           exports.i32_div_n7,
+           exports.i32_div_8,
+           exports.i32_div_9,
+           exports.i32_div_13,
+           exports.i32_div_33,
+           exports.i32_div_60,
+           exports.i32_div_100,
+           exports.i32_div_120,
+           exports.i32_div_1001,
+           exports.i32_div_9999,
+           exports.i32_div_99999,
+           exports.i32_div_max0,
+           exports.i32_div_max1,
+           exports.i32_div_max2,
+           exports.i32_div_min0,
+           exports.i32_div_min1,
+           exports.i32_rem_3,
+           exports.i32_rem_5,
+           exports.i32_rem_7,
+           exports.i32_rem_n3,
+           exports.i32_rem_n5,
+           exports.i32_rem_n7,
+           exports.i32_rem_14,
+           exports.i32_rem_37,
+           exports.i32_rem_120,
+           exports.i32_rem_Max0,
+           exports.i32_rem_Max1,
+           exports.i32_rem_Min0,
+           exports.i32_rem_Min1];
+
+
+/*Math test for int div strength reduction*/
+
+function testSignedDivStrengthReduction() {
+  var i = 0;
+  fns.forEach(function (fn) {
+      test_values.forEach(function (value) {
+        if(debug && debugTestNum == -1)
+        {
+          print("Test# "+ i + " " + fn + " ("+ value + ") \t Expected:" + results[i] + "\t Found:" + GenerateBaseline(fn, value));
+        }
+        else if(debug && debugTestNum == i)
+        {
+          print("Test# "+ i + " " + fn + " ("+ value + ") \t Expected:" + results[i] + "\t Found:" + GenerateBaseline(fn, value));
+        }
+        else
+        {
+          check(results[i], fn, value);
+        }
+        ++i;
+      }, this);
+  }, this);
+}
+
+if( CreateBaseline )
+{
+  var tmp = [];
+  var i = 0;
+    fns.forEach(function (fn) {
+      test_values.forEach(function (value) {
+        if(debug)
+        {
+          print("Test #"+i++ + " " + fn + "\t(" + value + ")\t Result: "+ GenerateBaseline(fn, value));
+        }
+        tmp.push(GenerateBaseline(fn, value));
+      }, this);
+  }, this);
+  print("[" + tmp + "]");
+}
+else
+{
+
+  // var a = new Date().getTime();
+  // for (var i = 0; i < 1; ++i)
+      testSignedDivStrengthReduction();
+  // var b = new Date().getTime() - a;
+  // print("ElapsedTime = " + b);
+
+}
+
+if(passed) {
+  print("Passed");
+}

+ 6 - 0
test/wasm/rlexe.xml

@@ -89,6 +89,12 @@
     <compile-flags>-wasm  </compile-flags>
   </default>
 </test>
+<test>
+  <default>
+    <files>divByConstants.js</files>
+    <compile-flags>-wasm  </compile-flags>
+  </default>
+</test>
 <test>
   <default>
     <files>global.js</files>