SimdUint32x4OperationX86X64.cpp 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLanguagePch.h"
  6. #if _M_IX86 || _M_AMD64
  7. namespace Js
  8. {
  9. SIMDValue SIMDUint32x4Operation::OpUint32x4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
  10. {
  11. X86SIMDValue x86Result;
  12. x86Result.m128i_value = _mm_set_epi32(w, z, y, x);
  13. return X86SIMDValue::ToSIMDValue(x86Result);
  14. }
  15. SIMDValue SIMDUint32x4Operation::OpSplat(unsigned int x)
  16. {
  17. X86SIMDValue x86Result;
  18. // set 4 signed 32-bit integers values to input value x
  19. x86Result.m128i_value = _mm_set1_epi32(x);
  20. return X86SIMDValue::ToSIMDValue(x86Result);
  21. }
  22. SIMDValue SIMDUint32x4Operation::OpShiftRightByScalar(const SIMDValue& value, int count)
  23. {
  24. X86SIMDValue x86Result;
  25. X86SIMDValue tmpValue = X86SIMDValue::ToX86SIMDValue(value);
  26. // Shifts the 4 signed 32-bit integers right by count bits while shifting in zeros
  27. x86Result.m128i_value = _mm_srli_epi32(tmpValue.m128i_value, count & SIMDUtils::SIMDGetShiftAmountMask(4));
  28. return X86SIMDValue::ToSIMDValue(x86Result);
  29. }
  30. SIMDValue SIMDUint32x4Operation::OpFromFloat32x4(const SIMDValue& value, bool& throws)
  31. {
  32. X86SIMDValue x86Result = { 0 };
  33. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  34. X86SIMDValue temp, temp2;
  35. X86SIMDValue two_31_f4, two_31_i4;
  36. int mask = 0;
  37. // any lanes <= -1.0 ?
  38. temp.m128_value = _mm_cmple_ps(v.m128_value, X86_ALL_NEG_ONES_F4.m128_value);
  39. mask = _mm_movemask_ps(temp.m128_value);
  40. // negative value are out of range, caller should throw Range Error
  41. if (mask)
  42. {
  43. throws = true;
  44. return X86SIMDValue::ToSIMDValue(x86Result);
  45. }
  46. // CVTTPS2DQ does a range check over signed range [-2^31, 2^31-1], so will fail to convert values >= 2^31.
  47. // To fix this, subtract 2^31 from values >= 2^31, do CVTTPS2DQ, then add 2^31 back.
  48. _mm_store_ps(two_31_f4.f32, X86_TWO_31_F4.m128_value);
  49. // any lanes >= 2^31 ?
  50. temp.m128_value = _mm_cmpge_ps(v.m128_value, two_31_f4.m128_value);
  51. // two_31_f4 has f32(2^31) for lanes >= 2^31, 0 otherwise
  52. two_31_f4.m128_value = _mm_and_ps(two_31_f4.m128_value, temp.m128_value);
  53. // subtract 2^31 from lanes >= 2^31, unchanged otherwise.
  54. v.m128_value = _mm_sub_ps(v.m128_value, two_31_f4.m128_value);
  55. // CVTTPS2DQ
  56. x86Result.m128i_value = _mm_cvttps_epi32(v.m128_value);
  57. // check if any value is out of range (i.e. >= 2^31, meaning originally >= 2^32 before value adjustment)
  58. temp2.m128i_value = _mm_cmpeq_epi32(x86Result.m128i_value, X86_NEG_MASK_F4.m128i_value); // any value == 0x80000000 ?
  59. mask = _mm_movemask_ps(temp2.m128_value);
  60. if (mask)
  61. {
  62. throws = true;
  63. return X86SIMDValue::ToSIMDValue(x86Result);
  64. }
  65. // we pass range check
  66. // add 2^31 values back to adjusted values.
  67. // Use first bit from the 2^31 float mask (0x4f000...0 << 1)
  68. // and result with 2^31 int mask (0x8000..0) setting first bit to zero if lane hasn't been adjusted
  69. _mm_store_ps(two_31_i4.f32, X86_TWO_31_I4.m128_value);
  70. two_31_f4.m128i_value = _mm_slli_epi32(two_31_f4.m128i_value, 1);
  71. two_31_i4.m128i_value = _mm_and_si128(two_31_i4.m128i_value, two_31_f4.m128i_value);
  72. // add 2^31 back to adjusted values
  73. // Note at this point all values are in [0, 2^31-1]. Adding 2^31 is guaranteed not to overflow.
  74. x86Result.m128i_value = _mm_add_epi32(x86Result.m128i_value, two_31_i4.m128i_value);
  75. return X86SIMDValue::ToSIMDValue(x86Result);
  76. }
  // Binary Ops
  78. SIMDValue SIMDUint32x4Operation::OpMin(const SIMDValue& aValue, const SIMDValue& bValue)
  79. {
  80. // _mm_min_epu32 is SSE4.1
  81. //X86SIMDValue x86Result;
  82. //X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  83. //X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  84. //x86Result.m128i_value = _mm_min_epu32(tmpaValue.m128i_value, tmpbValue.m128i_value);
  85. SIMDValue selector = SIMDUint32x4Operation::OpLessThan(aValue, bValue);
  86. return SIMDInt32x4Operation::OpSelect(selector, aValue, bValue);
  87. }
  88. SIMDValue SIMDUint32x4Operation::OpMax(const SIMDValue& aValue, const SIMDValue& bValue)
  89. {
  90. // _mm_min_epu32 is SSE4.1
  91. //X86SIMDValue x86Result;
  92. //X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  93. //X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  94. //x86Result.m128i_value = _mm_min_epu32(tmpaValue.m128i_value, tmpbValue.m128i_value);
  95. SIMDValue selector = SIMDUint32x4Operation::OpLessThan(bValue, aValue);
  96. return SIMDInt32x4Operation::OpSelect(selector, aValue, bValue);
  97. }
  98. SIMDValue SIMDUint32x4Operation::OpLessThan(const SIMDValue& aValue, const SIMDValue& bValue)
  99. {
  100. X86SIMDValue x86Result;
  101. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  102. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  103. X86SIMDValue signBits;
  104. signBits.m128i_value = _mm_set1_epi32(0x80000000);
  105. // Signed comparison of unsigned ints can be done if the ints have the "sign" bit xored with 1
  106. tmpaValue.m128i_value = _mm_xor_si128(tmpaValue.m128i_value, signBits.m128i_value);
  107. tmpbValue.m128i_value = _mm_xor_si128(tmpbValue.m128i_value, signBits.m128i_value);
  108. x86Result.m128i_value = _mm_cmplt_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a < b?
  109. return X86SIMDValue::ToSIMDValue(x86Result);
  110. }
  111. SIMDValue SIMDUint32x4Operation::OpLessThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  112. {
  113. X86SIMDValue x86Result;
  114. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  115. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  116. X86SIMDValue signBits;
  117. signBits.m128i_value = _mm_set1_epi32(0x80000000);
  118. // Signed comparison of unsigned ints can be done if the ints have the "sign" bit xored with 1
  119. tmpaValue.m128i_value = _mm_xor_si128(tmpaValue.m128i_value, signBits.m128i_value);
  120. tmpbValue.m128i_value = _mm_xor_si128(tmpbValue.m128i_value, signBits.m128i_value);
  121. x86Result.m128i_value = _mm_cmplt_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a < b?
  122. tmpaValue.m128i_value = _mm_cmpeq_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a == b?
  123. x86Result.m128i_value = _mm_or_si128(x86Result.m128i_value, tmpaValue.m128i_value); // result = (a<b)|(a==b)
  124. return X86SIMDValue::ToSIMDValue(x86Result);
  125. }
  126. SIMDValue SIMDUint32x4Operation::OpGreaterThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  127. {
  128. SIMDValue result;
  129. result = SIMDUint32x4Operation::OpLessThan(aValue, bValue);
  130. result = SIMDInt32x4Operation::OpNot(result);
  131. return result;
  132. }
  133. SIMDValue SIMDUint32x4Operation::OpGreaterThan(const SIMDValue& aValue, const SIMDValue& bValue)
  134. {
  135. SIMDValue result;
  136. result = SIMDUint32x4Operation::OpLessThanOrEqual(aValue, bValue);
  137. result = SIMDInt32x4Operation::OpNot(result);
  138. return result;
  139. }
  140. }
  141. #endif