// SimdInt32x4OperationX86X64.cpp
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLanguagePch.h"
  6. #if _M_IX86 || _M_AMD64
  7. namespace Js
  8. {
  9. // SIMD.Int32x4 operation wrappers that cover intrinsics for x86/x64 system
  10. SIMDValue SIMDInt32x4Operation::OpInt32x4(int x, int y, int z, int w)
  11. {
  12. X86SIMDValue x86Result;
  13. x86Result.m128i_value = _mm_set_epi32(w, z, y, x);
  14. // Sets the 4 signed 32-bit integer values, note in revised order: starts with W below
  15. return X86SIMDValue::ToSIMDValue(x86Result);
  16. }
  17. SIMDValue SIMDInt32x4Operation::OpSplat(int x)
  18. {
  19. X86SIMDValue x86Result;
  20. // set 4 signed 32-bit integers values to input value x
  21. x86Result.m128i_value = _mm_set1_epi32(x);
  22. return X86SIMDValue::ToSIMDValue(x86Result);
  23. }
  24. // Conversions
  25. SIMDValue SIMDInt32x4Operation::OpFromFloat32x4(const SIMDValue& value, bool &throws)
  26. {
  27. X86SIMDValue x86Result = { 0 };
  28. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  29. X86SIMDValue temp;
  30. int mask = 0;
  31. // Converts the 4 single-precision, floating-point values to signed 32-bit integer values
  32. // using truncate, using truncate one instead of _mm_cvtps_epi32
  33. x86Result.m128i_value = _mm_cvttps_epi32(v.m128_value);
  34. // check if any value is potentially out of range (0x80000000 in output)
  35. temp.m128i_value = _mm_cmpeq_epi32(x86Result.m128i_value, X86_NEG_MASK_F4.m128i_value);
  36. mask = _mm_movemask_ps(temp.m128_value);
  37. if (mask)
  38. {
  39. // potential overflow. Do bound checks.
  40. temp.m128_value = _mm_cmpge_ps(v.m128_value, X86_TWO_31_F4.m128_value);
  41. mask = _mm_movemask_ps(temp.m128_value);
  42. if (mask)
  43. {
  44. throws = true;
  45. return X86SIMDValue::ToSIMDValue(x86Result);
  46. }
  47. temp.m128_value = _mm_cmplt_ps(v.m128_value, X86_NEG_TWO_31_F4.m128_value);
  48. mask = _mm_movemask_ps(temp.m128_value);
  49. if (mask)
  50. {
  51. throws = true;
  52. return X86SIMDValue::ToSIMDValue(x86Result);
  53. }
  54. }
  55. return X86SIMDValue::ToSIMDValue(x86Result);
  56. }
  57. SIMDValue SIMDInt32x4Operation::OpFromFloat64x2(const SIMDValue& value)
  58. {
  59. X86SIMDValue x86Result;
  60. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  61. // Converts the 2 double-precision, floating-point values to 32-bit signed integers
  62. // using truncate. using truncate one instead of _mm_cvtpd_epi32
  63. x86Result.m128i_value = _mm_cvttpd_epi32(v.m128d_value);
  64. return X86SIMDValue::ToSIMDValue(x86Result);
  65. }
  66. // Unary Ops
  67. SIMDValue SIMDInt32x4Operation::OpAbs(const SIMDValue& value)
  68. {
  69. SIMDValue result;
  70. X86SIMDValue x86Result;
  71. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  72. if (AutoSystemInfo::Data.SSE3Available())
  73. {
  74. x86Result.m128i_value = _mm_abs_epi32(v.m128i_value); // only available after SSE3
  75. result = X86SIMDValue::ToSIMDValue(x86Result);
  76. }
  77. else
  78. {
  79. X86SIMDValue temp, SIGNMASK;
  80. SIGNMASK.m128i_value = _mm_srai_epi32(v.m128i_value, 31); // mask = value >> 31
  81. temp.m128i_value = _mm_xor_si128(v.m128i_value, SIGNMASK.m128i_value); // temp = value ^ mask
  82. x86Result.m128i_value = _mm_sub_epi32(temp.m128i_value, SIGNMASK.m128i_value); // temp - mask
  83. result = X86SIMDValue::ToSIMDValue(x86Result);
  84. }
  85. return result;
  86. }
  87. SIMDValue SIMDInt32x4Operation::OpNeg(const SIMDValue& value)
  88. {
  89. X86SIMDValue x86Result;
  90. X86SIMDValue SIGNMASK, temp;
  91. X86SIMDValue negativeOnes = { { -1, -1, -1, -1 } };
  92. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  93. temp.m128i_value = _mm_andnot_si128(v.m128i_value, negativeOnes.m128i_value); // (~value) & (negative ones)
  94. SIGNMASK.m128i_value = _mm_set1_epi32(0x00000001); // set SIGNMASK to 1
  95. x86Result.m128i_value = _mm_add_epi32(SIGNMASK.m128i_value, temp.m128i_value);// add 4 integers respectively
  96. return X86SIMDValue::ToSIMDValue(x86Result);
  97. }
  98. SIMDValue SIMDInt32x4Operation::OpNot(const SIMDValue& value)
  99. {
  100. X86SIMDValue x86Result;
  101. X86SIMDValue negativeOnes = { { -1, -1, -1, -1 } };
  102. X86SIMDValue temp = X86SIMDValue::ToX86SIMDValue(value);
  103. x86Result.m128i_value = _mm_andnot_si128(temp.m128i_value, negativeOnes.m128i_value);
  104. return X86SIMDValue::ToSIMDValue(x86Result);
  105. }
  106. SIMDValue SIMDInt32x4Operation::OpAdd(const SIMDValue& aValue, const SIMDValue& bValue)
  107. {
  108. X86SIMDValue x86Result;
  109. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  110. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  111. x86Result.m128i_value = _mm_add_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // a + b
  112. return X86SIMDValue::ToSIMDValue(x86Result);
  113. }
  114. SIMDValue SIMDInt32x4Operation::OpSub(const SIMDValue& aValue, const SIMDValue& bValue)
  115. {
  116. X86SIMDValue x86Result;
  117. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  118. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  119. x86Result.m128i_value = _mm_sub_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // a - b
  120. return X86SIMDValue::ToSIMDValue(x86Result);
  121. }
  122. SIMDValue SIMDInt32x4Operation::OpMul(const SIMDValue& aValue, const SIMDValue& bValue)
  123. {
  124. SIMDValue result;
  125. X86SIMDValue x86Result;
  126. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  127. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  128. // a * b, only available in SSE4
  129. // x86Result.m128i_value = _mm_mullo_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value);
  130. // result = X86SIMDValue::ToSIMDValue(x86Result);
  131. // SSE2
  132. // mul 2,0: r0 = a0*b0; r1 = a2*b2
  133. __m128i tmp1 = _mm_mul_epu32(tmpaValue.m128i_value, tmpbValue.m128i_value);
  134. // mul 3,1: r0=a1*b1; r1=a3*b3
  135. __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(tmpaValue.m128i_value, 4), _mm_srli_si128(tmpbValue.m128i_value, 4));
  136. // shuffle x86Results to [63..0] and pack
  137. x86Result.m128i_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));
  138. result = X86SIMDValue::ToSIMDValue(x86Result);
  139. return result;
  140. }
  141. SIMDValue SIMDInt32x4Operation::OpAnd(const SIMDValue& aValue, const SIMDValue& bValue)
  142. {
  143. X86SIMDValue x86Result;
  144. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  145. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  146. x86Result.m128i_value = _mm_and_si128(tmpaValue.m128i_value, tmpbValue.m128i_value); // a & b
  147. return X86SIMDValue::ToSIMDValue(x86Result);
  148. }
  149. SIMDValue SIMDInt32x4Operation::OpOr(const SIMDValue& aValue, const SIMDValue& bValue)
  150. {
  151. X86SIMDValue x86Result;
  152. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  153. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  154. x86Result.m128i_value = _mm_or_si128(tmpaValue.m128i_value, tmpbValue.m128i_value); // a | b
  155. return X86SIMDValue::ToSIMDValue(x86Result);
  156. }
  157. SIMDValue SIMDInt32x4Operation::OpXor(const SIMDValue& aValue, const SIMDValue& bValue)
  158. {
  159. X86SIMDValue x86Result;
  160. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  161. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  162. x86Result.m128i_value = _mm_xor_si128(tmpaValue.m128i_value, tmpbValue.m128i_value); // a ^ b
  163. return X86SIMDValue::ToSIMDValue(x86Result);
  164. }
  165. SIMDValue SIMDInt32x4Operation::OpMin(const SIMDValue& aValue, const SIMDValue& bValue)
  166. {
  167. // choose the smaller value of the two parameters, only available after SSE4
  168. // x86Result.m128i_value = _mm_min_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value);
  169. // result = X86SIMDValue::ToSIMDValue(x86Result);
  170. // SSE2
  171. SIMDValue selector = SIMDInt32x4Operation::OpLessThan(aValue, bValue);
  172. return SIMDInt32x4Operation::OpSelect(selector, aValue, bValue);
  173. }
  174. SIMDValue SIMDInt32x4Operation::OpMax(const SIMDValue& aValue, const SIMDValue& bValue)
  175. {
  176. // choose the larger value of the two parameters, only available after SSE4
  177. // x86Result.m128i_value = _mm_max_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // a ^ b
  178. // result = X86SIMDValue::ToSIMDValue(x86Result);
  179. // SSE2
  180. SIMDValue selector = SIMDInt32x4Operation::OpLessThan(bValue, aValue);
  181. return SIMDInt32x4Operation::OpSelect(selector, aValue, bValue);
  182. }
  183. SIMDValue SIMDInt32x4Operation::OpLessThan(const SIMDValue& aValue, const SIMDValue& bValue)
  184. {
  185. X86SIMDValue x86Result;
  186. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  187. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  188. x86Result.m128i_value = _mm_cmplt_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a < b?
  189. return X86SIMDValue::ToSIMDValue(x86Result);
  190. }
  191. SIMDValue SIMDInt32x4Operation::OpLessThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  192. {
  193. SIMDValue tmpResult = SIMDInt32x4Operation::OpGreaterThan(aValue, bValue);
  194. X86SIMDValue x86Result = X86SIMDValue::ToX86SIMDValue(tmpResult);
  195. X86SIMDValue negativeOnes = X86_ALL_NEG_ONES; // { { -1, -1, -1, -1 } };
  196. x86Result.m128i_value = _mm_andnot_si128(x86Result.m128i_value, negativeOnes.m128i_value);
  197. return X86SIMDValue::ToSIMDValue(x86Result);
  198. }
  199. SIMDValue SIMDInt32x4Operation::OpEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  200. {
  201. X86SIMDValue x86Result;
  202. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  203. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  204. x86Result.m128i_value = _mm_cmpeq_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a == b?
  205. return X86SIMDValue::ToSIMDValue(x86Result);
  206. }
  207. SIMDValue SIMDInt32x4Operation::OpNotEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  208. {
  209. SIMDValue tmpResult = SIMDInt32x4Operation::OpEqual(aValue, bValue);
  210. X86SIMDValue x86Result = X86SIMDValue::ToX86SIMDValue(tmpResult);
  211. X86SIMDValue negativeOnes = X86_ALL_NEG_ONES; // { { -1, -1, -1, -1 } };
  212. x86Result.m128i_value = _mm_andnot_si128(x86Result.m128i_value, negativeOnes.m128i_value);
  213. return X86SIMDValue::ToSIMDValue(x86Result);
  214. }
  215. SIMDValue SIMDInt32x4Operation::OpGreaterThan(const SIMDValue& aValue, const SIMDValue& bValue)
  216. {
  217. X86SIMDValue x86Result;
  218. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  219. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  220. x86Result.m128i_value = _mm_cmpgt_epi32(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a > b?
  221. return X86SIMDValue::ToSIMDValue(x86Result);
  222. }
  223. SIMDValue SIMDInt32x4Operation::OpGreaterThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  224. {
  225. SIMDValue tmpResult = SIMDInt32x4Operation::OpLessThan(aValue, bValue);
  226. X86SIMDValue x86Result = X86SIMDValue::ToX86SIMDValue(tmpResult);
  227. X86SIMDValue negativeOnes = X86_ALL_NEG_ONES; // { { -1, -1, -1, -1 } };
  228. x86Result.m128i_value = _mm_andnot_si128(x86Result.m128i_value, negativeOnes.m128i_value);
  229. return X86SIMDValue::ToSIMDValue(x86Result);
  230. }
  231. SIMDValue SIMDInt32x4Operation::OpShiftLeftByScalar(const SIMDValue& value, int count)
  232. {
  233. X86SIMDValue x86Result;
  234. X86SIMDValue tmpValue = X86SIMDValue::ToX86SIMDValue(value);
  235. // Shifts the 4 signed 32-bit integers in a left by count bits while shifting in zeros
  236. x86Result.m128i_value = _mm_slli_epi32(tmpValue.m128i_value, count & SIMDUtils::SIMDGetShiftAmountMask(4));
  237. return X86SIMDValue::ToSIMDValue(x86Result);
  238. }
  239. SIMDValue SIMDInt32x4Operation::OpShiftRightByScalar(const SIMDValue& value, int count)
  240. {
  241. X86SIMDValue x86Result;
  242. X86SIMDValue tmpValue = X86SIMDValue::ToX86SIMDValue(value);
  243. // Shifts the 4 signed 32-bit integers right by count bits while shifting in the sign bit
  244. x86Result.m128i_value = _mm_srai_epi32(tmpValue.m128i_value, count & SIMDUtils::SIMDGetShiftAmountMask(4));
  245. return X86SIMDValue::ToSIMDValue(x86Result);
  246. }
  247. SIMDValue SIMDInt32x4Operation::OpSelect(const SIMDValue& mV, const SIMDValue& tV, const SIMDValue& fV)
  248. {
  249. X86SIMDValue x86Result;
  250. X86SIMDValue maskValue = X86SIMDValue::ToX86SIMDValue(mV);
  251. X86SIMDValue trueValue = X86SIMDValue::ToX86SIMDValue(tV);
  252. X86SIMDValue falseValue = X86SIMDValue::ToX86SIMDValue(fV);
  253. X86SIMDValue tempTrue, tempFalse;
  254. tempTrue.m128i_value = _mm_and_si128(maskValue.m128i_value, trueValue.m128i_value); // mask & T
  255. tempFalse.m128i_value = _mm_andnot_si128(maskValue.m128i_value, falseValue.m128i_value); //!mask & F
  256. x86Result.m128i_value = _mm_or_si128(tempTrue.m128i_value, tempFalse.m128i_value); // tempT | temp F
  257. return X86SIMDValue::ToSIMDValue(x86Result);
  258. }
  259. }
  260. #endif