SimdFloat32x4OperationX86X64.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLanguagePch.h"
  6. #if _M_IX86 || _M_AMD64
  7. namespace Js
  8. {
  9. SIMDValue SIMDFloat32x4Operation::OpFloat32x4(float x, float y, float z, float w)
  10. {
  11. X86SIMDValue x86Result;
  12. // Sets the 4 single-precision, floating-point values, note order starts with W below
  13. x86Result.m128_value = _mm_set_ps(w, z, y, x);
  14. return X86SIMDValue::ToSIMDValue(x86Result);
  15. }
  16. SIMDValue SIMDFloat32x4Operation::OpSplat(float x)
  17. {
  18. X86SIMDValue x86Result;
  19. // Sets the four single-precision, floating-point values to x
  20. x86Result.m128_value = _mm_set1_ps(x);
  21. return X86SIMDValue::ToSIMDValue(x86Result);
  22. }
  23. // Conversions
  24. SIMDValue SIMDFloat32x4Operation::OpFromFloat64x2(const SIMDValue& value)
  25. {
  26. X86SIMDValue x86Result;
  27. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  28. // Converts the two double-precision, floating-point values of v.m128d_value
  29. // to single-precision, floating-point values.
  30. x86Result.m128_value = _mm_cvtpd_ps(v.m128d_value);
  31. return X86SIMDValue::ToSIMDValue(x86Result);
  32. }
  33. SIMDValue SIMDFloat32x4Operation::OpFromInt32x4(const SIMDValue& value)
  34. {
  35. X86SIMDValue x86Result;
  36. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  37. // Converts the 4 signed 32-bit integer values of v.m128i_value
  38. // to single-precision, floating-point values.
  39. x86Result.m128_value = _mm_cvtepi32_ps(v.m128i_value);
  40. return X86SIMDValue::ToSIMDValue(x86Result);
  41. }
  42. SIMDValue SIMDFloat32x4Operation::OpFromUint32x4(const SIMDValue& value)
  43. {
  44. X86SIMDValue x86Result, temp1;
  45. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  46. // find unsigned values above 2^31-1. Comparison is signed, so look for values < 0
  47. temp1.m128i_value = _mm_cmplt_epi32(v.m128i_value, X86_ALL_ZEROS.m128i_value);
  48. // temp1 has f32(2^32) for unsigned values above 2^31, 0 otherwise
  49. temp1.m128_value = _mm_and_ps(temp1.m128_value, X86_TWO_32_F4.m128_value);
  50. // convert
  51. x86Result.m128_value = _mm_cvtepi32_ps(v.m128i_value);
  52. // Add f32(2^32) to negative values
  53. x86Result.m128_value = _mm_add_ps(x86Result.m128_value, temp1.m128_value);
  54. return X86SIMDValue::ToSIMDValue(x86Result);
  55. }
  56. // Unary Ops
  57. SIMDValue SIMDFloat32x4Operation::OpAbs(const SIMDValue& value)
  58. {
  59. X86SIMDValue x86Result = { 0 };
  60. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  61. x86Result.m128_value = _mm_and_ps(v.m128_value, X86_ABS_MASK_F4.m128_value);
  62. return X86SIMDValue::ToSIMDValue(x86Result);
  63. }
  64. SIMDValue SIMDFloat32x4Operation::OpNeg(const SIMDValue& value)
  65. {
  66. X86SIMDValue x86Result;
  67. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  68. x86Result.m128_value = _mm_xor_ps(v.m128_value, X86_NEG_MASK_F4.m128_value);
  69. return X86SIMDValue::ToSIMDValue(x86Result);
  70. }
  71. SIMDValue SIMDFloat32x4Operation::OpNot(const SIMDValue& value)
  72. {
  73. X86SIMDValue x86Result;
  74. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  75. x86Result.m128_value = _mm_xor_ps(v.m128_value, X86_ALL_NEG_ONES.m128_value);
  76. return X86SIMDValue::ToSIMDValue(x86Result);
  77. }
  78. SIMDValue SIMDFloat32x4Operation::OpReciprocal(const SIMDValue& value)
  79. {
  80. X86SIMDValue x86Result;
  81. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  82. // RCPPS is not precise. Using DIVPS
  83. // Divides the four single-precision, floating-point values of 1.0 and value
  84. x86Result.m128_value = _mm_div_ps(X86_ALL_ONES_F4.m128_value, v.m128_value); // result = 1.0/value
  85. return X86SIMDValue::ToSIMDValue(x86Result);
  86. }
  87. SIMDValue SIMDFloat32x4Operation::OpReciprocalSqrt(const SIMDValue& value)
  88. {
  89. X86SIMDValue x86Result;
  90. X86SIMDValue temp;
  91. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  92. temp.m128_value = _mm_div_ps(X86_ALL_ONES_F4.m128_value, v.m128_value); // temp = 1.0/value
  93. x86Result.m128_value = _mm_sqrt_ps(temp.m128_value); // result = sqrt(1.0/value)
  94. return X86SIMDValue::ToSIMDValue(x86Result);
  95. }
  96. SIMDValue SIMDFloat32x4Operation::OpSqrt(const SIMDValue& value)
  97. {
  98. X86SIMDValue x86Result;
  99. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  100. x86Result.m128_value = _mm_sqrt_ps(v.m128_value); // result = sqrt(value)
  101. return X86SIMDValue::ToSIMDValue(x86Result);
  102. }
  103. // Binary Ops
  104. SIMDValue SIMDFloat32x4Operation::OpAdd(const SIMDValue& aValue, const SIMDValue& bValue)
  105. {
  106. X86SIMDValue x86Result;
  107. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  108. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  109. x86Result.m128_value = _mm_add_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a + b
  110. return X86SIMDValue::ToSIMDValue(x86Result);
  111. }
  112. SIMDValue SIMDFloat32x4Operation::OpSub(const SIMDValue& aValue, const SIMDValue& bValue)
  113. {
  114. X86SIMDValue x86Result;
  115. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  116. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  117. x86Result.m128_value = _mm_sub_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a - b
  118. return X86SIMDValue::ToSIMDValue(x86Result);
  119. }
  120. SIMDValue SIMDFloat32x4Operation::OpMul(const SIMDValue& aValue, const SIMDValue& bValue)
  121. {
  122. X86SIMDValue x86Result;
  123. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  124. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  125. x86Result.m128_value = _mm_mul_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a * b
  126. return X86SIMDValue::ToSIMDValue(x86Result);
  127. }
  128. SIMDValue SIMDFloat32x4Operation::OpDiv(const SIMDValue& aValue, const SIMDValue& bValue)
  129. {
  130. X86SIMDValue x86Result;
  131. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  132. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  133. x86Result.m128_value = _mm_div_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a / b
  134. return X86SIMDValue::ToSIMDValue(x86Result);
  135. }
  136. SIMDValue SIMDFloat32x4Operation::OpAnd(const SIMDValue& aValue, const SIMDValue& bValue)
  137. {
  138. X86SIMDValue x86Result;
  139. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  140. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  141. x86Result.m128_value = _mm_and_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a & b
  142. return X86SIMDValue::ToSIMDValue(x86Result);
  143. }
  144. SIMDValue SIMDFloat32x4Operation::OpOr(const SIMDValue& aValue, const SIMDValue& bValue)
  145. {
  146. X86SIMDValue x86Result;
  147. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  148. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  149. x86Result.m128_value = _mm_or_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a | b
  150. return X86SIMDValue::ToSIMDValue(x86Result);
  151. }
  152. SIMDValue SIMDFloat32x4Operation::OpXor(const SIMDValue& aValue, const SIMDValue& bValue)
  153. {
  154. X86SIMDValue x86Result;
  155. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  156. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  157. x86Result.m128_value = _mm_xor_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a ^ b
  158. return X86SIMDValue::ToSIMDValue(x86Result);
  159. }
  160. /*
  161. Min/Max(a, b) spec semantics:
  162. If any value is NaN, return NaN
  163. a < b ? a : b; where +0.0 > -0.0 (vice versa for Max)
  164. X86 MIN/MAXPS semantics:
  165. If any value is NaN, return 2nd operand
  166. If both values are +/-0.0, return 2nd operand
  167. return a < b ? a : b (vice versa for Max)
  168. */
  169. SIMDValue SIMDFloat32x4Operation::OpMin(const SIMDValue& aValue, const SIMDValue& bValue)
  170. {
  171. X86SIMDValue x86Result;
  172. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  173. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  174. X86SIMDValue tmp1, tmp2;
  175. // if tmp1 and tmp2 are not identical then either
  176. // 1) at least one value is NaN, then the OR will set that lane to NaN
  177. // 2) one value is 0.0 and the other is -0.0, the OR will set the sign bit to have -0.0
  178. tmp1.m128_value = _mm_min_ps(tmpaValue.m128_value, tmpbValue.m128_value);
  179. tmp2.m128_value = _mm_min_ps(tmpbValue.m128_value, tmpaValue.m128_value);
  180. x86Result.m128_value = _mm_or_ps(tmp1.m128_value, tmp2.m128_value);
  181. return X86SIMDValue::ToSIMDValue(x86Result);
  182. }
  183. SIMDValue SIMDFloat32x4Operation::OpMax(const SIMDValue& aValue, const SIMDValue& bValue)
  184. {
  185. X86SIMDValue x86Result;
  186. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  187. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  188. X86SIMDValue tmp1, tmp2, NaNs;
  189. // if tmp1 and tmp2 are not identical then either
  190. // 1) at least one value is NaN, then the OR will set that lane to NaN
  191. // 2) one value is 0.0 and the other is -0.0, the OR will set the sign bit to have -0.0
  192. // 1's where NaNs are
  193. NaNs.m128_value = _mm_cmpunord_ps(tmpaValue.m128_value, tmpbValue.m128_value);
  194. tmp1.m128_value = _mm_max_ps(tmpaValue.m128_value, tmpbValue.m128_value);
  195. tmp2.m128_value = _mm_max_ps(tmpbValue.m128_value, tmpaValue.m128_value);
  196. // Force lanes that had +/-0.0 to be +0.0
  197. // Lanes that had NaNs can be garbage after this step.
  198. tmp1.m128_value = _mm_and_ps(tmp1.m128_value, tmp2.m128_value);
  199. // Fix lanes that had NaNs to all 1's (NaNs).
  200. x86Result.m128_value = _mm_or_ps(tmp1.m128_value, NaNs.m128_value);
  201. return X86SIMDValue::ToSIMDValue(x86Result);
  202. }
  203. SIMDValue SIMDFloat32x4Operation::OpScale(const SIMDValue& Value, float scaleValue)
  204. {
  205. X86SIMDValue x86Result;
  206. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(Value);
  207. X86SIMDValue scaleVector;
  208. scaleVector.m128_value = _mm_set1_ps(scaleValue);
  209. x86Result.m128_value = _mm_mul_ps(v.m128_value, scaleVector.m128_value); // v * scale
  210. return X86SIMDValue::ToSIMDValue(x86Result);
  211. }
  212. SIMDValue SIMDFloat32x4Operation::OpLessThan(const SIMDValue& aValue, const SIMDValue& bValue)
  213. {
  214. X86SIMDValue x86Result;
  215. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  216. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  217. x86Result.m128_value = _mm_cmplt_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a < b?
  218. return X86SIMDValue::ToSIMDValue(x86Result);
  219. }
  220. SIMDValue SIMDFloat32x4Operation::OpLessThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  221. {
  222. X86SIMDValue x86Result;
  223. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  224. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  225. x86Result.m128_value = _mm_cmple_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a <= b?
  226. return X86SIMDValue::ToSIMDValue(x86Result);
  227. }
  228. SIMDValue SIMDFloat32x4Operation::OpEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  229. {
  230. X86SIMDValue x86Result;
  231. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  232. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  233. x86Result.m128_value = _mm_cmpeq_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a == b?
  234. return X86SIMDValue::ToSIMDValue(x86Result);
  235. }
  236. SIMDValue SIMDFloat32x4Operation::OpNotEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  237. {
  238. X86SIMDValue x86Result;
  239. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  240. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  241. x86Result.m128_value = _mm_cmpneq_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a != b?
  242. return X86SIMDValue::ToSIMDValue(x86Result);
  243. }
  244. SIMDValue SIMDFloat32x4Operation::OpGreaterThan(const SIMDValue& aValue, const SIMDValue& bValue)
  245. {
  246. X86SIMDValue x86Result;
  247. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  248. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  249. x86Result.m128_value = _mm_cmpgt_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a > b?
  250. return X86SIMDValue::ToSIMDValue(x86Result);
  251. }
  252. SIMDValue SIMDFloat32x4Operation::OpGreaterThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  253. {
  254. X86SIMDValue x86Result;
  255. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  256. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  257. x86Result.m128_value = _mm_cmpge_ps(tmpaValue.m128_value, tmpbValue.m128_value); // a >= b?
  258. return X86SIMDValue::ToSIMDValue(x86Result);
  259. }
  260. SIMDValue SIMDFloat32x4Operation::OpClamp(const SIMDValue& value, const SIMDValue& lower, const SIMDValue& upper)
  261. { // SIMD review: do we have intrinsic for the implementation?
  262. SIMDValue result;
  263. // lower clamp
  264. result.f32[SIMD_X] = value.f32[SIMD_X] < lower.f32[SIMD_X] ? lower.f32[SIMD_X] : value.f32[SIMD_X];
  265. result.f32[SIMD_Y] = value.f32[SIMD_Y] < lower.f32[SIMD_Y] ? lower.f32[SIMD_Y] : value.f32[SIMD_Y];
  266. result.f32[SIMD_Z] = value.f32[SIMD_Z] < lower.f32[SIMD_Z] ? lower.f32[SIMD_Z] : value.f32[SIMD_Z];
  267. result.f32[SIMD_W] = value.f32[SIMD_W] < lower.f32[SIMD_W] ? lower.f32[SIMD_W] : value.f32[SIMD_W];
  268. // upper clamp
  269. result.f32[SIMD_X] = result.f32[SIMD_X] > upper.f32[SIMD_X] ? upper.f32[SIMD_X] : result.f32[SIMD_X];
  270. result.f32[SIMD_Y] = result.f32[SIMD_Y] > upper.f32[SIMD_Y] ? upper.f32[SIMD_Y] : result.f32[SIMD_Y];
  271. result.f32[SIMD_Z] = result.f32[SIMD_Z] > upper.f32[SIMD_Z] ? upper.f32[SIMD_Z] : result.f32[SIMD_Z];
  272. result.f32[SIMD_W] = result.f32[SIMD_W] > upper.f32[SIMD_W] ? upper.f32[SIMD_W] : result.f32[SIMD_W];
  273. return result;
  274. }
  275. SIMDValue SIMDFloat32x4Operation::OpSelect(const SIMDValue& mV, const SIMDValue& tV, const SIMDValue& fV)
  276. {
  277. X86SIMDValue x86Result;
  278. X86SIMDValue maskValue = X86SIMDValue::ToX86SIMDValue(mV);
  279. X86SIMDValue trueValue = X86SIMDValue::ToX86SIMDValue(tV);
  280. X86SIMDValue falseValue = X86SIMDValue::ToX86SIMDValue(fV);
  281. X86SIMDValue tempTrue, tempFalse;
  282. tempTrue.m128_value = _mm_and_ps(maskValue.m128_value, trueValue.m128_value); // mask & True
  283. tempFalse.m128_value = _mm_andnot_ps(maskValue.m128_value, falseValue.m128_value); // !mask & False
  284. x86Result.m128_value = _mm_or_ps(tempTrue.m128_value, tempFalse.m128_value); // tempTrue | tempFalse
  285. return X86SIMDValue::ToSIMDValue(x86Result);
  286. }
  287. }
  288. #endif