// SimdUtils.h
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. // The representations below assume little-endian.
  7. #define SIMD_X 0
  8. #define SIMD_Y 1
  9. #define SIMD_Z 2
  10. #define SIMD_W 3
  11. #define FLOAT64_SIZE 8
  12. #define FLOAT32_SIZE 4
  13. #define INT32_SIZE 4
  14. #define INT16_SIZE 2
  15. #define INT8_SIZE 1
  16. #define SIMD_INDEX_VALUE_MAX 5
  17. #define SIMD_STRING_BUFFER_MAX 1024
  18. #define SIMD_DATA \
  19. Field(int32) i32[4];\
  20. Field(int16) i16[8];\
  21. Field(int8) i8[16];\
  22. Field(uint32) u32[4];\
  23. Field(uint16) u16[8];\
  24. Field(uint8) u8[16];\
  25. Field(float) f32[4];\
  26. Field(double) f64[2]; \
  27. Field(int64) i64[2];
  28. #define SIMD_TEMP_SIZE 3
  29. struct _SIMDValue
  30. {
  31. union{
  32. SIMD_DATA
  33. };
  34. void SetValue(_SIMDValue value)
  35. {
  36. i32[SIMD_X] = value.i32[SIMD_X];
  37. i32[SIMD_Y] = value.i32[SIMD_Y];
  38. i32[SIMD_Z] = value.i32[SIMD_Z];
  39. i32[SIMD_W] = value.i32[SIMD_W];
  40. }
  41. void Zero()
  42. {
  43. f64[SIMD_X] = f64[SIMD_Y] = 0;
  44. }
  45. bool operator==(const _SIMDValue& r)
  46. {
  47. // don't compare f64/f32 because NaN bit patterns will not be considered equal.
  48. return (this->i32[SIMD_X] == r.i32[SIMD_X] &&
  49. this->i32[SIMD_Y] == r.i32[SIMD_Y] &&
  50. this->i32[SIMD_Z] == r.i32[SIMD_Z] &&
  51. this->i32[SIMD_W] == r.i32[SIMD_W]);
  52. }
  53. bool IsZero()
  54. {
  55. return (i32[SIMD_X] == 0 && i32[SIMD_Y] == 0 && i32[SIMD_Z] == 0 && i32[SIMD_W] == 0);
  56. }
  57. };
  58. typedef _SIMDValue SIMDValue;
  59. // For dictionary use
  60. template <>
  61. struct DefaultComparer<_SIMDValue>
  62. {
  63. __forceinline static bool Equals(_SIMDValue x, _SIMDValue y)
  64. {
  65. return x == y;
  66. }
  67. __forceinline static hash_t GetHashCode(_SIMDValue d)
  68. {
  69. return (hash_t)(d.i32[SIMD_X] ^ d.i32[SIMD_Y] ^ d.i32[SIMD_Z] ^ d.i32[SIMD_W]);
  70. }
  71. };
  72. #if _M_IX86 || _M_AMD64
  73. struct _x86_SIMDValue
  74. {
  75. union{
  76. SIMD_DATA
  77. __m128 m128_value;
  78. __m128d m128d_value;
  79. __m128i m128i_value;
  80. };
  81. static _x86_SIMDValue ToX86SIMDValue(const SIMDValue& val)
  82. {
  83. _x86_SIMDValue result;
  84. result.m128_value = _mm_loadu_ps((float*) &val);
  85. return result;
  86. }
  87. static SIMDValue ToSIMDValue(const _x86_SIMDValue& val)
  88. {
  89. SIMDValue result;
  90. _mm_storeu_ps((float*) &result, val.m128_value);
  91. return result;
  92. }
  93. };
  94. #pragma warning(push)
  95. #pragma warning(disable:4838) // conversion from 'unsigned int' to 'int32' requires a narrowing conversion
  96. // These global values are 16-byte aligned.
  97. const _x86_SIMDValue X86_ABS_MASK_F4 = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
  98. const _x86_SIMDValue X86_ABS_MASK_I4 = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
  99. const _x86_SIMDValue X86_ABS_MASK_D2 = { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff };
  100. const _x86_SIMDValue X86_NEG_MASK_F4 = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
  101. const _x86_SIMDValue X86_NEG_MASK_D2 = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
  102. const _x86_SIMDValue X86_ALL_ONES_F4 = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; // {1.0, 1.0, 1.0, 1.0}
  103. const _x86_SIMDValue X86_ALL_ONES_I4 = { 0x00000001, 0x00000001, 0x00000001, 0x00000001 }; // {1, 1, 1, 1}
  104. const _x86_SIMDValue X86_ALL_ONES_D2 = { 0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000 }; // {1.0, 1.0}
  105. const _x86_SIMDValue X86_ALL_NEG_ONES = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
  106. const _x86_SIMDValue X86_ALL_NEG_ONES_F4 = { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000 }; //-1.0, -1.0, -1.0, -1.0
  107. const _x86_SIMDValue X86_ALL_ONES_I8 = { 0x00010001, 0x00010001, 0x00010001, 0x00010001 }; // {1, 1, 1, 1, 1, 1, 1, 1}
  108. const _x86_SIMDValue X86_ALL_ZEROS = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
  109. const _x86_SIMDValue X86_ALL_ONES_I16 = { 0x01010101, 0x01010101, 0x01010101, 0x01010101 }; // {1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
  110. const _x86_SIMDValue X86_LANE0_ONES_I16 = { 0x000000ff, 0x00000000, 0x00000000, 0x00000000 };
  111. const _x86_SIMDValue X86_LOWBYTES_MASK = { 0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff };
  112. const _x86_SIMDValue X86_HIGHBYTES_MASK = { 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00 };
  113. const _x86_SIMDValue X86_LANE_W_ZEROS = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000 };
  114. const _x86_SIMDValue X86_TWO_31_F4 = { 0x4f000000, 0x4f000000, 0x4f000000, 0x4f000000 }; // f32(2^31), ....
  115. const _x86_SIMDValue X86_NEG_TWO_31_F4 = { 0xcf000000, 0xcf000000, 0xcf000000, 0xcf000000 }; // f32(-2^31), ....
  116. const _x86_SIMDValue X86_TWO_32_F4 = { 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000 }; // f32(2^32), ....
  117. const _x86_SIMDValue X86_TWO_31_I4 = X86_NEG_MASK_F4; // 2^31, ....
  118. const _x86_SIMDValue X86_WORD_SIGNBITS = { 0x80008000, 0x80008000, 0x80008000, 0x80008000 };
  119. const _x86_SIMDValue X86_DWORD_SIGNBITS = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
  120. const _x86_SIMDValue X86_BYTE_SIGNBITS = { 0x80808080, 0x80808080, 0x80808080, 0x80808080 };
  121. const _x86_SIMDValue X86_4LANES_MASKS[] = {{ 0xffffffff, 0x00000000, 0x00000000, 0x00000000 },
  122. { 0x00000000, 0xffffffff, 0x00000000, 0x00000000 },
  123. { 0x00000000, 0x00000000, 0xffffffff, 0x00000000 },
  124. { 0x00000000, 0x00000000, 0x00000000, 0xffffffff }};
  125. #pragma warning(pop)
  126. #if ENABLE_NATIVE_CODEGEN && defined(ENABLE_WASM_SIMD)
  127. // auxiliary SIMD values in memory to help JIT'ed code. E.g. used for Int8x16 shuffle.
  128. extern _x86_SIMDValue X86_TEMP_SIMD[];
  129. #endif
  130. typedef _x86_SIMDValue X86SIMDValue;
  131. CompileAssert(sizeof(X86SIMDValue) == 16);
  132. #endif
  133. typedef SIMDValue AsmJsSIMDValue; // alias for asmjs
  134. CompileAssert(sizeof(SIMDValue) == 16);
  135. class ValueType;
  136. namespace Js {
  137. enum class OpCode : ushort;
  138. ///////////////////////////////////////////////////////////////
  139. //Class with static helper methods for manipulating SIMD Data.
  140. ///////////////////////////////////////////////////////////////
  141. class SIMDUtils
  142. {
  143. public:
  144. static uint32 inline SIMDGetShiftAmountMask(uint32 eleSizeInBytes) { return (eleSizeInBytes << 3) - 1; }
  145. ////////////////////////////////////////////
  146. //SIMD Extract Lane / Replace Lane Helpers
  147. ////////////////////////////////////////////
  148. static inline SIMDValue SIMD128InnerReplaceLaneF4(SIMDValue simdVal, const uint32 lane, const float value)
  149. {
  150. Assert(lane < 4);
  151. simdVal.f32[lane] = value;
  152. return simdVal;
  153. };
  154. static inline SIMDValue SIMD128InnerReplaceLaneD2(SIMDValue simdVal, const uint32 lane, const double value)
  155. {
  156. Assert(lane < 2);
  157. simdVal.f64[lane] = value;
  158. return simdVal;
  159. };
  160. static inline SIMDValue SIMD128InnerReplaceLaneI2(SIMDValue simdVal, const uint32 lane, const int64 value)
  161. {
  162. Assert(lane < 2);
  163. simdVal.i64[lane] = value;
  164. return simdVal;
  165. };
  166. static inline SIMDValue SIMD128InnerReplaceLaneI4(SIMDValue simdVal, const uint32 lane, const int32 value)
  167. {
  168. Assert(lane < 4);
  169. simdVal.i32[lane] = value;
  170. return simdVal;
  171. };
  172. static inline SIMDValue SIMD128InnerReplaceLaneI8(SIMDValue simdVal, const uint32 lane, const int16 value)
  173. {
  174. Assert(lane < 8);
  175. simdVal.i16[lane] = value;
  176. return simdVal;
  177. };
  178. static inline SIMDValue SIMD128InnerReplaceLaneI16(SIMDValue simdVal, const uint32 lane, const int8 value)
  179. {
  180. Assert(lane < 16);
  181. simdVal.i8[lane] = value;
  182. return simdVal;
  183. };
  184. static inline int32 SIMD128InnerExtractLaneB4(const SIMDValue src1, const uint32 lane)
  185. {
  186. Assert(lane < 4);
  187. int val = SIMD128InnerExtractLaneI4(src1, lane);
  188. return val ? 1 : 0;
  189. };
  190. static inline int16 SIMD128InnerExtractLaneB8(const SIMDValue src1, const uint32 lane)
  191. {
  192. Assert(lane < 8);
  193. int16 val = SIMD128InnerExtractLaneI8(src1, lane);
  194. return val ? 1 : 0;
  195. };
  196. static inline int8 SIMD128InnerExtractLaneB16(const SIMDValue src1, const uint32 lane)
  197. {
  198. Assert(lane < 16);
  199. int8 val = SIMD128InnerExtractLaneI16(src1, lane);
  200. return val ? 1 : 0;
  201. };
  202. static inline double SIMD128InnerExtractLaneD2(const SIMDValue src1, const uint32 lane) { Assert(lane < 2); return src1.f64[lane]; };
  203. static inline float SIMD128InnerExtractLaneF4(const SIMDValue src1, const uint32 lane) { Assert(lane < 4); return src1.f32[lane]; };
  204. static inline int64 SIMD128InnerExtractLaneI2(const SIMDValue src1, const uint32 lane) { Assert(lane < 2); return src1.i64[lane]; };
  205. static inline int32 SIMD128InnerExtractLaneI4(const SIMDValue src1, const uint32 lane) { Assert(lane < 4); return src1.i32[lane]; };
  206. static inline int16 SIMD128InnerExtractLaneI8(const SIMDValue src1, const uint32 lane) { Assert(lane < 8); return src1.i16[lane]; };
  207. static inline int8 SIMD128InnerExtractLaneI16(const SIMDValue src1, const uint32 lane) { Assert(lane < 16); return src1.i8[lane]; };
  208. static inline SIMDValue SIMD128BitSelect(const SIMDValue src1, const SIMDValue src2, const SIMDValue mask)
  209. {
  210. SIMDValue res{ 0 };
  211. res.i32[0] = (src1.i32[0] & mask.i32[0]) | (src2.i32[0] & ~mask.i32[0]);
  212. res.i32[1] = (src1.i32[1] & mask.i32[1]) | (src2.i32[1] & ~mask.i32[1]);
  213. res.i32[2] = (src1.i32[2] & mask.i32[2]) | (src2.i32[2] & ~mask.i32[2]);
  214. res.i32[3] = (src1.i32[3] & mask.i32[3]) | (src2.i32[3] & ~mask.i32[3]);
  215. return res;
  216. }
  217. ////////////////////////////////////////////
  218. // SIMD Shuffle Swizzle helpers
  219. ////////////////////////////////////////////
  220. static SIMDValue SIMD128InnerShuffle(const SIMDValue src1, const SIMDValue src2, uint32 laneCount, const uint32* lanes = nullptr);
  221. ///////////////////////////////////////////
  222. // SIMD Type conversion
  223. ///////////////////////////////////////////
  224. static SIMDValue FromSimdBits(const SIMDValue value);
  225. ///////////////////////////////////////////
  226. // SIMD Data load/store
  227. ///////////////////////////////////////////
  228. static SIMDValue SIMDLdData(const SIMDValue *data, uint8 dataWidth);
  229. static void SIMDStData(SIMDValue *data, const SIMDValue simdValue, uint8 dataWidth);
  230. template <typename T>
  231. static SIMDValue CanonicalizeToBools(SIMDValue val)
  232. {
  233. #ifdef ENABLE_WASM_SIMD
  234. CompileAssert(sizeof(T) <= sizeof(SIMDValue));
  235. CompileAssert(sizeof(SIMDValue) % sizeof(T) == 0);
  236. T* cursor = (T*)val.i8;
  237. const uint maxBytes = 16;
  238. uint size = maxBytes / sizeof(T);
  239. for (uint i = 0; i < size; i++)
  240. {
  241. cursor[i] = cursor[i] ? (T) -1 : 0;
  242. }
  243. return val;
  244. #else
  245. return val;
  246. #endif
  247. }
  248. static uint32 SimdOpcodeAsIndex(Js::OpCode op);
  249. };
  250. }