SimdInt8x16OperationX86X64.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft Corporation and contributors. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLanguagePch.h"
  6. #if _M_IX86 || _M_AMD64
  7. namespace Js
  8. {
  9. // SIMD.Int8x16 operation wrappers that cover intrinsics for x86/x64 system
  10. SIMDValue SIMDInt8x16Operation::OpInt8x16(int8 values[])
  11. {
  12. X86SIMDValue x86Result;
  13. // Sets the 16 signed 8-bit integer values, note in revised order: starts with x15 below
  14. x86Result.m128i_value = _mm_set_epi8(values[15], values[14], values[13], values[12],
  15. values[11], values[10], values[9], values[8],
  16. values[7], values[6], values[5], values[4],
  17. values[3], values[2], values[1], values[0]);
  18. return X86SIMDValue::ToSIMDValue(x86Result);
  19. }
  20. SIMDValue SIMDInt8x16Operation::OpSplat(int8 x)
  21. {
  22. X86SIMDValue x86Result;
  23. // set 16 signed 8-bit integers values to input value x
  24. x86Result.m128i_value = _mm_set1_epi8(x);
  25. return X86SIMDValue::ToSIMDValue(x86Result);
  26. }
  27. //// Unary Ops
  28. SIMDValue SIMDInt8x16Operation::OpNeg(const SIMDValue& value)
  29. {
  30. X86SIMDValue x86Result;
  31. X86SIMDValue SIGNMASK, temp;
  32. X86SIMDValue negativeOnes = { { -1, -1, -1, -1 } };
  33. X86SIMDValue v = X86SIMDValue::ToX86SIMDValue(value);
  34. temp.m128i_value = _mm_andnot_si128(v.m128i_value, negativeOnes.m128i_value); // (~value) & (negative ones)
  35. SIGNMASK.m128i_value = _mm_set1_epi8(0x00000001); // set SIGNMASK to 1
  36. x86Result.m128i_value = _mm_add_epi8(SIGNMASK.m128i_value, temp.m128i_value);// add 16 integers respectively
  37. return X86SIMDValue::ToSIMDValue(x86Result);
  38. }
  39. SIMDValue SIMDInt8x16Operation::OpAdd(const SIMDValue& aValue, const SIMDValue& bValue)
  40. {
  41. X86SIMDValue x86Result;
  42. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  43. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  44. x86Result.m128i_value = _mm_add_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // a + b
  45. return X86SIMDValue::ToSIMDValue(x86Result);
  46. }
  47. SIMDValue SIMDInt8x16Operation::OpSub(const SIMDValue& aValue, const SIMDValue& bValue)
  48. {
  49. X86SIMDValue x86Result;
  50. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  51. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  52. x86Result.m128i_value = _mm_sub_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // a - b
  53. return X86SIMDValue::ToSIMDValue(x86Result);
  54. }
  55. SIMDValue SIMDInt8x16Operation::OpMul(const SIMDValue& aValue, const SIMDValue& bValue)
  56. {
  57. X86SIMDValue x86Result;
  58. X86SIMDValue x86tmp1;
  59. X86SIMDValue x86tmp2;
  60. X86SIMDValue x86tmp3;
  61. const _x86_SIMDValue X86_LOWBYTE_MASK = { 0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff };
  62. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  63. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  64. // (ah* 2^8 + al) * (bh *2^8 + bl) = (ah*bh* 2^8 + al*bh + ah* bl) * 2^8 + al * bl
  65. x86tmp1.m128i_value = _mm_mullo_epi16(tmpaValue.m128i_value, tmpbValue.m128i_value);
  66. x86tmp2.m128i_value = _mm_and_si128(x86tmp1.m128i_value, X86_LOWBYTE_MASK.m128i_value);
  67. tmpaValue.m128i_value = _mm_srli_epi16(tmpaValue.m128i_value, 8);
  68. tmpbValue.m128i_value = _mm_srli_epi16(tmpbValue.m128i_value, 8);
  69. x86tmp3.m128i_value = _mm_mullo_epi16(tmpaValue.m128i_value, tmpbValue.m128i_value);
  70. x86tmp3.m128i_value = _mm_slli_epi16(x86tmp3.m128i_value, 8);
  71. x86Result.m128i_value = _mm_or_si128(x86tmp2.m128i_value, x86tmp3.m128i_value);
  72. return X86SIMDValue::ToSIMDValue(x86Result);
  73. }
  74. SIMDValue SIMDInt8x16Operation::OpAddSaturate(const SIMDValue& aValue, const SIMDValue& bValue)
  75. {
  76. X86SIMDValue x86Result;
  77. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  78. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  79. x86Result.m128i_value = _mm_adds_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // a + b
  80. return X86SIMDValue::ToSIMDValue(x86Result);
  81. }
  82. SIMDValue SIMDInt8x16Operation::OpSubSaturate(const SIMDValue& aValue, const SIMDValue& bValue)
  83. {
  84. X86SIMDValue x86Result;
  85. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  86. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  87. x86Result.m128i_value = _mm_subs_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // a - b
  88. return X86SIMDValue::ToSIMDValue(x86Result);;
  89. }
  90. SIMDValue SIMDInt8x16Operation::OpMin(const SIMDValue& aValue, const SIMDValue& bValue)
  91. {
  92. //Only available in SSE 4
  93. //x86Result.m128i_value = _mm_min_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // min a b
  94. //SSE 2
  95. SIMDValue selector = SIMDInt8x16Operation::OpLessThan(aValue, bValue);
  96. return SIMDInt8x16Operation::OpSelect(selector, aValue, bValue);
  97. }
  98. SIMDValue SIMDInt8x16Operation::OpMax(const SIMDValue& aValue, const SIMDValue& bValue)
  99. {
  100. //Only available in SSE 4
  101. //x86Result.m128i_value = _mm_max_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // min a b
  102. //SSE 2
  103. SIMDValue selector = SIMDInt8x16Operation::OpGreaterThan(aValue, bValue);
  104. return SIMDInt8x16Operation::OpSelect(selector, aValue, bValue);
  105. }
  106. SIMDValue SIMDInt8x16Operation::OpLessThan(const SIMDValue& aValue, const SIMDValue& bValue)
  107. {
  108. X86SIMDValue x86Result;
  109. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  110. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  111. x86Result.m128i_value = _mm_cmplt_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a < b?
  112. return X86SIMDValue::ToSIMDValue(x86Result);
  113. }
  114. SIMDValue SIMDInt8x16Operation::OpLessThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  115. {
  116. X86SIMDValue x86Result;
  117. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  118. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  119. if (AutoSystemInfo::Data.SSE4_1Available())
  120. {
  121. x86Result.m128i_value = _mm_max_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // max(a,b) == a
  122. x86Result.m128i_value = _mm_cmpeq_epi8(tmpbValue.m128i_value, x86Result.m128i_value); //
  123. }
  124. else
  125. {
  126. X86SIMDValue x86Tmp1, x86Tmp2;
  127. x86Tmp1.m128i_value = _mm_cmplt_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a > b?
  128. x86Tmp2.m128i_value = _mm_cmpeq_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a == b?
  129. x86Result.m128i_value = _mm_or_si128(x86Tmp1.m128i_value, x86Tmp2.m128i_value);
  130. }
  131. return X86SIMDValue::ToSIMDValue(x86Result);
  132. }
  133. SIMDValue SIMDInt8x16Operation::OpEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  134. {
  135. X86SIMDValue x86Result;
  136. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  137. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  138. x86Result.m128i_value = _mm_cmpeq_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a == b?
  139. return X86SIMDValue::ToSIMDValue(x86Result);
  140. }
  141. SIMDValue SIMDInt8x16Operation::OpNotEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  142. {
  143. X86SIMDValue x86Result;
  144. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  145. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  146. x86Result.m128i_value = _mm_cmpeq_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a != b?
  147. X86SIMDValue negativeOnes = { { -1, -1, -1, -1 } };
  148. x86Result.m128i_value = _mm_andnot_si128(x86Result.m128i_value, negativeOnes.m128i_value);
  149. return X86SIMDValue::ToSIMDValue(x86Result);
  150. }
  151. SIMDValue SIMDInt8x16Operation::OpGreaterThan(const SIMDValue& aValue, const SIMDValue& bValue)
  152. {
  153. X86SIMDValue x86Result;
  154. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  155. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  156. x86Result.m128i_value = _mm_cmpgt_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a > b?
  157. return X86SIMDValue::ToSIMDValue(x86Result);
  158. }
  159. SIMDValue SIMDInt8x16Operation::OpGreaterThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
  160. {
  161. X86SIMDValue x86Result;
  162. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(aValue);
  163. X86SIMDValue tmpbValue = X86SIMDValue::ToX86SIMDValue(bValue);
  164. if (AutoSystemInfo::Data.SSE4_1Available())
  165. {
  166. x86Result.m128i_value = _mm_max_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // max(a,b) == b
  167. x86Result.m128i_value = _mm_cmpeq_epi8(tmpaValue.m128i_value, x86Result.m128i_value); //
  168. }
  169. else
  170. {
  171. X86SIMDValue x86Tmp1, x86Tmp2;
  172. x86Tmp1.m128i_value = _mm_cmpgt_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a > b?
  173. x86Tmp2.m128i_value = _mm_cmpeq_epi8(tmpaValue.m128i_value, tmpbValue.m128i_value); // compare a == b?
  174. x86Result.m128i_value = _mm_or_si128(x86Tmp1.m128i_value, x86Tmp2.m128i_value);
  175. }
  176. return X86SIMDValue::ToSIMDValue(x86Result);
  177. }
  178. SIMDValue SIMDInt8x16Operation::OpShiftLeftByScalar(const SIMDValue& value, int count)
  179. {
  180. X86SIMDValue x86Result;
  181. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(value);
  182. X86SIMDValue x86tmp1;
  183. count = count & SIMDUtils::SIMDGetShiftAmountMask(1);
  184. x86tmp1.m128i_value = _mm_and_si128(tmpaValue.m128i_value, X86_HIGHBYTES_MASK.m128i_value);
  185. x86tmp1.m128i_value = _mm_slli_epi16(x86tmp1.m128i_value, count);
  186. tmpaValue.m128i_value = _mm_slli_epi16(tmpaValue.m128i_value, count);
  187. tmpaValue.m128i_value = _mm_and_si128(tmpaValue.m128i_value, X86_LOWBYTES_MASK.m128i_value);
  188. x86Result.m128i_value = _mm_or_si128(tmpaValue.m128i_value, x86tmp1.m128i_value);
  189. return X86SIMDValue::ToSIMDValue(x86Result);
  190. }
  191. SIMDValue SIMDInt8x16Operation::OpShiftRightByScalar(const SIMDValue& value, int count)
  192. {
  193. X86SIMDValue x86Result;
  194. X86SIMDValue tmpaValue = X86SIMDValue::ToX86SIMDValue(value);
  195. X86SIMDValue x86tmp1;
  196. count = count & SIMDUtils::SIMDGetShiftAmountMask(1);
  197. x86tmp1.m128i_value = _mm_slli_epi16(tmpaValue.m128i_value, 8);
  198. x86tmp1.m128i_value = _mm_srai_epi16(x86tmp1.m128i_value, count + 8);
  199. x86tmp1.m128i_value = _mm_and_si128(x86tmp1.m128i_value, X86_LOWBYTES_MASK.m128i_value);
  200. tmpaValue.m128i_value = _mm_srai_epi16(tmpaValue.m128i_value, count);
  201. tmpaValue.m128i_value = _mm_and_si128(tmpaValue.m128i_value, X86_HIGHBYTES_MASK.m128i_value);
  202. x86Result.m128i_value = _mm_or_si128(tmpaValue.m128i_value, x86tmp1.m128i_value);
  203. return X86SIMDValue::ToSIMDValue(x86Result);
  204. }
  205. SIMDValue SIMDInt8x16Operation::OpSelect(const SIMDValue& mV, const SIMDValue& tV, const SIMDValue& fV)
  206. {
  207. X86SIMDValue x86Result;
  208. X86SIMDValue maskValue = X86SIMDValue::ToX86SIMDValue(mV);
  209. X86SIMDValue trueValue = X86SIMDValue::ToX86SIMDValue(tV);
  210. X86SIMDValue falseValue = X86SIMDValue::ToX86SIMDValue(fV);
  211. X86SIMDValue tempTrue, tempFalse;
  212. tempTrue.m128i_value = _mm_and_si128(maskValue.m128i_value, trueValue.m128i_value); // mask & T
  213. tempFalse.m128i_value = _mm_andnot_si128(maskValue.m128i_value, falseValue.m128i_value); //!mask & F
  214. x86Result.m128i_value = _mm_or_si128(tempTrue.m128i_value, tempFalse.m128i_value); // tempT | temp F
  215. return X86SIMDValue::ToSIMDValue(x86Result);
  216. }
  217. }
  218. #endif