// float_tensor.cpp
  1. #include <traph/tensor/float_tensor.h>
  2. namespace traph
  3. {
  4. // definition
  5. // private
  6. void Tensor<f32>::auto_strides()
  7. {
  8. idx_type dim_num = _dimensions.size();
  9. _strides.resize(dim_num);
  10. idx_type stride = 1;
  11. if(_order == layout_type::column_major)
  12. {
  13. for (idx_type i = dim_num - 1; i >= 0; --i)
  14. {
  15. _strides[i] = stride;
  16. stride *= _dimensions[i];
  17. }
  18. }
  19. else
  20. {
  21. for (idx_type i = 0; i < dim_num; ++i)
  22. {
  23. _strides[i] = stride;
  24. stride *= _dimensions[i];
  25. }
  26. }
  27. }
  28. void Tensor<f32>::apply_impl(idx_type dim, idx_type idx, std::function<f32(f32)> f)
  29. {
  30. idx_type dim_size = _dimensions.size();
  31. idx_type step_len = _strides[dim];
  32. idx_type step_num = _dimensions[dim];
  33. for(idx_type i = 0; i < step_num; ++i)
  34. {
  35. if(dim == dim_size - 1)
  36. _rep->data[idx] = f(_rep->data[idx]);
  37. else
  38. apply_impl(dim + 1, idx, f);
  39. idx += step_len;
  40. }
  41. }
  42. void Tensor<f32>::reduce_impl(f32& result, idx_type dim, idx_type idx, std::function<f32(f32,f32)> f) const
  43. {
  44. idx_type dim_size = _dimensions.size();
  45. idx_type step_len = _strides[dim];
  46. idx_type step_num = _dimensions[dim];
  47. for(idx_type i = 0; i < step_num; ++i)
  48. {
  49. if(dim == dim_size - 1)
  50. result = f(result, _rep->data[idx]);
  51. else
  52. reduce_impl(result, dim + 1, idx, f);
  53. idx += step_len;
  54. }
  55. }
  56. f32 Tensor<f32>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<f32(f32,f32)> f) const
  57. {
  58. f32 result{};
  59. for(idx_type i = 0; i < step_num; ++i)
  60. {
  61. result = f(result, _rep->data[begin]);
  62. begin += step_len;
  63. }
  64. return result;
  65. }
// Reduce along `reduce_dim`, writing each folded value into `result`.
// Recursively walks every non-reduced dimension, keeping parallel flat
// indices into this tensor (`this_idx`) and the result (`result_idx`);
// when all dimensions are consumed, the kernel folds the entire reduced
// axis into one scalar.
// NOTE(review): for dim > reduce_dim the code advances by
// result._strides[dim], but `result` has one fewer dimension, so the
// matching stride index would be dim - 1 — this looks off by one (and
// reads past result._strides for the last dimension). Confirm against
// callers / intended reduce_dim usage.
void Tensor<f32>::reduce_dim_impl(Tensor<f32>& result, idx_type dim, idx_type reduce_dim,
    idx_type this_idx, idx_type result_idx,
    std::function<f32(f32,f32)> f) const
{
    idx_type dim_size = _dimensions.size();
    if(dim == dim_size)
    {
        // All dimensions visited: fold the reduced axis in one pass.
        result._rep->data[result_idx] =
            reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
        return;
    }
    if(dim == reduce_dim)
    {
        // The reduced dimension is not iterated here; the kernel handles it.
        reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
    }
    else
    {
        // Iterate this (kept) dimension on both tensors in lockstep.
        for(idx_type i = 0; i < _dimensions[dim]; ++i)
        {
            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
            this_idx += _strides[dim];
            result_idx += result._strides[dim];
        }
    }
}
  91. // public
// Default constructor: empty (0-dimensional) tensor with its own fresh
// storage, column-major layout, zero offset.
Tensor<f32>::Tensor()
    :_rep(new TensorStorage<f32>),
    _dimensions(), _offset(0), _strides(), _order(layout_type::column_major)
{
}
// Shape-only constructor: column-major layout, strides derived from the
// shape via auto_strides(), storage sized to flat_size() elements.
Tensor<f32>::Tensor(const DimVector& dimensions)
    :_rep(new TensorStorage<f32>),
    _dimensions(dimensions), _offset(0), _strides(), _order(layout_type::column_major)
{
    auto_strides();
    _rep->resize_(_dimensions.flat_size());
}
// Shape + layout constructor: strides derived from the shape for the given
// `order`, storage sized to flat_size() elements.
Tensor<f32>::Tensor(const DimVector& dimensions, layout_type order)
    :_rep(new TensorStorage<f32>),
    _dimensions(dimensions), _offset(0), _strides(), _order(order)
{
    auto_strides();
    _rep->resize_(_dimensions.flat_size());
}
  111. Tensor<f32>::Tensor(const DimVector& dimensions, const DimVector& strides)
  112. :_rep(new TensorStorage<f32>),
  113. _dimensions(dimensions), _offset(0), _strides(strides), _order(layout_type::column_major)
  114. {
  115. auto_strides();
  116. _rep->resize_(_dimensions.flat_size());
  117. }
  118. Tensor<f32>::Tensor(const DimVector& dimensions, const DimVector& strides, layout_type order)
  119. :_rep(new TensorStorage<f32>),
  120. _dimensions(dimensions), _offset(0), _strides(strides), _order(order)
  121. {
  122. auto_strides();
  123. _rep->resize_(_dimensions.flat_size());
  124. }
  125. Tensor<f32>::Tensor(const f32& t)
  126. :_rep(new TensorStorage<f32>),
  127. _dimensions(), _offset(0), _strides()
  128. {
  129. _dimensions.resize(1);
  130. auto_strides();
  131. }
// In-place element-wise addition: *this += *other, with broadcasting
// evaluated from the trailing dimension using negative dimension indices
// (-1 = last, NumPy style).
// NOTE(review): `other` is dynamic_cast to Tensor<f32>*; a non-Tensor<f32>
// argument yields nullptr which is then dereferenced — confirm callers
// guarantee the concrete type.
void Tensor<f32>::add_(TensorInterfacePtr other)
{
    // check tensor other type
    // check broadcast.shape = this.shape
    // ok, get lhs, rhs
    Tensor<f32> * lhs = this;
    Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
    // Recursive lambda: walks dims from -1 downward; once a dim index is
    // past an operand's rank, that operand is treated as extent 1 (broadcast).
    std::function<void(Tensor<f32> *, Tensor<f32> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
        [&](Tensor<f32> * lhs, Tensor<f32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
        auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
        auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
        // Both ranks exhausted: add a single scalar element.
        if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
        {
            lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
            return;
        }
        // Extent of the current dim on each side (1 once that side's rank
        // is exhausted). NOTE(review): assumes -(size().size()) negates a
        // signed value; if DimVector::size() returns an unsigned type the
        // comparison wraps — confirm idx_type conventions.
        idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
        idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
        idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
        for (idx_type i = 0; i < max_shape_size; ++i)
        {
            add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
            // Only advance along dimensions that are not broadcast (extent > 1).
            if(lsh_shape_size > 1)
                lhs_idx += lhs->stride(lhs_dim);
            if (rsh_shape_size > 1)
                rhs_idx += rhs->stride(rhs_dim);
        }
    };
    // Start at the trailing dimension of both operands, at their offsets.
    add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
}
  162. void Tensor<f32>::apply_(std::function<f32(f32)> f)
  163. {
  164. if(_dimensions.size() > 0)
  165. apply_impl(0, _offset, f);
  166. }
  167. TensorInterfacePtr Tensor<f32>::clone() const
  168. {
  169. std::shared_ptr<Tensor<f32>> cloned_tensor(new Tensor<f32>);
  170. cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<f32>>(_rep->clone());
  171. cloned_tensor->_dimensions = _dimensions;
  172. cloned_tensor->_offset = _offset;
  173. cloned_tensor->_strides = _strides;
  174. cloned_tensor->_order = _order;
  175. return cloned_tensor;
  176. }
  177. void Tensor<f32>::cos_()
  178. {
  179. apply_([](f32 a)->f32 {return std::cos(a); });
  180. }
  181. std::shared_ptr<TensorBase<f32>> Tensor<f32>::create_grad()
  182. {
  183. return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
  184. }
// Mutable pointer to the start of the backing buffer (the view's _offset
// is NOT applied).
f32* Tensor<f32>::data_ptr()
{
    return _rep->data_ptr();
}
// Read-only pointer to the start of the backing buffer (the view's
// _offset is NOT applied).
const f32* Tensor<f32>::data_ptr() const
{
    return _rep->data_ptr();
}
// Host-only implementation: always device 0 (platform() reports none).
device_id Tensor<f32>::device() { return 0; }
// Matrix inverse: delegates to the helper inverse_impl (defined elsewhere)
// and upcasts the resulting tensor pointer to the interface type.
std::shared_ptr<TensorInterface> Tensor<f32>::inverse() const
{
    return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
}
  198. void Tensor<f32>::fill_(f32 value)
  199. {
  200. apply_([&value](f32 a)->f32 {return value; });
  201. }
  202. f32 Tensor<f32>::item() const
  203. {
  204. if(_dimensions.flat_size() == 1)
  205. {
  206. return _rep->data[_offset];
  207. }
  208. else
  209. {
  210. throw std::runtime_error("item: only one element tensors can be converted to scalars");
  211. }
  212. }
  213. std::shared_ptr<TensorInterface> Tensor<f32>::matmul(std::shared_ptr<TensorInterface> mat) const
  214. {
  215. auto right_matrix = std::dynamic_pointer_cast<Tensor<f32>>(mat);
  216. return matmul_impl(*this, *right_matrix);
  217. }
  218. void Tensor<f32>::neg_()
  219. {
  220. apply_([](f32 a)->f32 {return -a; });
  221. }
// Flat index of this view's first element within the shared storage buffer.
idx_type Tensor<f32>::offset() const { return _offset; }
// Memory-layout tag consumed by auto_strides().
layout_type Tensor<f32>::order() const { return _order; }
// Host-only implementation: no accelerator platform.
platform_type Tensor<f32>::platform() { return platform_type::none; }
  225. void Tensor<f32>::pow_(f32 exp)
  226. {
  227. apply_([&exp](f32 a)->f32 {return std::pow(a, exp); });
  228. }
  229. f32 Tensor<f32>::reduce_(std::function<f32(f32, f32)> f) const
  230. {
  231. f32 result{};
  232. reduce_impl(result, 0, _offset, f);
  233. return result;
  234. }
// Reduce along dimension `dim` with binary op f, producing a tensor whose
// shape is this shape with `dim` erased.
// NOTE(review): `dim` is not validated against the rank (the inline
// "check dim?" comment below is original) — confirm callers range-check.
TensorInterfacePtr Tensor<f32>::reduce_dim(idx_type dim, std::function<f32(f32, f32)> f) const
{
    // Result shape: this shape minus the reduced dimension.
    DimVector reduced_dim = _dimensions;
    reduced_dim.erase(dim); // check dim?
    TensorBasePtr<f32> result(new Tensor<f32>(reduced_dim));
    // Downcast to reach the concrete tensor's private members.
    TensorPtr<f32> raw_result = std::dynamic_pointer_cast<Tensor<f32>>(result);
    reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
    return std::dynamic_pointer_cast<TensorInterface>(result);
}
// TODO(review): unimplemented stub — callers currently get a silent no-op.
// Presumably it should validate dims.flat_size() == _dimensions.flat_size()
// and then update _dimensions/_strides; confirm intended semantics before
// implementing.
void Tensor<f32>::reshape_(const DimVector& dims)
{
}
  247. void Tensor<f32>::resize_(const DimVector& dims)
  248. {
  249. _dimensions = dims;
  250. _rep->resize_(dims.flat_size());
  251. auto_strides();
  252. }
  253. std::shared_ptr<TensorInterface> Tensor<f32>::select(const SliceVector& slice) const
  254. {
  255. std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
  256. result->_rep = _rep;
  257. // dimension
  258. DimVector dim;
  259. std::fesetround(FE_TONEAREST);
  260. for (idx_type i = 0; i < slice.size(); ++i)
  261. {
  262. auto& each = slice[i];
  263. dim.push_back(
  264. std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
  265. );
  266. }
  267. result->_dimensions = dim;
  268. // offset
  269. idx_type new_offset = 1;
  270. for (idx_type i = 0; i < slice.size(); ++i)
  271. {
  272. new_offset *= _strides[i] * slice[i].start.value_or(0);
  273. }
  274. result->_offset = _offset + new_offset;
  275. // strides
  276. DimVector strides;
  277. for (idx_type i = 0; i < slice.size(); ++i)
  278. {
  279. strides.push_back(_strides[i] * slice[i].step.value_or(1));
  280. }
  281. result->_strides = strides;
  282. result->_order = _order;
  283. return std::dynamic_pointer_cast<TensorInterface>(result);
  284. }
  285. void Tensor<f32>::sin_()
  286. {
  287. apply_([](f32 a)->f32 {return std::sin(a); });
  288. }
// Shape accessor (returns a copy).
DimVector Tensor<f32>::size() const { return _dimensions;}
// Extent of dimension i; negative i counts from the back (-1 = last).
// Throws when i falls outside [-rank, rank).
// NOTE(review): the comparisons assume idx_type and DimVector::size() are
// signed-compatible; if size() returns an unsigned type, -_dimensions.size()
// wraps — confirm the project's idx_type convention.
idx_type Tensor<f32>::size(idx_type i) const
{
    auto shape_size = _dimensions.size();
    if (i >= 0 && i < _dimensions.size())
        return _dimensions[i];
    else if (i <= -1 && i >= -_dimensions.size())
        return _dimensions[shape_size + i];
    else
        throw std::runtime_error("Dimension out of range");
}
// Shared handle to the underlying storage (aliased by views).
std::shared_ptr<StorageBase<f32>> Tensor<f32>::storage() const { return _rep; }
// Strides accessor (returns a copy).
DimVector Tensor<f32>::stride() const { return _strides; }
// Stride of dimension i; negative i counts from the back (-1 = last).
// Throws when i falls outside [-rank, rank).
// NOTE(review): same signedness caveat as size(idx_type) — -_strides.size()
// wraps if size() returns an unsigned type; confirm idx_type convention.
idx_type Tensor<f32>::stride(idx_type i) const
{
    auto stride_size = _strides.size();
    if (i >= 0 && i < _strides.size())
        return _strides[i];
    else if (i <= -1 && i >= -_strides.size())
        return _strides[stride_size + i];
    else
        throw std::runtime_error("Stride out of range");
}
// In-place element-wise subtraction: *this -= *other, with broadcasting
// evaluated from the trailing dimension using negative dimension indices
// (-1 = last). Mirrors add_'s traversal.
// NOTE(review): `other` is dynamic_cast to Tensor<f32>*; a non-Tensor<f32>
// argument yields nullptr which is then dereferenced — confirm callers
// guarantee the concrete type.
void Tensor<f32>::sub_(std::shared_ptr<TensorInterface> other)
{
    Tensor<f32> * lhs = this;
    Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
    // Recursive lambda: walks dims from -1 downward; once a dim index is
    // past an operand's rank, that operand is treated as extent 1 (broadcast).
    std::function<void(Tensor<f32> *, Tensor<f32> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
        [&](Tensor<f32> * lhs, Tensor<f32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
        auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
        auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
        // Both ranks exhausted: subtract a single scalar element.
        if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
        {
            lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
            return;
        }
        // Extent of the current dim on each side (1 once that side's rank
        // is exhausted). NOTE(review): assumes -(size().size()) negates a
        // signed value; if DimVector::size() returns an unsigned type the
        // comparison wraps — confirm idx_type conventions.
        idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
        idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
        idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
        for (idx_type i = 0; i < max_shape_size; ++i)
        {
            sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
            // Only advance along dimensions that are not broadcast (extent > 1).
            if(lhs_shape_size > 1)
                lhs_idx += lhs->stride(lhs_dim);
            if (rhs_shape_size > 1)
                rhs_idx += rhs->stride(rhs_dim);
        }
    };
    // Start at the trailing dimension of both operands, at their offsets.
    sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
}
  339. TensorInterfacePtr Tensor<f32>::sum() const
  340. {
  341. DimVector d(1);
  342. d[0] = 1;
  343. TensorPtr<f32> result(new Tensor<f32>(d));
  344. result->_rep->data[0] = reduce_([](f32 a, f32 b)->f32 {return a + b; });
  345. return std::dynamic_pointer_cast<TensorInterface>(result);
  346. }
// Render the tensor as a nested bracketed string, e.g. "[[1,2],[3,4]]",
// walking dimensions recursively with stride-aware flat indexing.
std::string Tensor<f32>::to_string() const
{
    // Recursive formatter: `dim` is the current dimension, `idx` the flat
    // index of the first element of the current sub-tensor.
    std::function<std::string(const Tensor<f32>&, idx_type, idx_type)> to_string_impl =
        [&](const Tensor<f32>& t, idx_type dim, idx_type idx)->std::string {
        std::string result;
        if (dim == t.size().size())
        {
            // Scalar leaf: print the element itself.
            result += std::to_string(t.data_ptr()[idx]);
            return result;
        }
        for (idx_type i = 0; i < t.size(dim); ++i)
        {
            // Separate sibling sub-tensors with ",\n" and wrap each in
            // brackets; elements of the innermost dim are comma-separated.
            if (dim != t.size().size() - 1 && i != 0) result += ",\n";
            if(dim != t.size().size() - 1) result += "[";
            result += to_string_impl(t, dim + 1, idx);
            if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
                result += ",";
            if (dim != t.size().size() - 1) result += "]";
            idx += t.stride(dim);
        }
        return result;
    };
    // Outermost brackets are added here around the whole rendering.
    std::string result;
    result += "[" + to_string_impl(*this, 0, offset()) + "]";
    return result;
}
  373. void Tensor<f32>::transpose_(idx_type dim0, idx_type dim1)
  374. {
  375. if(dim0 != dim1 &&
  376. _dimensions.in_range(dim0) &&
  377. _dimensions.in_range(dim1))
  378. {
  379. std::swap(_dimensions[dim0], _dimensions[dim1]);
  380. std::swap(_strides[dim0], _strides[dim1]);
  381. }
  382. }
  383. std::shared_ptr<TensorInterface> Tensor<f32>::transpose(idx_type dim0, idx_type dim1)
  384. {
  385. std::shared_ptr<TensorInterface> result= this->clone();
  386. result->transpose_(dim0, dim1);
  387. return result;
  388. }
  389. }