double_tensor.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. #include <traph/tensor/double_tensor.h>
  2. namespace traph
  3. {
  4. // definition
  5. // private
  6. void Tensor<f64>::auto_strides()
  7. {
  8. idx_type dim_num = _dimensions.size();
  9. _strides.resize(dim_num);
  10. idx_type stride = 1;
  11. for (idx_type i = dim_num - 1; i >= 0; --i)
  12. {
  13. _strides[i] = stride;
  14. stride *= _dimensions[i];
  15. }
  16. }
  17. void Tensor<f64>::reduce_impl(f64& result, idx_type dim, idx_type idx, std::function<f64(f64,f64)> f) const
  18. {
  19. idx_type dim_size = _dimensions.size();
  20. idx_type step_len = _strides[dim];
  21. idx_type step_num = _dimensions[dim];
  22. for(idx_type i = 0; i < step_num; ++i)
  23. {
  24. if(dim == dim_size - 1)
  25. result = f(result, _rep->data[idx]);
  26. else
  27. reduce_impl(result, dim + 1, idx, f);
  28. idx += step_len;
  29. }
  30. }
  31. f64 Tensor<f64>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<f64(f64,f64)> f) const
  32. {
  33. f64 result{};
  34. for(idx_type i = 0; i < step_num; ++i)
  35. {
  36. result = f(result, _rep->data[begin]);
  37. begin += step_len;
  38. }
  39. return result;
  40. }
  41. void Tensor<f64>::reduce_dim_impl(Tensor<f64>& result, idx_type dim, idx_type reduce_dim,
  42. idx_type this_idx, idx_type result_idx,
  43. std::function<f64(f64,f64)> f) const
  44. {
  45. idx_type dim_size = _dimensions.size();
  46. if(dim == dim_size)
  47. {
  48. result._rep->data[result_idx] =
  49. reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
  50. return;
  51. }
  52. if(dim == reduce_dim)
  53. {
  54. reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
  55. }
  56. else
  57. {
  58. for(idx_type i = 0; i < _dimensions[dim]; ++i)
  59. {
  60. reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
  61. this_idx += _strides[dim];
  62. result_idx += result._strides[dim];
  63. }
  64. }
  65. }
  66. // public
  67. Tensor<f64>::Tensor()
  68. :_rep(new TensorStorage<f64>),
  69. _dimensions(), _offset(0), _strides()
  70. {
  71. }
  72. Tensor<f64>::Tensor(const DimVector& dimensions)
  73. :_rep(new TensorStorage<f64>),
  74. _dimensions(dimensions), _offset(0), _strides()
  75. {
  76. auto_strides();
  77. _rep->resize_(_dimensions.flat_size());
  78. }
  79. Tensor<f64>::Tensor(const DimVector& dimensions, const DimVector& strides)
  80. :_rep(new TensorStorage<f64>),
  81. _dimensions(dimensions), _offset(0), _strides(strides)
  82. {
  83. auto_strides();
  84. _rep->resize_(_dimensions.flat_size());
  85. }
  86. Tensor<f64>::Tensor(const f64& t)
  87. :_rep(new TensorStorage<f64>),
  88. _dimensions(), _offset(0), _strides()
  89. {
  90. _dimensions.resize(1);
  91. auto_strides();
  92. }
  93. void Tensor<f64>::add_(TensorInterfacePtr other)
  94. {
  95. // check tensor other type
  96. if(other->dtype() != DataType::DOUBLE)
  97. throw std::runtime_error("expected type double tensor");
  98. // check broadcast.shape = this.shape
  99. auto shape = broadcast_shape(this->size(), other->size());
  100. if(shape != this->size())
  101. throw std::runtime_error("The size of tensor a must match the size of tensor b");
  102. // ok, get lhs, rhs
  103. Tensor<f64> * lhs = this;
  104. Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
  105. std::function<void(Tensor<f64> *, Tensor<f64> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
  106. [&](Tensor<f64> * lhs, Tensor<f64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
  107. auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(lhs->storage())->data_ptr();
  108. auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(rhs->storage())->data_ptr();
  109. if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
  110. {
  111. lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
  112. return;
  113. }
  114. idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
  115. idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
  116. idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
  117. for (idx_type i = 0; i < max_shape_size; ++i)
  118. {
  119. add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
  120. if(lsh_shape_size > 1)
  121. lhs_idx += lhs->stride(lhs_dim);
  122. if (rsh_shape_size > 1)
  123. rhs_idx += rhs->stride(rhs_dim);
  124. }
  125. };
  126. add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
  127. }
  128. void Tensor<f64>::apply_(std::function<f64(f64)> f)
  129. {
  130. // sort stride for cache optimization
  131. DimVector cloned_stride(_strides);
  132. DimVector sorted_stride(_strides.size());
  133. for(int i = 0; i<_strides.size(); ++i)
  134. sorted_stride[i] = i;
  135. for (int i = 0; i < cloned_stride.size() - 1; i++)
  136. for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
  137. if (cloned_stride[j] < cloned_stride[j + 1])
  138. {
  139. std::swap(cloned_stride[j], cloned_stride[j+1]);
  140. std::swap(sorted_stride[j], sorted_stride[j+1]);
  141. }
  142. std::function<void(idx_type, idx_type, std::function<f64(f64)>)> apply_impl =
  143. [&](idx_type dim_idx, idx_type idx, std::function<f64(f64)> f){
  144. idx_type dim = sorted_stride[dim_idx];
  145. idx_type dim_size = _dimensions.size();
  146. idx_type step_len = _strides[dim];
  147. idx_type step_num = _dimensions[dim];
  148. for(idx_type i = 0; i < step_num; ++i)
  149. {
  150. if(dim_idx == dim_size - 1)
  151. _rep->data[idx] = f(_rep->data[idx]);
  152. else
  153. apply_impl(dim_idx + 1, idx, f);
  154. idx += step_len;
  155. }
  156. };
  157. if(_dimensions.size() > 0)
  158. apply_impl(0, _offset, f);
  159. }
  160. TensorInterfacePtr Tensor<f64>::clone() const
  161. {
  162. std::shared_ptr<Tensor<f64>> cloned_tensor(new Tensor<f64>);
  163. cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<f64>>(_rep->clone());
  164. cloned_tensor->_dimensions = _dimensions;
  165. cloned_tensor->_offset = _offset;
  166. cloned_tensor->_strides = _strides;
  167. return cloned_tensor;
  168. }
  169. void Tensor<f64>::cos_()
  170. {
  171. apply_([](f64 a)->f64 {return std::cos(a); });
  172. }
  173. std::shared_ptr<TensorBase<f32>> Tensor<f64>::create_grad()
  174. {
  175. return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
  176. }
  177. f64* Tensor<f64>::data_ptr()
  178. {
  179. return _rep->data_ptr();
  180. }
  181. const f64* Tensor<f64>::data_ptr() const
  182. {
  183. return _rep->data_ptr();
  184. }
  185. device_id Tensor<f64>::device() { return 0; }
  186. DataType Tensor<f64>::dtype() const
  187. {
  188. return DataType::DOUBLE;
  189. }
  190. bool Tensor<f64>::equal(std::shared_ptr<TensorInterface> other) const
  191. {
  192. if(other->platform() != this->platform())
  193. throw std::runtime_error("equal: Two tensors must be the same platform");
  194. if(other->dtype() != this->dtype())
  195. return false;
  196. if(other->size() != this->size())
  197. return false;
  198. std::shared_ptr<Tensor<f64>> other_ptr = std::dynamic_pointer_cast<Tensor<f64>>(other);
  199. std::function<bool(idx_type, f64*, f64*)> equal_impl =
  200. [&](idx_type dim, f64* lhs_idx, f64* rhs_idx){
  201. idx_type dim_size = _dimensions.size();
  202. for(idx_type i = 0; i < _dimensions[dim]; ++i)
  203. {
  204. if(dim == dim - 1)
  205. {
  206. if(*lhs_idx != *rhs_idx) return false;
  207. }
  208. else
  209. {
  210. if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
  211. }
  212. lhs_idx += _strides[dim];
  213. rhs_idx += other_ptr->stride(dim);
  214. }
  215. return true;
  216. };
  217. return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
  218. }
  219. std::shared_ptr<TensorInterface> Tensor<f64>::inverse() const
  220. {
  221. return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
  222. }
  223. void Tensor<f64>::fill_(f64 value)
  224. {
  225. apply_([&value](f64 a)->f64 {return value; });
  226. }
  227. f64 Tensor<f64>::item() const
  228. {
  229. if(_dimensions.flat_size() == 1)
  230. {
  231. return _rep->data[_offset];
  232. }
  233. else
  234. {
  235. throw std::runtime_error("item: only one element tensors can be converted to scalars");
  236. }
  237. }
  238. std::shared_ptr<TensorInterface> Tensor<f64>::matmul(std::shared_ptr<TensorInterface> mat) const
  239. {
  240. auto right_matrix = std::dynamic_pointer_cast<Tensor<f64>>(mat);
  241. return matmul_impl(*this, *right_matrix);
  242. }
  243. TensorInterfacePtr Tensor<f64>::mean() const
  244. {
  245. DimVector d(1);
  246. d[0] = 1;
  247. TensorPtr<f64> result(new Tensor<f64>(d));
  248. auto flat_size = _dimensions.flat_size();
  249. result->_rep->data[0] = reduce([](f64 a, f64 b)->f64 {return a + b; });
  250. result->_rep->data[0] /= flat_size;
  251. return std::dynamic_pointer_cast<TensorInterface>(result);
  252. }
  253. void Tensor<f64>::mul_(f64 value)
  254. {
  255. apply_([value](f64 a)->f64 {return a*value; });
  256. }
  257. void Tensor<f64>::mul_(std::shared_ptr<TensorInterface> other)
  258. {
  259. // check tensor other type
  260. if(other->dtype() != DataType::DOUBLE)
  261. throw std::runtime_error("expected type double tensor");
  262. // check broadcast.shape = this.shape
  263. auto shape = broadcast_shape(this->size(), other->size());
  264. if(shape != this->size())
  265. throw std::runtime_error("The size of tensor a must match the size of tensor b");
  266. // ok, get lhs, rhs
  267. Tensor<f64> * lhs = this;
  268. Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
  269. std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
  270. [&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
  271. auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
  272. auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
  273. idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
  274. idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
  275. idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
  276. for (idx_type i = 0; i < max_shape_size; ++i)
  277. {
  278. if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
  279. {
  280. lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
  281. }
  282. else
  283. {
  284. mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
  285. }
  286. if(lsh_shape_size > 1)
  287. lhs_idx += lhs->stride(lhs_dim);
  288. if (rsh_shape_size > 1)
  289. rhs_idx += rhs->stride(rhs_dim);
  290. }
  291. };
  292. mul_impl(-1, -1, lhs->offset(), rhs->offset());
  293. }
  294. idx_type Tensor<f64>::ndimension() const
  295. {
  296. return _dimensions.size();
  297. }
  298. void Tensor<f64>::neg_()
  299. {
  300. apply_([](f64 a)->f64 {return -a; });
  301. }
  302. idx_type Tensor<f64>::offset() const { return _offset; }
  303. std::shared_ptr<TensorInterface> Tensor<f64>::permute(const DimVector& dims) const
  304. {
  305. // check dims
  306. if(dims.size() != _strides.size())
  307. throw std::runtime_error("permute dimension must have the same size");
  308. std::vector<int> check_vec(dims.size(), 0);
  309. for(int i = 0; i < dims.size();++i)
  310. if(dims[i] >= 0 && dims[i] < dims.size())
  311. check_vec[dims[i]] = 1;
  312. else
  313. throw std::runtime_error("permute dimension must in ndimension range");
  314. for(int i = 0; i < check_vec.size();++i)
  315. {
  316. if(check_vec[i] != 1)
  317. throw std::runtime_error("permute dimension error");
  318. }
  319. // permute
  320. std::shared_ptr<Tensor<f64>> result(new Tensor<f64>);
  321. result->_rep = _rep;
  322. result->_dimensions = _dimensions;
  323. result->_offset = _offset;
  324. result->_strides = _strides;
  325. for(int i=0; i<dims.size(); ++i)
  326. {
  327. result->_dimensions[i] = _dimensions[dims[i]];
  328. result->_strides[i] = _strides[dims[i]];
  329. }
  330. return result;
  331. }
  332. PlatformType Tensor<f64>::platform() const { return PlatformType::CPU; }
  333. void Tensor<f64>::pow_(f32 exp)
  334. {
  335. apply_([&exp](f64 a)->f64 {return std::pow(a, exp); });
  336. }
  337. f64 Tensor<f64>::reduce(std::function<f64(f64, f64)> f) const
  338. {
  339. f64 result{};
  340. reduce_impl(result, 0, _offset, f);
  341. return result;
  342. }
  343. TensorInterfacePtr Tensor<f64>::reduce_dim(idx_type dim, std::function<f64(f64, f64)> f) const
  344. {
  345. DimVector reduced_dim = _dimensions;
  346. reduced_dim.erase(dim); // check dim?
  347. TensorBasePtr<f64> result(new Tensor<f64>(reduced_dim));
  348. TensorPtr<f64> raw_result = std::dynamic_pointer_cast<Tensor<f64>>(result);
  349. reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
  350. return std::dynamic_pointer_cast<TensorInterface>(result);
  351. }
  352. void Tensor<f64>::reshape_(const DimVector& dims)
  353. {
  354. }
  355. void Tensor<f64>::resize_(const DimVector& dims)
  356. {
  357. _dimensions = dims;
  358. _rep->resize_(dims.flat_size());
  359. auto_strides();
  360. }
  361. std::shared_ptr<TensorInterface> Tensor<f64>::select(const SliceVector& slice) const
  362. {
  363. std::shared_ptr<Tensor<f64>> result(new Tensor<f64>);
  364. result->_rep = _rep;
  365. // dimension
  366. DimVector dim;
  367. std::fesetround(FE_TONEAREST);
  368. for (idx_type i = 0; i < slice.size(); ++i)
  369. {
  370. auto& each = slice[i];
  371. dim.push_back(
  372. std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
  373. );
  374. }
  375. result->_dimensions = dim;
  376. // offset
  377. idx_type new_offset = 1;
  378. for (idx_type i = 0; i < slice.size(); ++i)
  379. {
  380. new_offset *= _strides[i] * slice[i].start.value_or(0);
  381. }
  382. result->_offset = _offset + new_offset;
  383. // strides
  384. DimVector strides;
  385. for (idx_type i = 0; i < slice.size(); ++i)
  386. {
  387. strides.push_back(_strides[i] * slice[i].step.value_or(1));
  388. }
  389. result->_strides = strides;
  390. return std::dynamic_pointer_cast<TensorInterface>(result);
  391. }
  392. void Tensor<f64>::sin_()
  393. {
  394. apply_([](f64 a)->f64 {return std::sin(a); });
  395. }
  396. DimVector Tensor<f64>::size() const { return _dimensions;}
  397. idx_type Tensor<f64>::size(idx_type i) const
  398. {
  399. auto shape_size = _dimensions.size();
  400. if (i >= 0 && i < _dimensions.size())
  401. return _dimensions[i];
  402. else if (i <= -1 && i >= -_dimensions.size())
  403. return _dimensions[shape_size + i];
  404. else
  405. throw std::runtime_error("Dimension out of range");
  406. }
  407. std::shared_ptr<StorageBase<f64>> Tensor<f64>::storage() const { return _rep; }
  408. DimVector Tensor<f64>::stride() const { return _strides; }
  409. idx_type Tensor<f64>::stride(idx_type i) const
  410. {
  411. auto stride_size = _strides.size();
  412. if (i >= 0 && i < _strides.size())
  413. return _strides[i];
  414. else if (i <= -1 && i >= -_strides.size())
  415. return _strides[stride_size + i];
  416. else
  417. throw std::runtime_error("Stride out of range");
  418. }
  419. void Tensor<f64>::sub_(std::shared_ptr<TensorInterface> other)
  420. {
  421. Tensor<f64> * lhs = this;
  422. Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
  423. std::function<void(Tensor<f64> *, Tensor<f64> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
  424. [&](Tensor<f64> * lhs, Tensor<f64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
  425. auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(lhs->storage())->data_ptr();
  426. auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(rhs->storage())->data_ptr();
  427. if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
  428. {
  429. lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
  430. return;
  431. }
  432. idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
  433. idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
  434. idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
  435. for (idx_type i = 0; i < max_shape_size; ++i)
  436. {
  437. sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
  438. if(lhs_shape_size > 1)
  439. lhs_idx += lhs->stride(lhs_dim);
  440. if (rhs_shape_size > 1)
  441. rhs_idx += rhs->stride(rhs_dim);
  442. }
  443. };
  444. sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
  445. }
  446. TensorInterfacePtr Tensor<f64>::sum() const
  447. {
  448. DimVector d(1);
  449. d[0] = 1;
  450. TensorPtr<f64> result(new Tensor<f64>(d));
  451. result->_rep->data[0] = reduce([](f64 a, f64 b)->f64 {return a + b; });
  452. return std::dynamic_pointer_cast<TensorInterface>(result);
  453. }
  454. std::string Tensor<f64>::to_string() const
  455. {
  456. std::function<std::string(const Tensor<f64>&, idx_type, idx_type)> to_string_impl =
  457. [&](const Tensor<f64>& t, idx_type dim, idx_type idx)->std::string {
  458. std::string result;
  459. if (dim == t.size().size())
  460. {
  461. result += std::to_string(t.data_ptr()[idx]);
  462. return result;
  463. }
  464. for (idx_type i = 0; i < t.size(dim); ++i)
  465. {
  466. if (dim != t.size().size() - 1 && i != 0) result += ",\n";
  467. if(dim != t.size().size() - 1) result += "[";
  468. result += to_string_impl(t, dim + 1, idx);
  469. if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
  470. result += ",";
  471. if (dim != t.size().size() - 1) result += "]";
  472. idx += t.stride(dim);
  473. }
  474. return result;
  475. };
  476. std::string result;
  477. result += "[" + to_string_impl(*this, 0, offset()) + "]";
  478. return result;
  479. }
  480. void Tensor<f64>::transpose_(idx_type dim0, idx_type dim1)
  481. {
  482. if(dim0 != dim1 &&
  483. _dimensions.in_range(dim0) &&
  484. _dimensions.in_range(dim1))
  485. {
  486. std::swap(_dimensions[dim0], _dimensions[dim1]);
  487. std::swap(_strides[dim0], _strides[dim1]);
  488. }
  489. }
  490. std::shared_ptr<TensorInterface> Tensor<f64>::transpose(idx_type dim0, idx_type dim1)
  491. {
  492. std::shared_ptr<Tensor<f64>> result(new Tensor<f64>);
  493. result->_rep = _rep;
  494. result->_dimensions = _dimensions;
  495. result->_offset = _offset;
  496. result->_strides = _strides;
  497. result->transpose_(dim0, dim1);
  498. return result;
  499. }
  500. }