#include namespace traph { // definition // private void Tensor::auto_strides() { idx_type dim_num = _dimensions.size(); _strides.resize(dim_num); idx_type stride = 1; for (idx_type i = dim_num - 1; i >= 0; --i) { _strides[i] = stride; stride *= _dimensions[i]; } } void Tensor::reduce_impl(f64& result, idx_type dim, idx_type idx, std::function f) const { idx_type dim_size = _dimensions.size(); idx_type step_len = _strides[dim]; idx_type step_num = _dimensions[dim]; for(idx_type i = 0; i < step_num; ++i) { if(dim == dim_size - 1) result = f(result, _rep->data[idx]); else reduce_impl(result, dim + 1, idx, f); idx += step_len; } } f64 Tensor::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function f) const { f64 result{}; for(idx_type i = 0; i < step_num; ++i) { result = f(result, _rep->data[begin]); begin += step_len; } return result; } void Tensor::reduce_dim_impl(Tensor& result, idx_type dim, idx_type reduce_dim, idx_type this_idx, idx_type result_idx, std::function f) const { idx_type dim_size = _dimensions.size(); if(dim == dim_size) { result._rep->data[result_idx] = reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f); return; } if(dim == reduce_dim) { reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f); } else { for(idx_type i = 0; i < _dimensions[dim]; ++i) { reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f); this_idx += _strides[dim]; result_idx += result._strides[dim]; } } } // public Tensor::Tensor() :_rep(new TensorStorage), _dimensions(), _offset(0), _strides() { } Tensor::Tensor(const DimVector& dimensions) :_rep(new TensorStorage), _dimensions(dimensions), _offset(0), _strides() { auto_strides(); _rep->resize_(_dimensions.flat_size()); } Tensor::Tensor(const DimVector& dimensions, const DimVector& strides) :_rep(new TensorStorage), _dimensions(dimensions), _offset(0), _strides(strides) { auto_strides(); _rep->resize_(_dimensions.flat_size()); } Tensor::Tensor(const f64& t) :_rep(new TensorStorage), _dimensions(), _offset(0), _strides() { _dimensions.resize(1); auto_strides(); } void Tensor::add_(TensorInterfacePtr other) { // check tensor other type if(other->dtype() != DataType::DOUBLE) throw std::runtime_error("expected type double tensor"); // check broadcast.shape = this.shape auto shape = broadcast_shape(this->size(), other->size()); if(shape != this->size()) throw std::runtime_error("The size of tensor a must match the size of tensor b"); // ok, get lhs, rhs Tensor * lhs = this; Tensor * rhs = dynamic_cast *>(other.get()); std::function *, Tensor *, idx_type, idx_type,idx_type, idx_type)> add_impl = [&](Tensor * lhs, Tensor * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) { auto lhs_storage = std::dynamic_pointer_cast>(lhs->storage())->data_ptr(); auto rhs_storage = std::dynamic_pointer_cast>(rhs->storage())->data_ptr(); if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size())) { lhs_storage[lhs_idx] += rhs_storage[rhs_idx]; return; } idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1; idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1; idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size); for (idx_type i = 0; i < max_shape_size; ++i) { add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx); if(lsh_shape_size > 1) lhs_idx += lhs->stride(lhs_dim); if (rsh_shape_size > 1) rhs_idx += rhs->stride(rhs_dim); } }; add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset()); } void Tensor::apply_(std::function f) { // sort stride for cache optimization DimVector cloned_stride(_strides); DimVector sorted_stride(_strides.size()); for(int i = 0; i<_strides.size(); ++i) sorted_stride[i] = i; for (int i = 0; i < cloned_stride.size() - 1; i++) for (int j = 0; j < cloned_stride.size() - 1 - i; j++) if (cloned_stride[j] < cloned_stride[j + 1]) { std::swap(cloned_stride[j], cloned_stride[j+1]); std::swap(sorted_stride[j], sorted_stride[j+1]); } std::function)> apply_impl = [&](idx_type dim_idx, idx_type idx, std::function f){ idx_type dim = sorted_stride[dim_idx]; idx_type dim_size = _dimensions.size(); idx_type step_len = _strides[dim]; idx_type step_num = _dimensions[dim]; for(idx_type i = 0; i < step_num; ++i) { if(dim_idx == dim_size - 1) _rep->data[idx] = f(_rep->data[idx]); else apply_impl(dim_idx + 1, idx, f); idx += step_len; } }; if(_dimensions.size() > 0) apply_impl(0, _offset, f); } TensorInterfacePtr Tensor::clone() const { std::shared_ptr> cloned_tensor(new Tensor); cloned_tensor->_rep = std::dynamic_pointer_cast>(_rep->clone()); cloned_tensor->_dimensions = _dimensions; cloned_tensor->_offset = _offset; cloned_tensor->_strides = _strides; return cloned_tensor; } void Tensor::cos_() { apply_([](f64 a)->f64 {return std::cos(a); }); } std::shared_ptr> Tensor::create_grad() { return std::shared_ptr>(new Tensor(_dimensions)); } f64* Tensor::data_ptr() { return _rep->data_ptr(); } const f64* Tensor::data_ptr() const { return _rep->data_ptr(); } device_id Tensor::device() { return 0; } DataType Tensor::dtype() const { return DataType::DOUBLE; } bool Tensor::equal(std::shared_ptr other) const { if(other->platform() != this->platform()) throw std::runtime_error("equal: Two tensors must be the same platform"); if(other->dtype() != this->dtype()) return false; if(other->size() != this->size()) return false; std::shared_ptr> other_ptr = std::dynamic_pointer_cast>(other); std::function equal_impl = [&](idx_type dim, f64* lhs_idx, f64* rhs_idx){ idx_type dim_size = _dimensions.size(); for(idx_type i = 0; i < _dimensions[dim]; ++i) { if(dim == dim - 1) { if(*lhs_idx != *rhs_idx) return false; } else { if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false; } lhs_idx += _strides[dim]; rhs_idx += other_ptr->stride(dim); } return true; }; return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset()); } std::shared_ptr Tensor::inverse() const { return std::dynamic_pointer_cast(inverse_impl(*this)); } void Tensor::fill_(f64 value) { apply_([&value](f64 a)->f64 {return value; }); } f64 Tensor::item() const { if(_dimensions.flat_size() == 1) { return _rep->data[_offset]; } else { throw std::runtime_error("item: only one element tensors can be converted to scalars"); } } std::shared_ptr Tensor::matmul(std::shared_ptr mat) const { auto right_matrix = std::dynamic_pointer_cast>(mat); return matmul_impl(*this, *right_matrix); } TensorInterfacePtr Tensor::mean() const { DimVector d(1); d[0] = 1; TensorPtr result(new Tensor(d)); auto flat_size = _dimensions.flat_size(); result->_rep->data[0] = reduce([](f64 a, f64 b)->f64 {return a + b; }); result->_rep->data[0] /= flat_size; return std::dynamic_pointer_cast(result); } void Tensor::mul_(f64 value) { apply_([value](f64 a)->f64 {return a*value; }); } void Tensor::mul_(std::shared_ptr other) { // check tensor other type if(other->dtype() != DataType::DOUBLE) throw std::runtime_error("expected type double tensor"); // check broadcast.shape = this.shape auto shape = broadcast_shape(this->size(), other->size()); if(shape != this->size()) throw std::runtime_error("The size of tensor a must match the size of tensor b"); // ok, get lhs, rhs Tensor * lhs = this; Tensor * rhs = dynamic_cast *>(other.get()); std::function mul_impl = [&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) { auto lhs_storage = std::dynamic_pointer_cast>(lhs->storage())->data_ptr(); auto rhs_storage = std::dynamic_pointer_cast>(rhs->storage())->data_ptr(); idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1; idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1; idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size); for (idx_type i = 0; i < max_shape_size; ++i) { if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size())) { lhs_storage[lhs_idx] *= rhs_storage[rhs_idx]; } else { mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx); } if(lsh_shape_size > 1) lhs_idx += lhs->stride(lhs_dim); if (rsh_shape_size > 1) rhs_idx += rhs->stride(rhs_dim); } }; mul_impl(-1, -1, lhs->offset(), rhs->offset()); } idx_type Tensor::ndimension() const { return _dimensions.size(); } void Tensor::neg_() { apply_([](f64 a)->f64 {return -a; }); } idx_type Tensor::offset() const { return _offset; } std::shared_ptr Tensor::permute(const DimVector& dims) const { // check dims if(dims.size() != _strides.size()) throw std::runtime_error("permute dimension must have the same size"); std::vector check_vec(dims.size(), 0); for(int i = 0; i < dims.size();++i) if(dims[i] >= 0 && dims[i] < dims.size()) check_vec[dims[i]] = 1; else throw std::runtime_error("permute dimension must in ndimension range"); for(int i = 0; i < check_vec.size();++i) { if(check_vec[i] != 1) throw std::runtime_error("permute dimension error"); } // permute std::shared_ptr> result(new Tensor); result->_rep = _rep; result->_dimensions = _dimensions; result->_offset = _offset; result->_strides = _strides; for(int i=0; i_dimensions[i] = _dimensions[dims[i]]; result->_strides[i] = _strides[dims[i]]; } return result; } PlatformType Tensor::platform() const { return PlatformType::CPU; } void Tensor::pow_(f32 exp) { apply_([&exp](f64 a)->f64 {return std::pow(a, exp); }); } f64 Tensor::reduce(std::function f) const { f64 result{}; reduce_impl(result, 0, _offset, f); return result; } TensorInterfacePtr Tensor::reduce_dim(idx_type dim, std::function f) const { DimVector reduced_dim = _dimensions; reduced_dim.erase(dim); // check dim? TensorBasePtr result(new Tensor(reduced_dim)); TensorPtr raw_result = std::dynamic_pointer_cast>(result); reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f); return std::dynamic_pointer_cast(result); } void Tensor::reshape_(const DimVector& dims) { } void Tensor::resize_(const DimVector& dims) { _dimensions = dims; _rep->resize_(dims.flat_size()); auto_strides(); } std::shared_ptr Tensor::select(const SliceVector& slice) const { std::shared_ptr> result(new Tensor); result->_rep = _rep; // dimension DimVector dim; std::fesetround(FE_TONEAREST); for (idx_type i = 0; i < slice.size(); ++i) { auto& each = slice[i]; dim.push_back( std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1))) ); } result->_dimensions = dim; // offset idx_type new_offset = 1; for (idx_type i = 0; i < slice.size(); ++i) { new_offset *= _strides[i] * slice[i].start.value_or(0); } result->_offset = _offset + new_offset; // strides DimVector strides; for (idx_type i = 0; i < slice.size(); ++i) { strides.push_back(_strides[i] * slice[i].step.value_or(1)); } result->_strides = strides; return std::dynamic_pointer_cast(result); } void Tensor::sin_() { apply_([](f64 a)->f64 {return std::sin(a); }); } DimVector Tensor::size() const { return _dimensions;} idx_type Tensor::size(idx_type i) const { auto shape_size = _dimensions.size(); if (i >= 0 && i < _dimensions.size()) return _dimensions[i]; else if (i <= -1 && i >= -_dimensions.size()) return _dimensions[shape_size + i]; else throw std::runtime_error("Dimension out of range"); } std::shared_ptr> Tensor::storage() const { return _rep; } DimVector Tensor::stride() const { return _strides; } idx_type Tensor::stride(idx_type i) const { auto stride_size = _strides.size(); if (i >= 0 && i < _strides.size()) return _strides[i]; else if (i <= -1 && i >= -_strides.size()) return _strides[stride_size + i]; else throw std::runtime_error("Stride out of range"); } void Tensor::sub_(std::shared_ptr other) { Tensor * lhs = this; Tensor * rhs = dynamic_cast *>(other.get()); std::function *, Tensor *, idx_type, idx_type,idx_type, idx_type)> sub_impl = [&](Tensor * lhs, Tensor * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) { auto lhs_storage = std::dynamic_pointer_cast>(lhs->storage())->data_ptr(); auto rhs_storage = std::dynamic_pointer_cast>(rhs->storage())->data_ptr(); if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size())) { lhs_storage[lhs_idx] -= rhs_storage[rhs_idx]; return; } idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1; idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1; idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size); for (idx_type i = 0; i < max_shape_size; ++i) { sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx); if(lhs_shape_size > 1) lhs_idx += lhs->stride(lhs_dim); if (rhs_shape_size > 1) rhs_idx += rhs->stride(rhs_dim); } }; sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset()); } TensorInterfacePtr Tensor::sum() const { DimVector d(1); d[0] = 1; TensorPtr result(new Tensor(d)); result->_rep->data[0] = reduce([](f64 a, f64 b)->f64 {return a + b; }); return std::dynamic_pointer_cast(result); } std::string Tensor::to_string() const { std::function&, idx_type, idx_type)> to_string_impl = [&](const Tensor& t, idx_type dim, idx_type idx)->std::string { std::string result; if (dim == t.size().size()) { result += std::to_string(t.data_ptr()[idx]); return result; } for (idx_type i = 0; i < t.size(dim); ++i) { if (dim != t.size().size() - 1 && i != 0) result += ",\n"; if(dim != t.size().size() - 1) result += "["; result += to_string_impl(t, dim + 1, idx); if (i != t.size(dim) - 1 && dim == t.size().size() - 1) result += ","; if (dim != t.size().size() - 1) result += "]"; idx += t.stride(dim); } return result; }; std::string result; result += "[" + to_string_impl(*this, 0, offset()) + "]"; return result; } void Tensor::transpose_(idx_type dim0, idx_type dim1) { if(dim0 != dim1 && _dimensions.in_range(dim0) && _dimensions.in_range(dim1)) { std::swap(_dimensions[dim0], _dimensions[dim1]); std::swap(_strides[dim0], _strides[dim1]); } } std::shared_ptr Tensor::transpose(idx_type dim0, idx_type dim1) { std::shared_ptr> result(new Tensor); result->_rep = _rep; result->_dimensions = _dimensions; result->_offset = _offset; result->_strides = _strides; result->transpose_(dim0, dim1); return result; } }