// float_tensor.cpp — CPU Tensor<f32> implementation for traph
  1. #include <traph/tensor/float_tensor.h>
  2. namespace traph
  3. {
  4. // definition
  5. // private
  6. void Tensor<f32>::auto_strides()
  7. {
  8. idx_type dim_num = _dimensions.size();
  9. _strides.resize(dim_num);
  10. idx_type stride = 1;
  11. if(_order == layout_type::column_major)
  12. {
  13. for (idx_type i = dim_num - 1; i >= 0; --i)
  14. {
  15. _strides[i] = stride;
  16. stride *= _dimensions[i];
  17. }
  18. }
  19. else
  20. {
  21. for (idx_type i = 0; i < dim_num; ++i)
  22. {
  23. _strides[i] = stride;
  24. stride *= _dimensions[i];
  25. }
  26. }
  27. }
  28. void Tensor<f32>::reduce_impl(f32& result, idx_type dim, idx_type idx, std::function<f32(f32,f32)> f) const
  29. {
  30. idx_type dim_size = _dimensions.size();
  31. idx_type step_len = _strides[dim];
  32. idx_type step_num = _dimensions[dim];
  33. for(idx_type i = 0; i < step_num; ++i)
  34. {
  35. if(dim == dim_size - 1)
  36. result = f(result, _rep->data[idx]);
  37. else
  38. reduce_impl(result, dim + 1, idx, f);
  39. idx += step_len;
  40. }
  41. }
  42. f32 Tensor<f32>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<f32(f32,f32)> f) const
  43. {
  44. f32 result{};
  45. for(idx_type i = 0; i < step_num; ++i)
  46. {
  47. result = f(result, _rep->data[begin]);
  48. begin += step_len;
  49. }
  50. return result;
  51. }
  52. void Tensor<f32>::reduce_dim_impl(Tensor<f32>& result, idx_type dim, idx_type reduce_dim,
  53. idx_type this_idx, idx_type result_idx,
  54. std::function<f32(f32,f32)> f) const
  55. {
  56. idx_type dim_size = _dimensions.size();
  57. if(dim == dim_size)
  58. {
  59. result._rep->data[result_idx] =
  60. reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
  61. return;
  62. }
  63. if(dim == reduce_dim)
  64. {
  65. reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
  66. }
  67. else
  68. {
  69. for(idx_type i = 0; i < _dimensions[dim]; ++i)
  70. {
  71. reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
  72. this_idx += _strides[dim];
  73. result_idx += result._strides[dim];
  74. }
  75. }
  76. }
  77. // public
// Default-construct an empty (rank-0) tensor: fresh storage, zero
// offset, column-major layout, no dimensions or strides.
Tensor<f32>::Tensor()
    :_rep(new TensorStorage<f32>),
    _dimensions(), _offset(0), _strides(), _order(layout_type::column_major)
{
}
// Construct a tensor of the given shape. Strides are derived from the
// shape (column-major default) and the storage is sized to hold every
// element (flat_size()).
Tensor<f32>::Tensor(const DimVector& dimensions)
    :_rep(new TensorStorage<f32>),
    _dimensions(dimensions), _offset(0), _strides(), _order(layout_type::column_major)
{
    auto_strides();
    _rep->resize_(_dimensions.flat_size());
}
// Construct a tensor of the given shape with an explicit memory layout;
// strides are derived from the shape and `order` by auto_strides().
Tensor<f32>::Tensor(const DimVector& dimensions, layout_type order)
    :_rep(new TensorStorage<f32>),
    _dimensions(dimensions), _offset(0), _strides(), _order(order)
{
    auto_strides();
    _rep->resize_(_dimensions.flat_size());
}
  97. Tensor<f32>::Tensor(const DimVector& dimensions, const DimVector& strides)
  98. :_rep(new TensorStorage<f32>),
  99. _dimensions(dimensions), _offset(0), _strides(strides), _order(layout_type::column_major)
  100. {
  101. auto_strides();
  102. _rep->resize_(_dimensions.flat_size());
  103. }
  104. Tensor<f32>::Tensor(const DimVector& dimensions, const DimVector& strides, layout_type order)
  105. :_rep(new TensorStorage<f32>),
  106. _dimensions(dimensions), _offset(0), _strides(strides), _order(order)
  107. {
  108. auto_strides();
  109. _rep->resize_(_dimensions.flat_size());
  110. }
  111. Tensor<f32>::Tensor(const f32& t)
  112. :_rep(new TensorStorage<f32>),
  113. _dimensions(), _offset(0), _strides()
  114. {
  115. _dimensions.resize(1);
  116. auto_strides();
  117. }
// Element-wise in-place addition: *this += *other, broadcasting `other`
// onto this tensor's shape. Throws if `other` is not a float tensor or
// if broadcasting would need to expand *this.
void Tensor<f32>::add_(TensorInterfacePtr other)
{
    // check tensor other type
    if(other->dtype() != DataType::FLOAT)
        throw std::runtime_error("expected type float tensor");
    // check broadcast.shape = this.shape: broadcasting may only expand
    // `other`; the combined shape must equal this tensor's own shape.
    auto shape = broadcast_shape(this->size(), other->size());
    if(shape != this->size())
        throw std::runtime_error("The size of tensor a must match the size of tensor b");
    // ok, get lhs, rhs
    Tensor<f32> * lhs = this;
    Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
    // Recursive walk with dimensions counted NEGATIVELY from the last
    // axis (-1 = innermost), so tensors of different rank align at their
    // trailing dimensions as broadcasting requires.
    std::function<void(idx_type, idx_type, idx_type, idx_type)> add_impl =
        [&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
        auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
        auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
        // Extent of the current axis, or 1 once past the smaller
        // tensor's rank (a virtual broadcast dimension).
        idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
        idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
        idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
        for (idx_type i = 0; i < max_shape_size; ++i)
        {
            if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
            {
                // Innermost level on both sides: accumulate one scalar.
                lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
            }
            else
            {
                add_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
            }
            // Only advance along axes with real extent; size-1
            // (broadcast) axes keep reusing the same index.
            if(lsh_shape_size > 1)
                lhs_idx += lhs->stride(lhs_dim);
            if (rsh_shape_size > 1)
                rhs_idx += rhs->stride(rhs_dim);
        }
    };
    add_impl(-1, -1, lhs->offset(), rhs->offset());
}
  155. void Tensor<f32>::apply_(std::function<f32(f32)> f)
  156. {
  157. // sort stride for cache optimization
  158. DimVector cloned_stride(_strides);
  159. DimVector sorted_stride(_strides.size());
  160. for(int i = 0; i<_strides.size(); ++i)
  161. sorted_stride[i] = i;
  162. for (int i = 0; i < cloned_stride.size() - 1; i++)
  163. for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
  164. if (cloned_stride[j] < cloned_stride[j + 1])
  165. {
  166. std::swap(cloned_stride[j], cloned_stride[j+1]);
  167. std::swap(sorted_stride[j], sorted_stride[j+1]);
  168. }
  169. std::function<void(idx_type, idx_type, std::function<f32(f32)>)> apply_impl =
  170. [&](idx_type dim_idx, idx_type idx, std::function<f32(f32)> f){
  171. idx_type dim = sorted_stride[dim_idx];
  172. idx_type dim_size = _dimensions.size();
  173. idx_type step_len = _strides[dim];
  174. idx_type step_num = _dimensions[dim];
  175. for(idx_type i = 0; i < step_num; ++i)
  176. {
  177. if(dim_idx == dim_size - 1)
  178. _rep->data[idx] = f(_rep->data[idx]);
  179. else
  180. apply_impl(dim_idx + 1, idx, f);
  181. idx += step_len;
  182. }
  183. };
  184. if(_dimensions.size() > 0)
  185. apply_impl(0, _offset, f);
  186. }
  187. TensorInterfacePtr Tensor<f32>::clone() const
  188. {
  189. std::shared_ptr<Tensor<f32>> cloned_tensor(new Tensor<f32>);
  190. cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<f32>>(_rep->clone());
  191. cloned_tensor->_dimensions = _dimensions;
  192. cloned_tensor->_offset = _offset;
  193. cloned_tensor->_strides = _strides;
  194. cloned_tensor->_order = _order;
  195. return cloned_tensor;
  196. }
  197. void Tensor<f32>::cos_()
  198. {
  199. apply_([](f32 a)->f32 {return std::cos(a); });
  200. }
  201. std::shared_ptr<TensorBase<f32>> Tensor<f32>::create_grad()
  202. {
  203. return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
  204. }
  205. f32* Tensor<f32>::data_ptr()
  206. {
  207. return _rep->data_ptr();
  208. }
  209. const f32* Tensor<f32>::data_ptr() const
  210. {
  211. return _rep->data_ptr();
  212. }
  213. device_id Tensor<f32>::device() { return 0; }
  214. DataType Tensor<f32>::dtype() const
  215. {
  216. return DataType::FLOAT;
  217. }
  218. bool Tensor<f32>::equal(std::shared_ptr<TensorInterface> other) const
  219. {
  220. if(other->platform() != this->platform())
  221. throw std::runtime_error("equal: Two tensors must be the same platform");
  222. if(other->dtype() != this->dtype())
  223. return false;
  224. if(other->size() != this->size())
  225. return false;
  226. std::shared_ptr<Tensor<f32>> other_ptr = std::dynamic_pointer_cast<Tensor<f32>>(other);
  227. std::function<bool(idx_type, f32*, f32*)> equal_impl =
  228. [&](idx_type dim, f32* lhs_idx, f32* rhs_idx){
  229. idx_type dim_size = _dimensions.size();
  230. for(idx_type i = 0; i < _dimensions[dim]; ++i)
  231. {
  232. if(dim == dim - 1)
  233. {
  234. if(*lhs_idx != *rhs_idx) return false;
  235. }
  236. else
  237. {
  238. if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
  239. }
  240. lhs_idx += _strides[dim];
  241. rhs_idx += other_ptr->stride(dim);
  242. }
  243. return true;
  244. };
  245. return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
  246. }
  247. std::shared_ptr<TensorInterface> Tensor<f32>::inverse() const
  248. {
  249. return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
  250. }
  251. void Tensor<f32>::fill_(f32 value)
  252. {
  253. apply_([&value](f32 a)->f32 {return value; });
  254. }
  255. f32 Tensor<f32>::item() const
  256. {
  257. if(_dimensions.flat_size() == 1)
  258. {
  259. return _rep->data[_offset];
  260. }
  261. else
  262. {
  263. throw std::runtime_error("item: only one element tensors can be converted to scalars");
  264. }
  265. }
  266. std::shared_ptr<TensorInterface> Tensor<f32>::matmul(std::shared_ptr<TensorInterface> mat) const
  267. {
  268. auto right_matrix = std::dynamic_pointer_cast<Tensor<f32>>(mat);
  269. return matmul_impl(*this, *right_matrix);
  270. }
  271. TensorInterfacePtr Tensor<f32>::mean() const
  272. {
  273. DimVector d(1);
  274. d[0] = 1;
  275. TensorPtr<f32> result(new Tensor<f32>(d));
  276. auto flat_size = _dimensions.flat_size();
  277. result->_rep->data[0] = reduce([](f32 a, f32 b)->f32 {return a + b; });
  278. result->_rep->data[0] /= flat_size;
  279. return std::dynamic_pointer_cast<TensorInterface>(result);
  280. }
  281. void Tensor<f32>::mul_(f32 value)
  282. {
  283. apply_([value](f32 a)->f32 {return a*value; });
  284. }
// Element-wise in-place multiplication: *this *= *other, broadcasting
// `other` onto this tensor's shape. Throws if `other` is not a float
// tensor or if broadcasting would need to expand *this. Mirrors the
// recursion in add_.
void Tensor<f32>::mul_(std::shared_ptr<TensorInterface> other)
{
    // check tensor other type
    if(other->dtype() != DataType::FLOAT)
        throw std::runtime_error("expected type float tensor");
    // check broadcast.shape = this.shape: broadcasting may only expand
    // `other`; the combined shape must equal this tensor's own shape.
    auto shape = broadcast_shape(this->size(), other->size());
    if(shape != this->size())
        throw std::runtime_error("The size of tensor a must match the size of tensor b");
    // ok, get lhs, rhs
    Tensor<f32> * lhs = this;
    Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
    // Recursive walk with dimensions counted NEGATIVELY from the last
    // axis (-1 = innermost), so tensors of different rank align at their
    // trailing dimensions as broadcasting requires.
    std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
        [&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
        auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
        auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
        // Extent of the current axis, or 1 once past the smaller
        // tensor's rank (a virtual broadcast dimension).
        idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
        idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
        idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
        for (idx_type i = 0; i < max_shape_size; ++i)
        {
            if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
            {
                // Innermost level on both sides: multiply one scalar.
                lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
            }
            else
            {
                mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
            }
            // Only advance along axes with real extent; size-1
            // (broadcast) axes keep reusing the same index.
            if(lsh_shape_size > 1)
                lhs_idx += lhs->stride(lhs_dim);
            if (rsh_shape_size > 1)
                rhs_idx += rhs->stride(rhs_dim);
        }
    };
    mul_impl(-1, -1, lhs->offset(), rhs->offset());
}
  322. idx_type Tensor<f32>::ndimension() const
  323. {
  324. return _dimensions.size();
  325. }
  326. void Tensor<f32>::neg_()
  327. {
  328. apply_([](f32 a)->f32 {return -a; });
  329. }
  330. idx_type Tensor<f32>::offset() const { return _offset; }
  331. layout_type Tensor<f32>::order() const { return _order; }
  332. std::shared_ptr<TensorInterface> Tensor<f32>::permute(const DimVector& dims) const
  333. {
  334. // check dims
  335. if(dims.size() != _strides.size())
  336. throw std::runtime_error("permute dimension must have the same size");
  337. std::vector<int> check_vec(dims.size(), 0);
  338. for(int i = 0; i < dims.size();++i)
  339. if(dims[i] >= 0 && dims[i] < dims.size())
  340. check_vec[dims[i]] = 1;
  341. else
  342. throw std::runtime_error("permute dimension must in ndimension range");
  343. for(int i = 0; i < check_vec.size();++i)
  344. {
  345. if(check_vec[i] != 1)
  346. throw std::runtime_error("permute dimension error");
  347. }
  348. // permute
  349. std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
  350. result->_rep = _rep;
  351. result->_dimensions = _dimensions;
  352. result->_offset = _offset;
  353. result->_strides = _strides;
  354. result->_order = _order;
  355. for(int i=0; i<dims.size(); ++i)
  356. {
  357. result->_dimensions[i] = _dimensions[dims[i]];
  358. result->_strides[i] = _strides[dims[i]];
  359. }
  360. return result;
  361. }
  362. PlatformType Tensor<f32>::platform() const { return PlatformType::CPU; }
  363. void Tensor<f32>::pow_(f32 exp)
  364. {
  365. apply_([&exp](f32 a)->f32 {return std::pow(a, exp); });
  366. }
  367. f32 Tensor<f32>::reduce(std::function<f32(f32, f32)> f) const
  368. {
  369. f32 result{};
  370. reduce_impl(result, 0, _offset, f);
  371. return result;
  372. }
  373. TensorInterfacePtr Tensor<f32>::reduce_dim(idx_type dim, std::function<f32(f32, f32)> f) const
  374. {
  375. DimVector reduced_dim = _dimensions;
  376. reduced_dim.erase(dim); // check dim?
  377. TensorBasePtr<f32> result(new Tensor<f32>(reduced_dim));
  378. TensorPtr<f32> raw_result = std::dynamic_pointer_cast<Tensor<f32>>(result);
  379. reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
  380. return std::dynamic_pointer_cast<TensorInterface>(result);
  381. }
// TODO: not implemented — currently a silent no-op. Intended contract
// (by analogy with resize_): change the view to `dims` without
// reallocating, presumably requiring dims.flat_size() to equal the
// current flat size — confirm before implementing.
void Tensor<f32>::reshape_(const DimVector& dims)
{
}
  385. void Tensor<f32>::resize_(const DimVector& dims)
  386. {
  387. _dimensions = dims;
  388. _rep->resize_(dims.flat_size());
  389. auto_strides();
  390. }
  391. std::shared_ptr<TensorInterface> Tensor<f32>::select(const SliceVector& slice) const
  392. {
  393. std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
  394. result->_rep = _rep;
  395. // dimension
  396. DimVector dim;
  397. std::fesetround(FE_TONEAREST);
  398. for (idx_type i = 0; i < slice.size(); ++i)
  399. {
  400. auto& each = slice[i];
  401. dim.push_back(
  402. std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
  403. );
  404. }
  405. result->_dimensions = dim;
  406. // offset
  407. idx_type new_offset = 1;
  408. for (idx_type i = 0; i < slice.size(); ++i)
  409. {
  410. new_offset *= _strides[i] * slice[i].start.value_or(0);
  411. }
  412. result->_offset = _offset + new_offset;
  413. // strides
  414. DimVector strides;
  415. for (idx_type i = 0; i < slice.size(); ++i)
  416. {
  417. strides.push_back(_strides[i] * slice[i].step.value_or(1));
  418. }
  419. result->_strides = strides;
  420. result->_order = _order;
  421. return std::dynamic_pointer_cast<TensorInterface>(result);
  422. }
  423. void Tensor<f32>::sin_()
  424. {
  425. apply_([](f32 a)->f32 {return std::sin(a); });
  426. }
  427. DimVector Tensor<f32>::size() const { return _dimensions;}
  428. idx_type Tensor<f32>::size(idx_type i) const
  429. {
  430. auto shape_size = _dimensions.size();
  431. if (i >= 0 && i < _dimensions.size())
  432. return _dimensions[i];
  433. else if (i <= -1 && i >= -_dimensions.size())
  434. return _dimensions[shape_size + i];
  435. else
  436. throw std::runtime_error("Dimension out of range");
  437. }
  438. std::shared_ptr<StorageBase<f32>> Tensor<f32>::storage() const { return _rep; }
  439. DimVector Tensor<f32>::stride() const { return _strides; }
  440. idx_type Tensor<f32>::stride(idx_type i) const
  441. {
  442. auto stride_size = _strides.size();
  443. if (i >= 0 && i < _strides.size())
  444. return _strides[i];
  445. else if (i <= -1 && i >= -_strides.size())
  446. return _strides[stride_size + i];
  447. else
  448. throw std::runtime_error("Stride out of range");
  449. }
  450. void Tensor<f32>::sub_(std::shared_ptr<TensorInterface> other)
  451. {
  452. Tensor<f32> * lhs = this;
  453. Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
  454. std::function<void(Tensor<f32> *, Tensor<f32> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
  455. [&](Tensor<f32> * lhs, Tensor<f32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
  456. auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
  457. auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
  458. if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
  459. {
  460. lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
  461. return;
  462. }
  463. idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
  464. idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
  465. idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
  466. for (idx_type i = 0; i < max_shape_size; ++i)
  467. {
  468. sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
  469. if(lhs_shape_size > 1)
  470. lhs_idx += lhs->stride(lhs_dim);
  471. if (rhs_shape_size > 1)
  472. rhs_idx += rhs->stride(rhs_dim);
  473. }
  474. };
  475. sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
  476. }
  477. TensorInterfacePtr Tensor<f32>::sum() const
  478. {
  479. DimVector d(1);
  480. d[0] = 1;
  481. TensorPtr<f32> result(new Tensor<f32>(d));
  482. result->_rep->data[0] = reduce([](f32 a, f32 b)->f32 {return a + b; });
  483. return std::dynamic_pointer_cast<TensorInterface>(result);
  484. }
// Render the tensor as a nested "[...]" string: scalars separated by
// commas, sub-tensors by ",\n". Traversal uses stride(dim) and
// offset(), so strided views print correctly.
std::string Tensor<f32>::to_string() const
{
    std::function<std::string(const Tensor<f32>&, idx_type, idx_type)> to_string_impl =
        [&](const Tensor<f32>& t, idx_type dim, idx_type idx)->std::string {
        std::string result;
        if (dim == t.size().size())
        {
            // Past the last dimension: print one scalar element.
            result += std::to_string(t.data_ptr()[idx]);
            return result;
        }
        for (idx_type i = 0; i < t.size(dim); ++i)
        {
            // Non-innermost dims wrap each sub-tensor in brackets and
            // separate siblings with ",\n".
            if (dim != t.size().size() - 1 && i != 0) result += ",\n";
            if(dim != t.size().size() - 1) result += "[";
            result += to_string_impl(t, dim + 1, idx);
            // Innermost dim: comma-separate the scalars.
            if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
                result += ",";
            if (dim != t.size().size() - 1) result += "]";
            idx += t.stride(dim);
        }
        return result;
    };
    std::string result;
    result += "[" + to_string_impl(*this, 0, offset()) + "]";
    return result;
}
  511. void Tensor<f32>::transpose_(idx_type dim0, idx_type dim1)
  512. {
  513. if(dim0 != dim1 &&
  514. _dimensions.in_range(dim0) &&
  515. _dimensions.in_range(dim1))
  516. {
  517. std::swap(_dimensions[dim0], _dimensions[dim1]);
  518. std::swap(_strides[dim0], _strides[dim1]);
  519. }
  520. }
  521. std::shared_ptr<TensorInterface> Tensor<f32>::transpose(idx_type dim0, idx_type dim1)
  522. {
  523. std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
  524. result->_rep = _rep;
  525. result->_dimensions = _dimensions;
  526. result->_offset = _offset;
  527. result->_strides = _strides;
  528. result->_order = _order;
  529. result->transpose_(dim0, dim1);
  530. return result;
  531. }
  532. }