Bladeren bron

back to tensor template

JasonWang 6 jaren geleden
bovenliggende
commit
ae80d4e205

+ 1 - 0
traph/include/traph/core/type.h

@@ -17,6 +17,7 @@ namespace traph
     using u16 = std::uint16_t;
     using u32 = std::uint32_t;
     using u64 = std::uint64_t;
+    using grad_type = f32;
     using idx_type = i32;
     using size_type = i32;
     using device_id = i32;

+ 0 - 98
traph/include/traph/tensor/byte_tensor.h

@@ -1,98 +0,0 @@
-#ifndef TRAPH_TENSOR_BYTE_TENSOR_H_
-#define TRAPH_TENSOR_BYTE_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-
-
-#include <traph/core/type.h>
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<u8>: public TensorBase<u8>
-    {
-    public:
-        using value_type = u8;
-        using self_type = Tensor<u8>;
-        using base_type = TensorBase<u8>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<u8(u8)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual u8* data_ptr() override;
-		virtual const u8* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(u8 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual u8 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(u8 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-        virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual u8 reduce(std::function<u8(u8, u8)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<u8(u8, u8)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<u8>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using ByteTensor = Tensor<u8>;
-}
-
-#endif

+ 0 - 97
traph/include/traph/tensor/char_tensor.h

@@ -1,97 +0,0 @@
-#ifndef TRAPH_TENSOR_CHAR_TENSOR_H_
-#define TRAPH_TENSOR_CHAR_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<i8>: public TensorBase<i8>
-    {
-    public:
-        using value_type = i8;
-        using self_type = Tensor<i8>;
-        using base_type = TensorBase<i8>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<i8(i8)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual i8* data_ptr() override;
-		virtual const i8* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(i8 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual i8 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(i8 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-        virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual i8 reduce(std::function<i8(i8, i8)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i8(i8, i8)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<i8>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using CharTensor = Tensor<i8>;
-
-}
-
-#endif

+ 0 - 97
traph/include/traph/tensor/double_tensor.h

@@ -1,97 +0,0 @@
-#ifndef TRAPH_TENSOR_DOUBLE_TENSOR_H_
-#define TRAPH_TENSOR_DOUBLE_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<f64>: public TensorBase<f64>
-    {
-    public:
-        using value_type = f64;
-        using self_type = Tensor<f64>;
-        using base_type = TensorBase<f64>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<f64(f64)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual f64* data_ptr() override;
-		virtual const f64* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(f64 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual f64 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(f64 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-        virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual f64 reduce(std::function<f64(f64, f64)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<f64(f64, f64)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<f64>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using DoubleTensor = Tensor<f64>;
-
-}
-
-#endif

+ 0 - 97
traph/include/traph/tensor/float_tensor.h

@@ -1,97 +0,0 @@
-#ifndef TRAPH_TENSOR_FLOAT_TENSOR_H_
-#define TRAPH_TENSOR_FLOAT_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-#include <memory>
-
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<f32>: public TensorBase<f32>
-    {
-    public:
-        using value_type = f32;
-        using self_type = Tensor<f32>;
-        using base_type = TensorBase<f32>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<f32(f32)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual f32* data_ptr() override;
-		virtual const f32* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(f32 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual f32 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(f32 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-        virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual f32 reduce(std::function<f32(f32, f32)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<f32(f32, f32)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<f32>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using FloatTensor = Tensor<f32>;
-}
-
-#endif

+ 0 - 97
traph/include/traph/tensor/int_tensor.h

@@ -1,97 +0,0 @@
-#ifndef TRAPH_TENSOR_INT_TENSOR_H_
-#define TRAPH_TENSOR_INT_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<i32>: public TensorBase<i32>
-    {
-    public:
-        using value_type = i32;
-        using self_type = Tensor<i32>;
-        using base_type = TensorBase<i32>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<i32(i32)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual i32* data_ptr() override;
-		virtual const i32* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(i32 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual i32 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(i32 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-        virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual i32 reduce(std::function<i32(i32, i32)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i32(i32, i32)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<i32>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using IntTensor = Tensor<i32>;
-
-}
-
-#endif

+ 0 - 97
traph/include/traph/tensor/long_tensor.h

@@ -1,97 +0,0 @@
-#ifndef TRAPH_TENSOR_LONG_TENSOR_H_
-#define TRAPH_TENSOR_LONG_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<i64>: public TensorBase<i64>
-    {
-    public:
-        using value_type = i64;
-        using self_type = Tensor<i64>;
-        using base_type = TensorBase<i64>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<i64(i64)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual i64* data_ptr() override;
-		virtual const i64* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(i64 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual i64 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(i64 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-		virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual i64 reduce(std::function<i64(i64, i64)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i64(i64, i64)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<i64>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using LongTensor = Tensor<i64>;
-
-}
-
-#endif

+ 0 - 96
traph/include/traph/tensor/short_tensor.h

@@ -1,96 +0,0 @@
-#ifndef TRAPH_TENSOR_SHORT_TENSOR_H_
-#define TRAPH_TENSOR_SHORT_TENSOR_H_
-
-#include <utility>
-#include <cmath>
-
-#include <traph/tensor/tensor.h>
-
-namespace traph
-{
-    // ndarray
-    template<>
-    class Tensor<i16>: public TensorBase<i16>
-    {
-    public:
-        using value_type = i16;
-        using self_type = Tensor<i16>;
-        using base_type = TensorBase<i16>;
-        using storage_type = TensorStorage<value_type>;
-
-        using raw_pointer = self_type*;
-        using shared_pointer = std::shared_ptr<self_type>;
-        using reference = self_type&;
-        using const_reference = const self_type&;
-    private:
-        std::shared_ptr<storage_type> _rep;
-        DimVector _dimensions;
-        idx_type _offset;
-		DimVector _strides;
-
-    private:
-        void auto_strides();
-        
-        void reduce_impl(value_type& result, idx_type dim, idx_type idx, std::function<value_type(value_type,value_type)> f) const;
-
-        value_type reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<value_type(value_type,value_type)> f) const;
-
-        void reduce_dim_impl(reference result, idx_type dim, idx_type reduce_dim,
-            idx_type this_idx, idx_type result_idx,
-            std::function<value_type(value_type,value_type)> f) const;
-    public:
-        Tensor();
-        explicit Tensor(const DimVector& dimensions);
-        explicit Tensor(const DimVector& dimensions, const DimVector& strides);
-        Tensor(const value_type& t);
-
-        Tensor(const Tensor& other) = delete;
-        Tensor(Tensor&& other) = delete;
-        Tensor& operator= (const Tensor& other) = delete;
-        Tensor& operator= (Tensor&& other) = delete;
-
-		virtual void add_(TensorInterfacePtr other) override;
-		virtual void apply_(std::function<i16(i16)> f) override;
-		virtual TensorInterfacePtr clone() const override;
-		virtual void cos_() override;
-		virtual std::shared_ptr<TensorBase<f32>> create_grad() override;
-		virtual i16* data_ptr() override;
-		virtual const i16* data_ptr() const override;
-		virtual device_id device() override;
-        virtual DataType dtype() const override;
-        virtual bool equal(std::shared_ptr<TensorInterface> other) const override;
-		virtual void fill_(i16 value) override;
-		virtual std::shared_ptr<TensorInterface> inverse() const override;
-		virtual i16 item() const override;
-		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual TensorInterfacePtr mean() const override;
-        virtual void mul_(i16 value) override;
-        virtual void mul_(std::shared_ptr<TensorInterface> other) override;
-        virtual idx_type ndimension() const override;
-        virtual void neg_() override;
-        virtual idx_type offset() const override;
-        virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const override;
-		virtual PlatformType platform() const override;
-        virtual void pow_(f32 exp) override;
-		virtual i16 reduce(std::function<i16(i16, i16)> f) const override;
-		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i16(i16, i16)> f) const override;
-		virtual void reshape_(const DimVector& dims) override;
-		virtual void resize_(const DimVector& dims) override;
-		virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const override;
-		virtual void sin_() override;
-		virtual DimVector size() const override;
-		virtual idx_type size(idx_type i) const override;
-		virtual std::shared_ptr<StorageBase<i16>> storage() const override;
-		virtual DimVector stride() const override;
-		virtual idx_type stride(idx_type i) const override;
-        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
-		virtual TensorInterfacePtr sum() const override;
-		virtual std::string to_string() const override;
-        virtual void transpose_(idx_type dim0, idx_type dim1) override;
-        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
-    };
-
-    using ShortTensor = Tensor<i16>;
-}
-
-#endif

+ 24 - 0
traph/include/traph/tensor/tensor.h

@@ -42,6 +42,13 @@ namespace traph
         idx_type _offset;
 		DimVector _strides;
 
+    private:
+        void auto_strides();
+        void reduce_impl(T& result, idx_type dim, idx_type idx, std::function<T(T,T)> f) const;
+		void Tensor<T>::reduce_dim_impl(Tensor<T>& result, idx_type dim, idx_type reduce_dim,
+			idx_type this_idx, idx_type result_idx,
+			std::function<T(T, T)> f) const;
+        T reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<T(T,T)> f) const;
     public:
         Tensor();
         explicit Tensor(const DimVector& dimensions);
@@ -101,6 +108,23 @@ namespace traph
 	template<typename T>
 	using TensorConstRef = const Tensor<T>&;
 
+
+    template class Tensor<u8>;
+    template class Tensor<i8>;
+    template class Tensor<i16>;
+    template class Tensor<i32>;
+    template class Tensor<i64>;
+    template class Tensor<f32>;
+    template class Tensor<f64>;
+
+    using ByteTensor = Tensor<u8>;
+    using CharTensor = Tensor<i8>;
+    using ShortTensor = Tensor<i16>;
+    using IntTensor = Tensor<i32>;
+    using LongTensor = Tensor<i64>;
+    using FloatTensor = Tensor<f32>;
+    using DoubleTensor = Tensor<f64>;
+
     // TODO: macros
     // apply apply2 reduce...
 

+ 1 - 1
traph/source/demo/main.cpp

@@ -5,7 +5,7 @@
 #include <traph/nn/layers/linear.h>
 #include <traph/nn/layers/loss.h>
 #include <traph/core/tensor.h>
-#include <traph/tensor/float_tensor.h>
+#include <traph/tensor/tensor.h>
 #include <traph/nn/optim.h>
 
 #include <iostream>

+ 0 - 14
traph/source/tensor/CMakeLists.txt

@@ -5,20 +5,6 @@ SET(HEADER_PATH ${TRAPH_PATH_HEADER}/${LIB_NAME})
 SET(SOURCE_PATH ${TRAPH_PATH_SOURCE}/${LIB_NAME})
 
 SET(TENSOR_LIST
-	${HEADER_PATH}/float_tensor.h
-	${SOURCE_PATH}/float_tensor.cpp
-	${HEADER_PATH}/double_tensor.h
-	${SOURCE_PATH}/double_tensor.cpp
-	${HEADER_PATH}/char_tensor.h
-	${SOURCE_PATH}/char_tensor.cpp
-	${HEADER_PATH}/byte_tensor.h
-	${SOURCE_PATH}/byte_tensor.cpp
-	${HEADER_PATH}/short_tensor.h
-	${SOURCE_PATH}/short_tensor.cpp
-	${HEADER_PATH}/int_tensor.h
-	${SOURCE_PATH}/int_tensor.cpp
-	${HEADER_PATH}/long_tensor.h
-	${SOURCE_PATH}/long_tensor.cpp
 	${HEADER_PATH}/tensor.h
 	${SOURCE_PATH}/tensor.cpp
 	${HEADER_PATH}/arithmetic.h

+ 0 - 598
traph/source/tensor/byte_tensor.cpp

@@ -1,598 +0,0 @@
-#include <traph/tensor/byte_tensor.h>
-
-namespace traph
-{
-    // definition
-    // private
-    void Tensor<u8>::auto_strides()
-    {
-        idx_type dim_num = _dimensions.size();
-        _strides.resize(dim_num);
-        idx_type stride = 1;
-        for (idx_type i = dim_num - 1; i >= 0; --i)
-        {
-            _strides[i] = stride;
-            stride *= _dimensions[i];
-        }
-    }
-
-    void Tensor<u8>::reduce_impl(u8& result, idx_type dim, idx_type idx, std::function<u8(u8,u8)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        idx_type step_len = _strides[dim];
-        idx_type step_num = _dimensions[dim];
-
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            if(dim == dim_size - 1)
-                result = f(result, _rep->data[idx]);
-            else
-                reduce_impl(result, dim + 1, idx, f);
-            idx += step_len;
-        }
-    }
-
-    u8 Tensor<u8>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<u8(u8,u8)> f) const
-    {
-        u8 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
-    void Tensor<u8>::reduce_dim_impl(Tensor<u8>& result, idx_type dim, idx_type reduce_dim,
-        idx_type this_idx, idx_type result_idx,
-        std::function<u8(u8,u8)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        if(dim == dim_size)
-        {
-            result._rep->data[result_idx] = 
-                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
-            return;
-        }
-
-        if(dim == reduce_dim)
-        {
-            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-        }
-        else
-        {
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-                    
-                this_idx += _strides[dim];
-                result_idx += result._strides[dim];
-            }
-        }
-    }
-    // public
-    Tensor<u8>::Tensor()
-        :_rep(new TensorStorage<u8>),
-        _dimensions(), _offset(0), _strides()
-    {
-    }
-
-    Tensor<u8>::Tensor(const DimVector& dimensions)
-        :_rep(new TensorStorage<u8>),
-        _dimensions(dimensions), _offset(0), _strides()
-    {
-        auto_strides();
-        
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<u8>::Tensor(const DimVector& dimensions, const DimVector& strides)
-        :_rep(new TensorStorage<u8>),
-        _dimensions(dimensions), _offset(0), _strides(strides)
-    {
-        auto_strides();
-
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<u8>::Tensor(const u8& t)
-        :_rep(new TensorStorage<u8>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
-    void Tensor<u8>::add_(TensorInterfacePtr other)
-    {
-		// check tensor other type
-        if(other->dtype() != DataType::BYTE)
-            throw std::runtime_error("expected type byte tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<u8> * lhs = this;
-		Tensor<u8> * rhs = dynamic_cast<Tensor<u8> *>(other.get());
-		std::function<void(Tensor<u8> *, Tensor<u8> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
-			[&](Tensor<u8> * lhs, Tensor<u8> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<u8>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<u8>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-
-    void Tensor<u8>::apply_(std::function<u8(u8)> f)
-    {
-        // sort stride for cache optimization
-		DimVector cloned_stride(_strides);
-        DimVector sorted_stride(_strides.size());
-        for(int i = 0; i<_strides.size(); ++i)
-            sorted_stride[i] = i;
-        
-        for (int i = 0; i < cloned_stride.size() - 1; i++)
-            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
-                if (cloned_stride[j] < cloned_stride[j + 1])
-                {
-                    std::swap(cloned_stride[j], cloned_stride[j+1]);
-                    std::swap(sorted_stride[j], sorted_stride[j+1]);
-                }
-        
-        std::function<void(idx_type, idx_type, std::function<u8(u8)>)> apply_impl =
-        [&](idx_type dim_idx, idx_type idx, std::function<u8(u8)> f){
-            idx_type dim = sorted_stride[dim_idx];
-            idx_type dim_size = _dimensions.size();
-
-            idx_type step_len = _strides[dim];
-            idx_type step_num = _dimensions[dim];
-            
-            for(idx_type i = 0; i < step_num; ++i)
-            {
-                if(dim_idx == dim_size - 1)
-                    _rep->data[idx] = f(_rep->data[idx]);
-                else
-                    apply_impl(dim_idx + 1, idx, f);
-                idx += step_len;
-            }
-        };
-
-        apply_impl(0, _offset, f);
-    }
-
-    TensorInterfacePtr Tensor<u8>::clone() const
-    {
-        std::shared_ptr<Tensor<u8>> cloned_tensor(new Tensor<u8>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<u8>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<u8>::cos_()
-    {
-        throw std::runtime_error("No implement");
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<u8>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	u8* Tensor<u8>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const u8* Tensor<u8>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<u8>::device() { return 0; }
-
-    DataType Tensor<u8>::dtype() const
-    {
-        return DataType::BYTE;
-    }
-
-    bool Tensor<u8>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<u8>> other_ptr = std::dynamic_pointer_cast<Tensor<u8>>(other);
-        
-        std::function<bool(idx_type, u8*, u8*)> equal_impl =
-        [&](idx_type dim, u8* lhs_idx, u8* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<u8>::inverse() const
-	{
-		throw std::runtime_error("No implement");
-	}
-
-    void Tensor<u8>::fill_(u8 value)
-    {
-        apply_([&value](u8 a)->u8 {return value; });
-    }
-
-	u8 Tensor<u8>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<u8>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<u8>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<u8>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<u8> result(new Tensor<u8>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](u8 a, u8 b)->u8 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<u8>::mul_(u8 value)
-    {
-        apply_([value](u8 a)->u8 {return a*value; });
-    }
-
-    void Tensor<u8>::mul_(std::shared_ptr<TensorInterface> other)
-    {
-        // check tensor other type
-        if(other->dtype() != DataType::BYTE)
-            throw std::runtime_error("expected type byte tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<u8> * lhs = this;
-		Tensor<u8> * rhs = dynamic_cast<Tensor<u8> *>(other.get());
-		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
-			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
-                {
-                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
-                }
-                else
-                {
-                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-                }
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		mul_impl(-1, -1, lhs->offset(), rhs->offset());
-    }
-
-    idx_type Tensor<u8>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<u8>::neg_()
-    {
-        apply_([](u8 a)->u8 {return -a; });
-    }
-
-    idx_type Tensor<u8>::offset() const { return _offset; }
-
-    std::shared_ptr<TensorInterface> Tensor<u8>::permute(const DimVector& dims) const
-    {
-        // check dims
-        if(dims.size() != _strides.size())
-            throw std::runtime_error("permute dimension must have the same size");
-        std::vector<int> check_vec(dims.size(), 0);
-        for(int i = 0; i < dims.size();++i)
-            if(dims[i] >= 0 && dims[i] < dims.size())
-                check_vec[dims[i]] = 1;
-            else
-                throw std::runtime_error("permute dimension must in ndimension range");
-        
-        for(int i = 0; i < check_vec.size();++i)
-        {
-            if(check_vec[i] != 1)
-                throw std::runtime_error("permute dimension error");
-        }
-        // permute
-        std::shared_ptr<Tensor<u8>> result(new Tensor<u8>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        for(int i=0; i<dims.size(); ++i)
-        {
-            result->_dimensions[i] = _dimensions[dims[i]];
-            result->_strides[i] = _strides[dims[i]];
-        }
-
-        return result;
-    }
-
-    PlatformType Tensor<u8>::platform() const { return PlatformType::CPU; }
-
-    void Tensor<u8>::pow_(f32 exp)
-    {
-        std::int32_t exp_int = static_cast<std::int32_t>(exp);
-        apply_([&exp_int](u8 a)->u8 {return static_cast<u8>(std::pow(a, exp_int)); });
-    }
-
-	u8 Tensor<u8>::reduce(std::function<u8(u8, u8)> f) const
-    {
-		u8 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
-    TensorInterfacePtr Tensor<u8>::reduce_dim(idx_type dim, std::function<u8(u8, u8)> f) const
-    {
-        DimVector reduced_dim = _dimensions;
-        reduced_dim.erase(dim); // check dim?
-        TensorBasePtr<u8> result(new Tensor<u8>(reduced_dim));
-        TensorPtr<u8> raw_result = std::dynamic_pointer_cast<Tensor<u8>>(result);
-        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    void Tensor<u8>::reshape_(const DimVector& dims)
-    {
-
-    }
-    
-    void Tensor<u8>::resize_(const DimVector& dims)
-    {
-        _dimensions = dims;
-        _rep->resize_(dims.flat_size());
-        auto_strides();
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<u8>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<u8>> result(new Tensor<u8>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
-    void Tensor<u8>::sin_()
-    {
-        throw std::runtime_error("No implement");
-    }
-    
-    DimVector Tensor<u8>::size() const { return _dimensions;}
-	
-	idx_type Tensor<u8>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<u8>>  Tensor<u8>::storage() const { return _rep; }
-    
-    DimVector Tensor<u8>::stride() const { return _strides; }
-	
-	idx_type Tensor<u8>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
-    void Tensor<u8>::sub_(std::shared_ptr<TensorInterface> other)
-    {
-        Tensor<u8> * lhs = this;
-		Tensor<u8> * rhs = dynamic_cast<Tensor<u8> *>(other.get());
-		std::function<void(Tensor<u8> *, Tensor<u8> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
-			[&](Tensor<u8> * lhs, Tensor<u8> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<u8>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<u8>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lhs_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rhs_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-    
-    TensorInterfacePtr Tensor<u8>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<u8> result(new Tensor<u8>(d));
-        result->_rep->data[0] = reduce([](u8 a, u8 b)->u8 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    std::string Tensor<u8>::to_string() const
-    {
-        std::function<std::string(const Tensor<u8>&, idx_type, idx_type)> to_string_impl =
-			[&](const Tensor<u8>& t, idx_type dim, idx_type idx)->std::string {
-            std::string result;
-			if (dim == t.size().size())
-            {
-                result += std::to_string(t.data_ptr()[idx]);
-				return result;
-            }
-
-			for (idx_type i = 0; i < t.size(dim); ++i)
-			{
-				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
-				if(dim != t.size().size() - 1)	result += "[";
-				result += to_string_impl(t, dim + 1, idx);
-				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
-					result += ",";
-				if (dim != t.size().size() - 1) result += "]";
-
-				idx += t.stride(dim);
-			}
-
-			return result;
-		};
-
-		std::string result;
-		result += "[" + to_string_impl(*this, 0, offset()) + "]";
-		return result;
-    }
-
-    void Tensor<u8>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<u8>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<u8>> result(new Tensor<u8>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 0 - 598
traph/source/tensor/char_tensor.cpp

@@ -1,598 +0,0 @@
-#include <traph/tensor/char_tensor.h>
-
-namespace traph
-{
-    // definition
-    // private
-    void Tensor<i8>::auto_strides()
-    {
-        idx_type dim_num = _dimensions.size();
-        _strides.resize(dim_num);
-        idx_type stride = 1;
-        for (idx_type i = dim_num - 1; i >= 0; --i)
-        {
-            _strides[i] = stride;
-            stride *= _dimensions[i];
-        }
-    }
-
-    void Tensor<i8>::reduce_impl(i8& result, idx_type dim, idx_type idx, std::function<i8(i8,i8)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        idx_type step_len = _strides[dim];
-        idx_type step_num = _dimensions[dim];
-
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            if(dim == dim_size - 1)
-                result = f(result, _rep->data[idx]);
-            else
-                reduce_impl(result, dim + 1, idx, f);
-            idx += step_len;
-        }
-    }
-
-    i8 Tensor<i8>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<i8(i8,i8)> f) const
-    {
-        i8 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
-    void Tensor<i8>::reduce_dim_impl(Tensor<i8>& result, idx_type dim, idx_type reduce_dim,
-        idx_type this_idx, idx_type result_idx,
-        std::function<i8(i8,i8)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        if(dim == dim_size)
-        {
-            result._rep->data[result_idx] = 
-                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
-            return;
-        }
-
-        if(dim == reduce_dim)
-        {
-            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-        }
-        else
-        {
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-                    
-                this_idx += _strides[dim];
-                result_idx += result._strides[dim];
-            }
-        }
-    }
-    // public
-    Tensor<i8>::Tensor()
-        :_rep(new TensorStorage<i8>),
-        _dimensions(), _offset(0), _strides()
-    {
-    }
-
-    Tensor<i8>::Tensor(const DimVector& dimensions)
-        :_rep(new TensorStorage<i8>),
-        _dimensions(dimensions), _offset(0), _strides()
-    {
-        auto_strides();
-        
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i8>::Tensor(const DimVector& dimensions, const DimVector& strides)
-        :_rep(new TensorStorage<i8>),
-        _dimensions(dimensions), _offset(0), _strides(strides)
-    {
-        auto_strides();
-
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i8>::Tensor(const i8& t)
-        :_rep(new TensorStorage<i8>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
-    void Tensor<i8>::add_(TensorInterfacePtr other)
-    {
-		// check tensor other type
-        if(other->dtype() != DataType::CHAR)
-            throw std::runtime_error("expected type char tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i8> * lhs = this;
-		Tensor<i8> * rhs = dynamic_cast<Tensor<i8> *>(other.get());
-		std::function<void(Tensor<i8> *, Tensor<i8> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
-			[&](Tensor<i8> * lhs, Tensor<i8> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i8>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i8>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-
-    void Tensor<i8>::apply_(std::function<i8(i8)> f)
-    {
-        // sort stride for cache optimization
-		DimVector cloned_stride(_strides);
-        DimVector sorted_stride(_strides.size());
-        for(int i = 0; i<_strides.size(); ++i)
-            sorted_stride[i] = i;
-        
-        for (int i = 0; i < cloned_stride.size() - 1; i++)
-            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
-                if (cloned_stride[j] < cloned_stride[j + 1])
-                {
-                    std::swap(cloned_stride[j], cloned_stride[j+1]);
-                    std::swap(sorted_stride[j], sorted_stride[j+1]);
-                }
-        
-        std::function<void(idx_type, idx_type, std::function<i8(i8)>)> apply_impl =
-        [&](idx_type dim_idx, idx_type idx, std::function<i8(i8)> f){
-            idx_type dim = sorted_stride[dim_idx];
-            idx_type dim_size = _dimensions.size();
-
-            idx_type step_len = _strides[dim];
-            idx_type step_num = _dimensions[dim];
-            
-            for(idx_type i = 0; i < step_num; ++i)
-            {
-                if(dim_idx == dim_size - 1)
-                    _rep->data[idx] = f(_rep->data[idx]);
-                else
-                    apply_impl(dim_idx + 1, idx, f);
-                idx += step_len;
-            }
-        };
-
-        apply_impl(0, _offset, f);
-    }
-
-    TensorInterfacePtr Tensor<i8>::clone() const
-    {
-        std::shared_ptr<Tensor<i8>> cloned_tensor(new Tensor<i8>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<i8>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<i8>::cos_()
-    {
-        throw std::runtime_error("No implement");
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<i8>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	i8* Tensor<i8>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const i8* Tensor<i8>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<i8>::device() { return 0; }
-
-    DataType Tensor<i8>::dtype() const
-    {
-        return DataType::CHAR;
-    }
-
-    bool Tensor<i8>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<i8>> other_ptr = std::dynamic_pointer_cast<Tensor<i8>>(other);
-        
-        std::function<bool(idx_type, i8*, i8*)> equal_impl =
-        [&](idx_type dim, i8* lhs_idx, i8* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i8>::inverse() const
-	{
-		throw std::runtime_error("No implement");
-	}
-
-    void Tensor<i8>::fill_(i8 value)
-    {
-        apply_([&value](i8 a)->i8 {return value; });
-    }
-
-	i8 Tensor<i8>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i8>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<i8>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<i8>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i8> result(new Tensor<i8>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](i8 a, i8 b)->i8 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<i8>::mul_(i8 value)
-    {
-        apply_([value](i8 a)->i8 {return a*value; });
-    }
-
-    void Tensor<i8>::mul_(std::shared_ptr<TensorInterface> other)
-    {
-        // check tensor other type
-        if(other->dtype() != DataType::CHAR)
-            throw std::runtime_error("expected type char tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i8> * lhs = this;
-		Tensor<i8> * rhs = dynamic_cast<Tensor<i8> *>(other.get());
-		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
-			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
-                {
-                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
-                }
-                else
-                {
-                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-                }
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		mul_impl(-1, -1, lhs->offset(), rhs->offset());
-    }
-
-    idx_type Tensor<i8>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<i8>::neg_()
-    {
-        apply_([](i8 a)->i8 {return -a; });
-    }
-
-    idx_type Tensor<i8>::offset() const { return _offset; }
-
    // Returns a view of this tensor with its axes re-ordered according to
    // `dims` (dims[i] names the source axis that becomes axis i).
    // Storage is shared with *this; only shape/strides differ.
    // Throws if `dims` is not a permutation of [0, ndimension()).
    std::shared_ptr<TensorInterface> Tensor<i8>::permute(const DimVector& dims) const
    {
        // check dims
        if(dims.size() != _strides.size())
            throw std::runtime_error("permute dimension must have the same size");
        // Mark every axis index seen; out-of-range entries throw immediately.
        std::vector<int> check_vec(dims.size(), 0);
        for(int i = 0; i < dims.size();++i)
            if(dims[i] >= 0 && dims[i] < dims.size())
                check_vec[dims[i]] = 1;
            else
                throw std::runtime_error("permute dimension must in ndimension range");
        
        // Any unmarked slot means a duplicate/missing axis -> not a permutation.
        for(int i = 0; i < check_vec.size();++i)
        {
            if(check_vec[i] != 1)
                throw std::runtime_error("permute dimension error");
        }
        // permute: build a view over the same storage, then re-order
        // dimensions and strides together so element addressing is preserved.
        std::shared_ptr<Tensor<i8>> result(new Tensor<i8>);
        result->_rep = _rep;
        result->_dimensions = _dimensions;
        result->_offset = _offset;
        result->_strides = _strides;

        for(int i=0; i<dims.size(); ++i)
        {
            result->_dimensions[i] = _dimensions[dims[i]];
            result->_strides[i] = _strides[dims[i]];
        }

        return result;
    }
-
-    PlatformType Tensor<i8>::platform() const { return PlatformType::CPU; }
-
    // In-place power: every element x becomes x^exp.
    // NOTE(review): the exponent is truncated to an integer first, so e.g.
    // pow_(2.5f) behaves as pow_(2.0f) -- presumably deliberate for an
    // integral element type, but worth confirming with the other overloads.
    void Tensor<i8>::pow_(f32 exp)
    {
        std::int32_t exp_int = static_cast<std::int32_t>(exp);
        apply_([&exp_int](i8 a)->i8 {return static_cast<i8>(std::pow(a, exp_int)); });
    }
-
-	i8 Tensor<i8>::reduce(std::function<i8(i8, i8)> f) const
-    {
-		i8 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
-    TensorInterfacePtr Tensor<i8>::reduce_dim(idx_type dim, std::function<i8(i8, i8)> f) const
-    {
-        DimVector reduced_dim = _dimensions;
-        reduced_dim.erase(dim); // check dim?
-        TensorBasePtr<i8> result(new Tensor<i8>(reduced_dim));
-        TensorPtr<i8> raw_result = std::dynamic_pointer_cast<Tensor<i8>>(result);
-        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
    // In-place reshape.
    // TODO(review): not implemented -- the call is currently a silent no-op,
    // which callers are unlikely to expect.
    void Tensor<i8>::reshape_(const DimVector& dims)
    {

    }
-    
-    void Tensor<i8>::resize_(const DimVector& dims)
-    {
-        _dimensions = dims;
-        _rep->resize_(dims.flat_size());
-        auto_strides();
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i8>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<i8>> result(new Tensor<i8>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
    // sin() is not provided for this integral tensor type; always throws.
    void Tensor<i8>::sin_()
    {
        throw std::runtime_error("No implement");
    }
-    
-    DimVector Tensor<i8>::size() const { return _dimensions;}
-	
-	idx_type Tensor<i8>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<i8>>  Tensor<i8>::storage() const { return _rep; }
-    
-    DimVector Tensor<i8>::stride() const { return _strides; }
-	
-	idx_type Tensor<i8>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
-    void Tensor<i8>::sub_(std::shared_ptr<TensorInterface> other)
-    {
-        Tensor<i8> * lhs = this;
-		Tensor<i8> * rhs = dynamic_cast<Tensor<i8> *>(other.get());
-		std::function<void(Tensor<i8> *, Tensor<i8> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
-			[&](Tensor<i8> * lhs, Tensor<i8> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i8>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i8>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lhs_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rhs_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-    
-    TensorInterfacePtr Tensor<i8>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i8> result(new Tensor<i8>(d));
-        result->_rep->data[0] = reduce([](i8 a, i8 b)->i8 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
    // Renders the tensor as nested bracketed lists, e.g. "[[1,2],[3,4]]",
    // honouring offset and strides (so views print correctly).
    std::string Tensor<i8>::to_string() const
    {
        // Depth-first walk: `dim` is the axis being printed, `idx` the flat
        // storage index of the current element/sub-tensor origin.
        std::function<std::string(const Tensor<i8>&, idx_type, idx_type)> to_string_impl =
			[&](const Tensor<i8>& t, idx_type dim, idx_type idx)->std::string {
            std::string result;
			// Past the last axis: print the scalar itself.
			if (dim == t.size().size())
            {
                result += std::to_string(t.data_ptr()[idx]);
				return result;
            }

			for (idx_type i = 0; i < t.size(dim); ++i)
			{
				// Outer axes separate their children with ",\n" and wrap
				// each child in brackets; the innermost axis uses bare ",".
				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
				if(dim != t.size().size() - 1)	result += "[";
				result += to_string_impl(t, dim + 1, idx);
				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
					result += ",";
				if (dim != t.size().size() - 1) result += "]";

				idx += t.stride(dim);
			}

			return result;
		};

		std::string result;
		result += "[" + to_string_impl(*this, 0, offset()) + "]";
		return result;
    }
-
-    void Tensor<i8>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<i8>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<i8>> result(new Tensor<i8>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 0 - 599
traph/source/tensor/double_tensor.cpp

@@ -1,599 +0,0 @@
-#include <traph/tensor/double_tensor.h>
-
-namespace traph
-{
-    // definition
-    // private
    // Recomputes row-major (C-contiguous) strides from the current shape:
    // the last axis has stride 1, each earlier axis the product of the
    // extents to its right.
    void Tensor<f64>::auto_strides()
    {
        idx_type dim_num = _dimensions.size();
        _strides.resize(dim_num);
        idx_type stride = 1;

        for (idx_type i = dim_num - 1; i >= 0; --i)
        {
            _strides[i] = stride;
            stride *= _dimensions[i];
        }
    }
-
    // Recursive helper for reduce(): folds `f` over every element reachable
    // from axis `dim` at flat storage index `idx`, accumulating into `result`.
    void Tensor<f64>::reduce_impl(f64& result, idx_type dim, idx_type idx, std::function<f64(f64,f64)> f) const
    {
        idx_type dim_size = _dimensions.size();

        idx_type step_len = _strides[dim];
        idx_type step_num = _dimensions[dim];

        for(idx_type i = 0; i < step_num; ++i)
        {
            // Innermost axis folds the element; outer axes recurse.
            if(dim == dim_size - 1)
                result = f(result, _rep->data[idx]);
            else
                reduce_impl(result, dim + 1, idx, f);
            idx += step_len;
        }
    }
-
-    f64 Tensor<f64>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<f64(f64,f64)> f) const
-    {
-        f64 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
    // Recursive helper for reduce_dim(): walks every axis except
    // `reduce_dim`; once all axes are fixed (dim == rank) the remaining 1-D
    // fibre along `reduce_dim` is folded with `f` and written into `result`.
    void Tensor<f64>::reduce_dim_impl(Tensor<f64>& result, idx_type dim, idx_type reduce_dim,
        idx_type this_idx, idx_type result_idx,
        std::function<f64(f64,f64)> f) const
    {
        idx_type dim_size = _dimensions.size();

        if(dim == dim_size)
        {
            result._rep->data[result_idx] = 
                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
            return;
        }

        if(dim == reduce_dim)
        {
            // The reduced axis contributes no loop of its own.
            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
        }
        else
        {
            for(idx_type i = 0; i < _dimensions[dim]; ++i)
            {
                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
                    
                this_idx += _strides[dim];
                // NOTE(review): `result` has one fewer axis than *this yet is
                // indexed with the same `dim`; this lines up only for axes
                // before `reduce_dim` -- verify for dim > reduce_dim.
                result_idx += result._strides[dim];
            }
        }
    }
-    // public
-    Tensor<f64>::Tensor()
-        :_rep(new TensorStorage<f64>),
-        _dimensions(), _offset(0), _strides()
-    {
-    }
-
-    Tensor<f64>::Tensor(const DimVector& dimensions)
-        :_rep(new TensorStorage<f64>),
-        _dimensions(dimensions), _offset(0), _strides()
-    {
-        auto_strides();
-        
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<f64>::Tensor(const DimVector& dimensions, const DimVector& strides)
-        :_rep(new TensorStorage<f64>),
-        _dimensions(dimensions), _offset(0), _strides(strides)
-    {
-        auto_strides();
-
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<f64>::Tensor(const f64& t)
-        :_rep(new TensorStorage<f64>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
    // In-place element-wise addition: this += other, where `other` may be
    // broadcast (right-aligned, size-1 axes repeat) up to this tensor's shape.
    void Tensor<f64>::add_(TensorInterfacePtr other)
    {
		// check tensor other type
        if(other->dtype() != DataType::DOUBLE)
            throw std::runtime_error("expected type double tensor");
		// check broadcast.shape = this.shape
        auto shape = broadcast_shape(this->size(), other->size());
        if(shape != this->size())
            throw std::runtime_error("The size of tensor a must match the size of tensor b");
		// ok, get lhs, rhs
		Tensor<f64> * lhs = this;
		Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
		// Axes are walked from the LAST one (dim == -1) leftwards so the two
		// shapes align on the right, as broadcasting requires.
		std::function<void(Tensor<f64> *, Tensor<f64> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
			[&](Tensor<f64> * lhs, Tensor<f64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {

			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(lhs->storage())->data_ptr();
			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(rhs->storage())->data_ptr();

			// Base case: past the leftmost axis of both tensors -- the
			// indices now address a single element each.
			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
			{
				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
				return;
			}

			// Axis extent, treating missing axes as size 1 (broadcast).
			// ("lsh"/"rsh" are the original's spelling of lhs/rhs.)
			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);

			for (idx_type i = 0; i < max_shape_size; ++i)
			{
				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);

				// Broadcast (size-1) axes keep their index fixed.
				if(lsh_shape_size > 1)
					lhs_idx += lhs->stride(lhs_dim);
				if (rsh_shape_size > 1)
					rhs_idx += rhs->stride(rhs_dim);
			}
		};

		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
    }
-
    // In-place map: every element becomes f(element).
    void Tensor<f64>::apply_(std::function<f64(f64)> f)
    {
        // sort stride for cache optimization
        // Axes are visited in order of decreasing stride so the innermost
        // loop steps through memory with the smallest stride (cache-friendly).
		DimVector cloned_stride(_strides);
        DimVector sorted_stride(_strides.size());
        for(int i = 0; i<_strides.size(); ++i)
            sorted_stride[i] = i;
        
        // Bubble-sort the stride values descending, mirroring every swap
        // into the axis permutation `sorted_stride`.
        for (int i = 0; i < cloned_stride.size() - 1; i++)
            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
                if (cloned_stride[j] < cloned_stride[j + 1])
                {
                    std::swap(cloned_stride[j], cloned_stride[j+1]);
                    std::swap(sorted_stride[j], sorted_stride[j+1]);
                }
        
        // Depth-first walk over the permuted axes; at the last axis the
        // element itself is rewritten.
        std::function<void(idx_type, idx_type, std::function<f64(f64)>)> apply_impl =
        [&](idx_type dim_idx, idx_type idx, std::function<f64(f64)> f){
            idx_type dim = sorted_stride[dim_idx];
            idx_type dim_size = _dimensions.size();

            idx_type step_len = _strides[dim];
            idx_type step_num = _dimensions[dim];
            
            for(idx_type i = 0; i < step_num; ++i)
            {
                if(dim_idx == dim_size - 1)
                    _rep->data[idx] = f(_rep->data[idx]);
                else
                    apply_impl(dim_idx + 1, idx, f);
                idx += step_len;
            }
        };

        // Rank-0 tensors have nothing to visit.
        if(_dimensions.size() > 0)
            apply_impl(0, _offset, f);
    }
-
-    TensorInterfacePtr Tensor<f64>::clone() const
-    {
-        std::shared_ptr<Tensor<f64>> cloned_tensor(new Tensor<f64>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<f64>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<f64>::cos_()
-    {
-        apply_([](f64 a)->f64 {return std::cos(a); });
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<f64>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	f64* Tensor<f64>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const f64* Tensor<f64>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<f64>::device() { return 0; }
-
-    DataType Tensor<f64>::dtype() const
-    {
-        return DataType::DOUBLE;
-    }
-
-    bool Tensor<f64>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<f64>> other_ptr = std::dynamic_pointer_cast<Tensor<f64>>(other);
-        
-        std::function<bool(idx_type, f64*, f64*)> equal_impl =
-        [&](idx_type dim, f64* lhs_idx, f64* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<f64>::inverse() const
-	{
-		return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
-	}
-
-    void Tensor<f64>::fill_(f64 value)
-    {
-        apply_([&value](f64 a)->f64 {return value; });
-    }
-
-	f64 Tensor<f64>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<f64>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<f64>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<f64>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<f64> result(new Tensor<f64>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](f64 a, f64 b)->f64 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<f64>::mul_(f64 value)
-    {
-        apply_([value](f64 a)->f64 {return a*value; });
-    }
-
-    void Tensor<f64>::mul_(std::shared_ptr<TensorInterface> other)
-    {
-        // check tensor other type
-        if(other->dtype() != DataType::DOUBLE)
-            throw std::runtime_error("expected type double tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<f64> * lhs = this;
-		Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
-		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
-			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
-                {
-                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
-                }
-                else
-                {
-                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-                }
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		mul_impl(-1, -1, lhs->offset(), rhs->offset());
-    }
-
-    idx_type Tensor<f64>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<f64>::neg_()
-    {
-        apply_([](f64 a)->f64 {return -a; });
-    }
-
-    idx_type Tensor<f64>::offset() const { return _offset; }
-
-    std::shared_ptr<TensorInterface> Tensor<f64>::permute(const DimVector& dims) const
-    {
-        // check dims
-        if(dims.size() != _strides.size())
-            throw std::runtime_error("permute dimension must have the same size");
-        std::vector<int> check_vec(dims.size(), 0);
-        for(int i = 0; i < dims.size();++i)
-            if(dims[i] >= 0 && dims[i] < dims.size())
-                check_vec[dims[i]] = 1;
-            else
-                throw std::runtime_error("permute dimension must in ndimension range");
-        
-        for(int i = 0; i < check_vec.size();++i)
-        {
-            if(check_vec[i] != 1)
-                throw std::runtime_error("permute dimension error");
-        }
-        // permute
-        std::shared_ptr<Tensor<f64>> result(new Tensor<f64>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        for(int i=0; i<dims.size(); ++i)
-        {
-            result->_dimensions[i] = _dimensions[dims[i]];
-            result->_strides[i] = _strides[dims[i]];
-        }
-
-        return result;
-    }
-
-    PlatformType Tensor<f64>::platform() const { return PlatformType::CPU; }
-
-    void Tensor<f64>::pow_(f32 exp)
-    {
-        apply_([&exp](f64 a)->f64 {return std::pow(a, exp); });
-    }
-
-	f64 Tensor<f64>::reduce(std::function<f64(f64, f64)> f) const
-    {
-		f64 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
-    TensorInterfacePtr Tensor<f64>::reduce_dim(idx_type dim, std::function<f64(f64, f64)> f) const
-    {
-        DimVector reduced_dim = _dimensions;
-        reduced_dim.erase(dim); // check dim?
-        TensorBasePtr<f64> result(new Tensor<f64>(reduced_dim));
-        TensorPtr<f64> raw_result = std::dynamic_pointer_cast<Tensor<f64>>(result);
-        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
    // In-place reshape.
    // TODO(review): not implemented -- the call is currently a silent no-op,
    // which callers are unlikely to expect.
    void Tensor<f64>::reshape_(const DimVector& dims)
    {

    }
-    
    // Resizes storage to hold `dims.flat_size()` elements, adopts the new
    // shape and recomputes contiguous strides.  Existing element values are
    // whatever TensorStorage::resize_ preserves.
    void Tensor<f64>::resize_(const DimVector& dims)
    {
        _dimensions = dims;
        _rep->resize_(dims.flat_size());
        auto_strides();
    }
-
-	std::shared_ptr<TensorInterface> Tensor<f64>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<f64>> result(new Tensor<f64>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
-    void Tensor<f64>::sin_()
-    {
-        apply_([](f64 a)->f64 {return std::sin(a); });
-    }
-    
-    DimVector Tensor<f64>::size() const { return _dimensions;}
-	
-	idx_type Tensor<f64>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<f64>>  Tensor<f64>::storage() const { return _rep; }
-    
-    DimVector Tensor<f64>::stride() const { return _strides; }
-	
-	idx_type Tensor<f64>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
-    void Tensor<f64>::sub_(std::shared_ptr<TensorInterface> other)
-    {
-        Tensor<f64> * lhs = this;
-		Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
-		std::function<void(Tensor<f64> *, Tensor<f64> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
-			[&](Tensor<f64> * lhs, Tensor<f64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lhs_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rhs_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-    
-    TensorInterfacePtr Tensor<f64>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<f64> result(new Tensor<f64>(d));
-        result->_rep->data[0] = reduce([](f64 a, f64 b)->f64 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
    // Renders the tensor as nested bracketed lists, e.g. "[[1.0,2.0]]",
    // honouring offset and strides (so views print correctly).
    std::string Tensor<f64>::to_string() const
    {
        // Depth-first walk: `dim` is the axis being printed, `idx` the flat
        // storage index of the current element/sub-tensor origin.
        std::function<std::string(const Tensor<f64>&, idx_type, idx_type)> to_string_impl =
			[&](const Tensor<f64>& t, idx_type dim, idx_type idx)->std::string {
            std::string result;
			// Past the last axis: print the scalar itself.
			if (dim == t.size().size())
            {
                result += std::to_string(t.data_ptr()[idx]);
				return result;
            }

			for (idx_type i = 0; i < t.size(dim); ++i)
			{
				// Outer axes separate their children with ",\n" and wrap
				// each child in brackets; the innermost axis uses bare ",".
				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
				if(dim != t.size().size() - 1)	result += "[";
				result += to_string_impl(t, dim + 1, idx);
				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
					result += ",";
				if (dim != t.size().size() - 1) result += "]";

				idx += t.stride(dim);
			}

			return result;
		};

		std::string result;
		result += "[" + to_string_impl(*this, 0, offset()) + "]";
		return result;
    }
-
-    void Tensor<f64>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<f64>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<f64>> result(new Tensor<f64>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 0 - 599
traph/source/tensor/float_tensor.cpp

@@ -1,599 +0,0 @@
-#include <traph/tensor/float_tensor.h>
-
-namespace traph
-{
-	// definition
-    // private
-    void Tensor<f32>::auto_strides()
-    {
-        idx_type dim_num = _dimensions.size();
-        _strides.resize(dim_num);
-        idx_type stride = 1;
-        for (idx_type i = dim_num - 1; i >= 0; --i)
-        {
-            _strides[i] = stride;
-            stride *= _dimensions[i];
-        }
-    }
-
    void Tensor<f32>::reduce_impl(f32& result, idx_type dim, idx_type idx, std::function<f32(f32,f32)> f) const
    {
        // Recursively folds every element of this view into `result` with f.
        // `idx` is the raw storage index of the first element along `dim`.
        idx_type dim_size = _dimensions.size();

        idx_type step_len = _strides[dim];
        idx_type step_num = _dimensions[dim];

        for(idx_type i = 0; i < step_num; ++i)
        {
            if(dim == dim_size - 1)
                result = f(result, _rep->data[idx]); // innermost axis: fold the element
            else
                reduce_impl(result, dim + 1, idx, f); // recurse into next axis
            idx += step_len;
        }
    }
-
-    f32 Tensor<f32>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<f32(f32,f32)> f) const
-    {
-        f32 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
    void Tensor<f32>::reduce_dim_impl(Tensor<f32>& result, idx_type dim, idx_type reduce_dim,
        idx_type this_idx, idx_type result_idx,
        std::function<f32(f32,f32)> f) const
    {
        // Recursive driver for reduce_dim(): iterates every axis except
        // `reduce_dim`; at the leaves, folds that axis via reduce_dim_kernel
        // into one element of `result`.
        idx_type dim_size = _dimensions.size();

        if(dim == dim_size)
        {
            // All non-reduced axes are fixed: collapse the reduced axis.
            result._rep->data[result_idx] = 
                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
            return;
        }

        if(dim == reduce_dim)
        {
            // Skip the reduced axis; the kernel handles its iteration.
            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
        }
        else
        {
            // NOTE(review): `result` has one fewer dimension than *this, yet
            // is advanced with result._strides[dim] for this tensor's dim —
            // for dim > reduce_dim this looks off by one; verify the mapping.
            for(idx_type i = 0; i < _dimensions[dim]; ++i)
            {
                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
                    
                this_idx += _strides[dim];
                result_idx += result._strides[dim];
            }
        }
    }
-    // public
    // Default constructor: empty storage, no dimensions, zero offset.
    Tensor<f32>::Tensor()
        :_rep(new TensorStorage<f32>),
        _dimensions(), _offset(0), _strides()
    {
    }
-
    // Shape constructor: computes contiguous row-major strides and allocates
    // flat_size() elements of backing storage.
    Tensor<f32>::Tensor(const DimVector& dimensions)
        :_rep(new TensorStorage<f32>),
        _dimensions(dimensions), _offset(0), _strides()
    {
        auto_strides();
        
        _rep->resize_(_dimensions.flat_size());
    }
-
    // Shape + stride constructor.
    // NOTE(review): `strides` is stored in the initializer list but then
    // immediately overwritten by auto_strides(), so the caller's strides are
    // ignored — confirm whether auto_strides() should run only when the
    // provided strides are empty.
    Tensor<f32>::Tensor(const DimVector& dimensions, const DimVector& strides)
        :_rep(new TensorStorage<f32>),
        _dimensions(dimensions), _offset(0), _strides(strides)
    {
        auto_strides();

        _rep->resize_(_dimensions.flat_size());
    }
-
-    Tensor<f32>::Tensor(const f32& t)
-        :_rep(new TensorStorage<f32>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
    void Tensor<f32>::add_(TensorInterfacePtr other)
    {
        // In-place broadcasting addition: *this += other.
        // `other` must be a float tensor whose broadcast shape equals ours.
		// check tensor other type
        if(other->dtype() != DataType::FLOAT)
            throw std::runtime_error("expected type float tensor");
		// check broadcast.shape = this.shape
        auto shape = broadcast_shape(this->size(), other->size());
        if(shape != this->size())
            throw std::runtime_error("The size of tensor a must match the size of tensor b");
		// ok, get lhs, rhs
		Tensor<f32> * lhs = this;
		Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
        // Axes are walked with negative indices from -1 (innermost) outward;
        // an exhausted rank contributes extent 1 (broadcasting).
		std::function<void(idx_type, idx_type, idx_type, idx_type)> add_impl =
			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {

			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();

			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);

			for (idx_type i = 0; i < max_shape_size; ++i)
			{
                // Both ranks exhausted: add a single element; else recurse.
                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
                {
                    lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
                }
                else
                {
                    add_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
                }

                // Broadcast axes (extent 1) do not advance their index.
				if(lsh_shape_size > 1)
					lhs_idx += lhs->stride(lhs_dim);
				if (rsh_shape_size > 1)
					rhs_idx += rhs->stride(rhs_dim);
			}
		};

		add_impl(-1, -1, lhs->offset(), rhs->offset());
    }
-
    void Tensor<f32>::apply_(std::function<f32(f32)> f)
    {
        // In-place elementwise transform: data[i] = f(data[i]) over the view.
        // sort stride for cache optimization
        // (visit axes in decreasing-stride order so the innermost loop walks
        // the most contiguous axis; bubble sort is fine — rank is tiny)
		DimVector cloned_stride(_strides);
        DimVector sorted_stride(_strides.size());
        for(int i = 0; i<_strides.size(); ++i)
            sorted_stride[i] = i;
        
        for (int i = 0; i < cloned_stride.size() - 1; i++)
            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
                if (cloned_stride[j] < cloned_stride[j + 1])
                {
                    std::swap(cloned_stride[j], cloned_stride[j+1]);
                    std::swap(sorted_stride[j], sorted_stride[j+1]);
                }
        
        // Recursive walk over the axes in the sorted order above.
        std::function<void(idx_type, idx_type, std::function<f32(f32)>)> apply_impl =
        [&](idx_type dim_idx, idx_type idx, std::function<f32(f32)> f){
            idx_type dim = sorted_stride[dim_idx];
            idx_type dim_size = _dimensions.size();

            idx_type step_len = _strides[dim];
            idx_type step_num = _dimensions[dim];
            
            for(idx_type i = 0; i < step_num; ++i)
            {
                if(dim_idx == dim_size - 1)
                    _rep->data[idx] = f(_rep->data[idx]); // innermost: apply
                else
                    apply_impl(dim_idx + 1, idx, f);
                idx += step_len;
            }
        };

        // Dimensionless tensors have no elements to visit.
        if(_dimensions.size() > 0)
            apply_impl(0, _offset, f);
    }
-
-    TensorInterfacePtr Tensor<f32>::clone() const
-    {
-        std::shared_ptr<Tensor<f32>> cloned_tensor(new Tensor<f32>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<f32>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<f32>::cos_()
-    {
-        apply_([](f32 a)->f32 {return std::cos(a); });
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<f32>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	f32* Tensor<f32>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const f32* Tensor<f32>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<f32>::device() { return 0; }
-
-    DataType Tensor<f32>::dtype() const
-    {
-        return DataType::FLOAT;
-    }
-
-    bool Tensor<f32>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<f32>> other_ptr = std::dynamic_pointer_cast<Tensor<f32>>(other);
-        
-        std::function<bool(idx_type, f32*, f32*)> equal_impl =
-        [&](idx_type dim, f32* lhs_idx, f32* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<f32>::inverse() const
-	{
-		return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
-	}
-
-    void Tensor<f32>::fill_(f32 value)
-    {
-        apply_([&value](f32 a)->f32 {return value; });
-    }
-
-	f32 Tensor<f32>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<f32>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<f32>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<f32>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<f32> result(new Tensor<f32>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](f32 a, f32 b)->f32 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<f32>::mul_(f32 value)
-    {
-        apply_([value](f32 a)->f32 {return a*value; });
-    }
-
    void Tensor<f32>::mul_(std::shared_ptr<TensorInterface> other)
    {
        // In-place broadcasting multiply: *this *= other.
        // Same traversal scheme as add_: axes walked with negative indices
        // from innermost outward; exhausted ranks broadcast as extent 1.
        // check tensor other type
        if(other->dtype() != DataType::FLOAT)
            throw std::runtime_error("expected type float tensor");
		// check broadcast.shape = this.shape
        auto shape = broadcast_shape(this->size(), other->size());
        if(shape != this->size())
            throw std::runtime_error("The size of tensor a must match the size of tensor b");
		// ok, get lhs, rhs
		Tensor<f32> * lhs = this;
		Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {

			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();

			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);

			for (idx_type i = 0; i < max_shape_size; ++i)
			{
                // Both ranks exhausted: multiply one element; else recurse.
                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
                {
                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
                }
                else
                {
                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
                }

                // Broadcast axes (extent 1) do not advance their index.
				if(lsh_shape_size > 1)
					lhs_idx += lhs->stride(lhs_dim);
				if (rsh_shape_size > 1)
					rhs_idx += rhs->stride(rhs_dim);
			}
		};

		mul_impl(-1, -1, lhs->offset(), rhs->offset());
    }
-
-    idx_type Tensor<f32>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<f32>::neg_()
-    {
-        apply_([](f32 a)->f32 {return -a; });
-    }
-
-    idx_type Tensor<f32>::offset() const { return _offset; }
-
    std::shared_ptr<TensorInterface> Tensor<f32>::permute(const DimVector& dims) const
    {
        // Returns a view whose axis i is this tensor's axis dims[i].
        // `dims` must be a permutation of [0, ndim); storage is shared.
        // check dims
        if(dims.size() != _strides.size())
            throw std::runtime_error("permute dimension must have the same size");
        // Mark each target axis; every axis must be hit exactly once.
        std::vector<int> check_vec(dims.size(), 0);
        for(int i = 0; i < dims.size();++i)
            if(dims[i] >= 0 && dims[i] < dims.size())
                check_vec[dims[i]] = 1;
            else
                throw std::runtime_error("permute dimension must in ndimension range");
        
        for(int i = 0; i < check_vec.size();++i)
        {
            if(check_vec[i] != 1)
                throw std::runtime_error("permute dimension error");
        }
        // permute: copy metadata, then reorder shape and strides together.
        std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
        result->_rep = _rep;
        result->_dimensions = _dimensions;
        result->_offset = _offset;
        result->_strides = _strides;

        for(int i=0; i<dims.size(); ++i)
        {
            result->_dimensions[i] = _dimensions[dims[i]];
            result->_strides[i] = _strides[dims[i]];
        }

        return result;
    }
-
-    PlatformType Tensor<f32>::platform() const { return PlatformType::CPU; }
-
-    void Tensor<f32>::pow_(f32 exp)
-    {
-        apply_([&exp](f32 a)->f32 {return std::pow(a, exp); });
-    }
-
-	f32 Tensor<f32>::reduce(std::function<f32(f32, f32)> f) const
-    {
-		f32 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
    TensorInterfacePtr Tensor<f32>::reduce_dim(idx_type dim, std::function<f32(f32, f32)> f) const
    {
        // Folds the tensor along `dim` with f; the result's shape is this
        // shape with `dim` removed.
        DimVector reduced_dim = _dimensions;
        reduced_dim.erase(dim); // check dim?
        TensorBasePtr<f32> result(new Tensor<f32>(reduced_dim));
        TensorPtr<f32> raw_result = std::dynamic_pointer_cast<Tensor<f32>>(result);
        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
        return std::dynamic_pointer_cast<TensorInterface>(result);
    }
-    
    void Tensor<f32>::reshape_(const DimVector& dims)
    {
        // TODO: not implemented — currently an intentional no-op.
    }
-    
-    void Tensor<f32>::resize_(const DimVector& dims)
-    {
-        _dimensions = dims;
-        _rep->resize_(dims.flat_size());
-        auto_strides();
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<f32>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
-    void Tensor<f32>::sin_()
-    {
-        apply_([](f32 a)->f32 {return std::sin(a); });
-    }
-    
-    DimVector Tensor<f32>::size() const { return _dimensions;}
-	
-	idx_type Tensor<f32>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<f32>>  Tensor<f32>::storage() const { return _rep; }
-    
-    DimVector Tensor<f32>::stride() const { return _strides; }
-	
-	idx_type Tensor<f32>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
    void Tensor<f32>::sub_(std::shared_ptr<TensorInterface> other)
    {
        // In-place broadcasting subtraction: *this -= other.
        // NOTE(review): unlike add_/mul_, this performs no dtype or broadcast
        // shape validation before traversing — confirm whether the checks
        // were omitted deliberately.
        Tensor<f32> * lhs = this;
		Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
		std::function<void(Tensor<f32> *, Tensor<f32> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
			[&](Tensor<f32> * lhs, Tensor<f32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {

			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();

			// Both ranks exhausted: subtract a single element.
			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
			{
				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
				return;
			}

			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);

			for (idx_type i = 0; i < max_shape_size; ++i)
			{
				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);

				// Broadcast axes (extent 1) do not advance their index.
				if(lhs_shape_size > 1)
					lhs_idx += lhs->stride(lhs_dim);
				if (rhs_shape_size > 1)
					rhs_idx += rhs->stride(rhs_dim);
			}
		};

		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
    }
-    
-    TensorInterfacePtr Tensor<f32>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<f32> result(new Tensor<f32>(d));
-        result->_rep->data[0] = reduce([](f32 a, f32 b)->f32 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
    std::string Tensor<f32>::to_string() const
    {
        // Renders the tensor as nested, comma-separated brackets. The walk is
        // stride-aware (uses stride(dim) to advance), so transposed/sliced
        // views print in logical order, not storage order.
        std::function<std::string(const Tensor<f32>&, idx_type, idx_type)> to_string_impl =
			[&](const Tensor<f32>& t, idx_type dim, idx_type idx)->std::string {
            std::string result;
            // Past the last dimension: idx addresses a single element.
			if (dim == t.size().size())
            {
                result += std::to_string(t.data_ptr()[idx]);
				return result;
            }

			for (idx_type i = 0; i < t.size(dim); ++i)
			{
				// Brackets/newlines only for non-innermost dimensions;
				// elements of the innermost dimension are comma-joined.
				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
				if(dim != t.size().size() - 1)	result += "[";
				result += to_string_impl(t, dim + 1, idx);
				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
					result += ",";
				if (dim != t.size().size() - 1) result += "]";

				idx += t.stride(dim);
			}

			return result;
		};

		std::string result;
		result += "[" + to_string_impl(*this, 0, offset()) + "]";
		return result;
    }
-
-    void Tensor<f32>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<f32>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<f32>> result(new Tensor<f32>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 0 - 600
traph/source/tensor/int_tensor.cpp

@@ -1,600 +0,0 @@
-#include <traph/tensor/int_tensor.h>
-
-namespace traph
-{
-    // definition
-    // private
-    void Tensor<i32>::auto_strides()
-    {
-        idx_type dim_num = _dimensions.size();
-        _strides.resize(dim_num);
-        idx_type stride = 1;
-
-        for (idx_type i = dim_num - 1; i >= 0; --i)
-        {
-            _strides[i] = stride;
-            stride *= _dimensions[i];
-        }
-    }
-
-    void Tensor<i32>::reduce_impl(i32& result, idx_type dim, idx_type idx, std::function<i32(i32,i32)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        idx_type step_len = _strides[dim];
-        idx_type step_num = _dimensions[dim];
-
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            if(dim == dim_size - 1)
-                result = f(result, _rep->data[idx]);
-            else
-                reduce_impl(result, dim + 1, idx, f);
-            idx += step_len;
-        }
-    }
-
-    i32 Tensor<i32>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<i32(i32,i32)> f) const
-    {
-        i32 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
-    void Tensor<i32>::reduce_dim_impl(Tensor<i32>& result, idx_type dim, idx_type reduce_dim,
-        idx_type this_idx, idx_type result_idx,
-        std::function<i32(i32,i32)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        if(dim == dim_size)
-        {
-            result._rep->data[result_idx] = 
-                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
-            return;
-        }
-
-        if(dim == reduce_dim)
-        {
-            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-        }
-        else
-        {
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-                    
-                this_idx += _strides[dim];
-                result_idx += result._strides[dim];
-            }
-        }
-    }
-    // public
-    Tensor<i32>::Tensor()
-        :_rep(new TensorStorage<i32>),
-        _dimensions(), _offset(0), _strides()
-    {
-    }
-
-    Tensor<i32>::Tensor(const DimVector& dimensions)
-        :_rep(new TensorStorage<i32>),
-        _dimensions(dimensions), _offset(0), _strides()
-    {
-        auto_strides();
-        
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i32>::Tensor(const DimVector& dimensions, const DimVector& strides)
-        :_rep(new TensorStorage<i32>),
-        _dimensions(dimensions), _offset(0), _strides(strides)
-    {
-        auto_strides();
-
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i32>::Tensor(const i32& t)
-        :_rep(new TensorStorage<i32>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
-    void Tensor<i32>::add_(TensorInterfacePtr other)
-    {
-		// check tensor other type
-        if(other->dtype() != DataType::INT)
-            throw std::runtime_error("expected type int tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i32> * lhs = this;
-		Tensor<i32> * rhs = dynamic_cast<Tensor<i32> *>(other.get());
-		std::function<void(Tensor<i32> *, Tensor<i32> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
-			[&](Tensor<i32> * lhs, Tensor<i32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i32>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-
-    void Tensor<i32>::apply_(std::function<i32(i32)> f)
-    {
-        // sort stride for cache optimization
-		DimVector cloned_stride(_strides);
-        DimVector sorted_stride(_strides.size());
-        for(int i = 0; i<_strides.size(); ++i)
-            sorted_stride[i] = i;
-        
-        for (int i = 0; i < cloned_stride.size() - 1; i++)
-            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
-                if (cloned_stride[j] < cloned_stride[j + 1])
-                {
-                    std::swap(cloned_stride[j], cloned_stride[j+1]);
-                    std::swap(sorted_stride[j], sorted_stride[j+1]);
-                }
-        
-        std::function<void(idx_type, idx_type, std::function<i32(i32)>)> apply_impl =
-        [&](idx_type dim_idx, idx_type idx, std::function<i32(i32)> f){
-            idx_type dim = sorted_stride[dim_idx];
-            idx_type dim_size = _dimensions.size();
-
-            idx_type step_len = _strides[dim];
-            idx_type step_num = _dimensions[dim];
-            
-            for(idx_type i = 0; i < step_num; ++i)
-            {
-                if(dim_idx == dim_size - 1)
-                    _rep->data[idx] = f(_rep->data[idx]);
-                else
-                    apply_impl(dim_idx + 1, idx, f);
-                idx += step_len;
-            }
-        };
-        
-        if(_dimensions.size() > 0)
-            apply_impl(0, _offset, f);
-    }
-
-    TensorInterfacePtr Tensor<i32>::clone() const
-    {
-        std::shared_ptr<Tensor<i32>> cloned_tensor(new Tensor<i32>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<i32>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<i32>::cos_()
-    {
-        throw std::runtime_error("No implement");
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<i32>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	i32* Tensor<i32>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const i32* Tensor<i32>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<i32>::device() { return 0; }
-
-    DataType Tensor<i32>::dtype() const
-    {
-        return DataType::INT;
-    }
-
-    bool Tensor<i32>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<i32>> other_ptr = std::dynamic_pointer_cast<Tensor<i32>>(other);
-        
-        std::function<bool(idx_type, i32*, i32*)> equal_impl =
-        [&](idx_type dim, i32* lhs_idx, i32* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i32>::inverse() const
-	{
-		throw std::runtime_error("No implement");
-	}
-
-    void Tensor<i32>::fill_(i32 value)
-    {
-        apply_([&value](i32 a)->i32 {return value; });
-    }
-
-	i32 Tensor<i32>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i32>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<i32>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<i32>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i32> result(new Tensor<i32>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](i32 a, i32 b)->i32 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<i32>::mul_(i32 value)
-    {
-        apply_([value](i32 a)->i32 {return a*value; });
-    }
-
-    void Tensor<i32>::mul_(std::shared_ptr<TensorInterface> other)
-    {
-        // check tensor other type
-        if(other->dtype() != DataType::INT)
-            throw std::runtime_error("expected type int tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i32> * lhs = this;
-		Tensor<i32> * rhs = dynamic_cast<Tensor<i32> *>(other.get());
-		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
-			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
-                {
-                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
-                }
-                else
-                {
-                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-                }
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		mul_impl(-1, -1, lhs->offset(), rhs->offset());
-    }
-
-    idx_type Tensor<i32>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<i32>::neg_()
-    {
-        apply_([](i32 a)->i32 {return -a; });
-    }
-
-    idx_type Tensor<i32>::offset() const { return _offset; }
-
-    std::shared_ptr<TensorInterface> Tensor<i32>::permute(const DimVector& dims) const
-    {
-        // check dims
-        if(dims.size() != _strides.size())
-            throw std::runtime_error("permute dimension must have the same size");
-        std::vector<int> check_vec(dims.size(), 0);
-        for(int i = 0; i < dims.size();++i)
-            if(dims[i] >= 0 && dims[i] < dims.size())
-                check_vec[dims[i]] = 1;
-            else
-                throw std::runtime_error("permute dimension must in ndimension range");
-        
-        for(int i = 0; i < check_vec.size();++i)
-        {
-            if(check_vec[i] != 1)
-                throw std::runtime_error("permute dimension error");
-        }
-        // permute
-        std::shared_ptr<Tensor<i32>> result(new Tensor<i32>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        for(int i=0; i<dims.size(); ++i)
-        {
-            result->_dimensions[i] = _dimensions[dims[i]];
-            result->_strides[i] = _strides[dims[i]];
-        }
-
-        return result;
-    }
-
-    PlatformType Tensor<i32>::platform() const { return PlatformType::CPU; }
-
-    void Tensor<i32>::pow_(f32 exp)
-    {
-        std::int32_t exp_int = static_cast<std::int32_t>(exp);
-        apply_([&exp_int](i32 a)->i32 {return static_cast<i32>(std::pow(a, exp_int)); });
-    }
-
-	i32 Tensor<i32>::reduce(std::function<i32(i32, i32)> f) const
-    {
-		i32 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
-    TensorInterfacePtr Tensor<i32>::reduce_dim(idx_type dim, std::function<i32(i32, i32)> f) const
-    {
-        DimVector reduced_dim = _dimensions;
-        reduced_dim.erase(dim); // check dim?
-        TensorBasePtr<i32> result(new Tensor<i32>(reduced_dim));
-        TensorPtr<i32> raw_result = std::dynamic_pointer_cast<Tensor<i32>>(result);
-        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    void Tensor<i32>::reshape_(const DimVector& dims)
-    {
-
-    }
-    
-    void Tensor<i32>::resize_(const DimVector& dims)
-    {
-        _dimensions = dims;
-        _rep->resize_(dims.flat_size());
-        auto_strides();
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i32>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<i32>> result(new Tensor<i32>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
-    void Tensor<i32>::sin_()
-    {
-        throw std::runtime_error("No implement");
-    }
-    
-    DimVector Tensor<i32>::size() const { return _dimensions;}
-	
-	idx_type Tensor<i32>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<i32>>  Tensor<i32>::storage() const { return _rep; }
-    
-    DimVector Tensor<i32>::stride() const { return _strides; }
-	
-	idx_type Tensor<i32>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
-    void Tensor<i32>::sub_(std::shared_ptr<TensorInterface> other)
-    {
-        Tensor<i32> * lhs = this;
-		Tensor<i32> * rhs = dynamic_cast<Tensor<i32> *>(other.get());
-		std::function<void(Tensor<i32> *, Tensor<i32> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
-			[&](Tensor<i32> * lhs, Tensor<i32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i32>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lhs_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rhs_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-    
-    TensorInterfacePtr Tensor<i32>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i32> result(new Tensor<i32>(d));
-        result->_rep->data[0] = reduce([](i32 a, i32 b)->i32 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    std::string Tensor<i32>::to_string() const
-    {
-        std::function<std::string(const Tensor<i32>&, idx_type, idx_type)> to_string_impl =
-			[&](const Tensor<i32>& t, idx_type dim, idx_type idx)->std::string {
-            std::string result;
-			if (dim == t.size().size())
-            {
-                result += std::to_string(t.data_ptr()[idx]);
-				return result;
-            }
-
-			for (idx_type i = 0; i < t.size(dim); ++i)
-			{
-				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
-				if(dim != t.size().size() - 1)	result += "[";
-				result += to_string_impl(t, dim + 1, idx);
-				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
-					result += ",";
-				if (dim != t.size().size() - 1) result += "]";
-
-				idx += t.stride(dim);
-			}
-
-			return result;
-		};
-
-		std::string result;
-		result += "[" + to_string_impl(*this, 0, offset()) + "]";
-		return result;
-    }
-
-    void Tensor<i32>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<i32>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<i32>> result(new Tensor<i32>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 0 - 601
traph/source/tensor/long_tensor.cpp

@@ -1,601 +0,0 @@
-#include <traph/tensor/long_tensor.h>
-
-namespace traph
-{
-    // definition
-    // private
-    void Tensor<i64>::auto_strides()
-    {
-        idx_type dim_num = _dimensions.size();
-        _strides.resize(dim_num);
-        idx_type stride = 1;
-
-        for (idx_type i = dim_num - 1; i >= 0; --i)
-        {
-            _strides[i] = stride;
-            stride *= _dimensions[i];
-        }
-
-    }
-
-    void Tensor<i64>::reduce_impl(i64& result, idx_type dim, idx_type idx, std::function<i64(i64,i64)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        idx_type step_len = _strides[dim];
-        idx_type step_num = _dimensions[dim];
-
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            if(dim == dim_size - 1)
-                result = f(result, _rep->data[idx]);
-            else
-                reduce_impl(result, dim + 1, idx, f);
-            idx += step_len;
-        }
-    }
-
-    i64 Tensor<i64>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<i64(i64,i64)> f) const
-    {
-        i64 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
-    void Tensor<i64>::reduce_dim_impl(Tensor<i64>& result, idx_type dim, idx_type reduce_dim,
-        idx_type this_idx, idx_type result_idx,
-        std::function<i64(i64,i64)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        if(dim == dim_size)
-        {
-            result._rep->data[result_idx] = 
-                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
-            return;
-        }
-
-        if(dim == reduce_dim)
-        {
-            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-        }
-        else
-        {
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-                    
-                this_idx += _strides[dim];
-                result_idx += result._strides[dim];
-            }
-        }
-    }
-    // public
-    Tensor<i64>::Tensor()
-        :_rep(new TensorStorage<i64>),
-        _dimensions(), _offset(0), _strides()
-    {
-    }
-
-    Tensor<i64>::Tensor(const DimVector& dimensions)
-        :_rep(new TensorStorage<i64>),
-        _dimensions(dimensions), _offset(0), _strides()
-    {
-        auto_strides();
-        
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i64>::Tensor(const DimVector& dimensions, const DimVector& strides)
-        :_rep(new TensorStorage<i64>),
-        _dimensions(dimensions), _offset(0), _strides(strides)
-    {
-        auto_strides();
-
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i64>::Tensor(const i64& t)
-        :_rep(new TensorStorage<i64>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
-    void Tensor<i64>::add_(TensorInterfacePtr other)
-    {
-		// check tensor other type
-        if(other->dtype() != DataType::LONG)
-            throw std::runtime_error("expected type long tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i64> * lhs = this;
-		Tensor<i64> * rhs = dynamic_cast<Tensor<i64> *>(other.get());
-		std::function<void(Tensor<i64> *, Tensor<i64> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
-			[&](Tensor<i64> * lhs, Tensor<i64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i64>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i64>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-
-    void Tensor<i64>::apply_(std::function<i64(i64)> f)
-    {
-        // sort stride for cache optimization
-		DimVector cloned_stride(_strides);
-        DimVector sorted_stride(_strides.size());
-        for(int i = 0; i<_strides.size(); ++i)
-            sorted_stride[i] = i;
-        
-        for (int i = 0; i < cloned_stride.size() - 1; i++)
-            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
-                if (cloned_stride[j] < cloned_stride[j + 1])
-                {
-                    std::swap(cloned_stride[j], cloned_stride[j+1]);
-                    std::swap(sorted_stride[j], sorted_stride[j+1]);
-                }
-        
-        std::function<void(idx_type, idx_type, std::function<i64(i64)>)> apply_impl =
-        [&](idx_type dim_idx, idx_type idx, std::function<i64(i64)> f){
-            idx_type dim = sorted_stride[dim_idx];
-            idx_type dim_size = _dimensions.size();
-
-            idx_type step_len = _strides[dim];
-            idx_type step_num = _dimensions[dim];
-            
-            for(idx_type i = 0; i < step_num; ++i)
-            {
-                if(dim_idx == dim_size - 1)
-                    _rep->data[idx] = f(_rep->data[idx]);
-                else
-                    apply_impl(dim_idx + 1, idx, f);
-                idx += step_len;
-            }
-        };
-        
-        if(_dimensions.size() > 0)
-            apply_impl(0, _offset, f);
-    }
-
-    TensorInterfacePtr Tensor<i64>::clone() const
-    {
-        std::shared_ptr<Tensor<i64>> cloned_tensor(new Tensor<i64>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<i64>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<i64>::cos_()
-    {
-        throw std::runtime_error("No implement");
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<i64>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	i64* Tensor<i64>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const i64* Tensor<i64>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<i64>::device() { return 0; }
-
-    DataType Tensor<i64>::dtype() const
-    {
-        return DataType::LONG;
-    }
-
-    bool Tensor<i64>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<i64>> other_ptr = std::dynamic_pointer_cast<Tensor<i64>>(other);
-        
-        std::function<bool(idx_type, i64*, i64*)> equal_impl =
-        [&](idx_type dim, i64* lhs_idx, i64* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i64>::inverse() const
-	{
-		throw std::runtime_error("No implement");
-	}
-
-    void Tensor<i64>::fill_(i64 value)
-    {
-        apply_([&value](i64 a)->i64 {return value; });
-    }
-
-	i64 Tensor<i64>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i64>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<i64>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<i64>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i64> result(new Tensor<i64>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](i64 a, i64 b)->i64 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<i64>::mul_(i64 value)
-    {
-        apply_([value](i64 a)->i64 {return a*value; });
-    }
-
-    void Tensor<i64>::mul_(std::shared_ptr<TensorInterface> other)
-    {
-        // check tensor other type
-        if(other->dtype() != DataType::LONG)
-            throw std::runtime_error("expected type long tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i64> * lhs = this;
-		Tensor<i64> * rhs = dynamic_cast<Tensor<i64> *>(other.get());
-		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
-			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
-                {
-                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
-                }
-                else
-                {
-                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-                }
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		mul_impl(-1, -1, lhs->offset(), rhs->offset());
-    }
-
-    idx_type Tensor<i64>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<i64>::neg_()
-    {
-        apply_([](i64 a)->i64 {return -a; });
-    }
-
-    idx_type Tensor<i64>::offset() const { return _offset; }
-
-    std::shared_ptr<TensorInterface> Tensor<i64>::permute(const DimVector& dims) const
-    {
-        // check dims
-        if(dims.size() != _strides.size())
-            throw std::runtime_error("permute dimension must have the same size");
-        std::vector<int> check_vec(dims.size(), 0);
-        for(int i = 0; i < dims.size();++i)
-            if(dims[i] >= 0 && dims[i] < dims.size())
-                check_vec[dims[i]] = 1;
-            else
-                throw std::runtime_error("permute dimension must in ndimension range");
-        
-        for(int i = 0; i < check_vec.size();++i)
-        {
-            if(check_vec[i] != 1)
-                throw std::runtime_error("permute dimension error");
-        }
-        // permute
-        std::shared_ptr<Tensor<i64>> result(new Tensor<i64>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        for(int i=0; i<dims.size(); ++i)
-        {
-            result->_dimensions[i] = _dimensions[dims[i]];
-            result->_strides[i] = _strides[dims[i]];
-        }
-
-        return result;
-    }
-
-    PlatformType Tensor<i64>::platform() const { return PlatformType::CPU; }
-
-    void Tensor<i64>::pow_(f32 exp)
-    {
-        std::int32_t exp_int = static_cast<std::int32_t>(exp);
-        apply_([&exp_int](i64 a)->i64 {return static_cast<i64>(std::pow(a, exp_int)); });
-    }
-
-	i64 Tensor<i64>::reduce(std::function<i64(i64, i64)> f) const
-    {
-		i64 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
-    TensorInterfacePtr Tensor<i64>::reduce_dim(idx_type dim, std::function<i64(i64, i64)> f) const
-    {
-        DimVector reduced_dim = _dimensions;
-        reduced_dim.erase(dim); // check dim?
-        TensorBasePtr<i64> result(new Tensor<i64>(reduced_dim));
-        TensorPtr<i64> raw_result = std::dynamic_pointer_cast<Tensor<i64>>(result);
-        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    void Tensor<i64>::reshape_(const DimVector& dims)
-    {
-
-    }
-    
-    void Tensor<i64>::resize_(const DimVector& dims)
-    {
-        _dimensions = dims;
-        _rep->resize_(dims.flat_size());
-        auto_strides();
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i64>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<i64>> result(new Tensor<i64>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
-    void Tensor<i64>::sin_()
-    {
-        throw std::runtime_error("No implement");
-    }
-    
-    DimVector Tensor<i64>::size() const { return _dimensions;}
-	
-	idx_type Tensor<i64>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<i64>>  Tensor<i64>::storage() const { return _rep; }
-    
-    DimVector Tensor<i64>::stride() const { return _strides; }
-	
-	idx_type Tensor<i64>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
-    void Tensor<i64>::sub_(std::shared_ptr<TensorInterface> other)
-    {
-        Tensor<i64> * lhs = this;
-		Tensor<i64> * rhs = dynamic_cast<Tensor<i64> *>(other.get());
-		std::function<void(Tensor<i64> *, Tensor<i64> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
-			[&](Tensor<i64> * lhs, Tensor<i64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i64>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i64>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lhs_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rhs_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-    
-    TensorInterfacePtr Tensor<i64>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i64> result(new Tensor<i64>(d));
-        result->_rep->data[0] = reduce([](i64 a, i64 b)->i64 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    std::string Tensor<i64>::to_string() const
-    {
-        std::function<std::string(const Tensor<i64>&, idx_type, idx_type)> to_string_impl =
-			[&](const Tensor<i64>& t, idx_type dim, idx_type idx)->std::string {
-            std::string result;
-			if (dim == t.size().size())
-            {
-                result += std::to_string(t.data_ptr()[idx]);
-				return result;
-            }
-
-			for (idx_type i = 0; i < t.size(dim); ++i)
-			{
-				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
-				if(dim != t.size().size() - 1)	result += "[";
-				result += to_string_impl(t, dim + 1, idx);
-				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
-					result += ",";
-				if (dim != t.size().size() - 1) result += "]";
-
-				idx += t.stride(dim);
-			}
-
-			return result;
-		};
-
-		std::string result;
-		result += "[" + to_string_impl(*this, 0, offset()) + "]";
-		return result;
-    }
-
-    void Tensor<i64>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<i64>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<i64>> result(new Tensor<i64>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 0 - 601
traph/source/tensor/short_tensor.cpp

@@ -1,601 +0,0 @@
-#include <traph/tensor/short_tensor.h>
-
-namespace traph
-{
-    // definition
-    // private
-    void Tensor<i16>::auto_strides()
-    {
-        idx_type dim_num = _dimensions.size();
-        _strides.resize(dim_num);
-        idx_type stride = 1;
-
-        for (idx_type i = dim_num - 1; i >= 0; --i)
-        {
-            _strides[i] = stride;
-            stride *= _dimensions[i];
-        }
-    }
-
-    void Tensor<i16>::reduce_impl(i16& result, idx_type dim, idx_type idx, std::function<i16(i16,i16)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        idx_type step_len = _strides[dim];
-        idx_type step_num = _dimensions[dim];
-
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            if(dim == dim_size - 1)
-                result = f(result, _rep->data[idx]);
-            else
-                reduce_impl(result, dim + 1, idx, f);
-            idx += step_len;
-        }
-    }
-
-    i16 Tensor<i16>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<i16(i16,i16)> f) const
-    {
-        i16 result{};
-        for(idx_type i = 0; i < step_num; ++i)
-        {
-            result = f(result, _rep->data[begin]);
-            begin += step_len;
-        }
-        return result;
-    }
-
-    void Tensor<i16>::reduce_dim_impl(Tensor<i16>& result, idx_type dim, idx_type reduce_dim,
-        idx_type this_idx, idx_type result_idx,
-        std::function<i16(i16,i16)> f) const
-    {
-        idx_type dim_size = _dimensions.size();
-
-        if(dim == dim_size)
-        {
-            result._rep->data[result_idx] = 
-                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
-            return;
-        }
-
-        if(dim == reduce_dim)
-        {
-            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-        }
-        else
-        {
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
-                    
-                this_idx += _strides[dim];
-                result_idx += result._strides[dim];
-            }
-        }
-    }
-    // public
-    Tensor<i16>::Tensor()
-        :_rep(new TensorStorage<i16>),
-        _dimensions(), _offset(0), _strides()
-    {
-    }
-
-    Tensor<i16>::Tensor(const DimVector& dimensions)
-        :_rep(new TensorStorage<i16>),
-        _dimensions(dimensions), _offset(0), _strides()
-    {
-        auto_strides();
-        
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i16>::Tensor(const DimVector& dimensions, const DimVector& strides)
-        :_rep(new TensorStorage<i16>),
-        _dimensions(dimensions), _offset(0), _strides(strides)
-    {
-        auto_strides();
-
-        _rep->resize_(_dimensions.flat_size());
-    }
-
-    Tensor<i16>::Tensor(const i16& t)
-        :_rep(new TensorStorage<i16>),
-        _dimensions(), _offset(0), _strides()
-    {
-        _dimensions.resize(1);
-        auto_strides();
-    }
-
-    void Tensor<i16>::add_(TensorInterfacePtr other)
-    {
-		// check tensor other type
-        if(other->dtype() != DataType::SHORT)
-            throw std::runtime_error("expected type short tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i16> * lhs = this;
-		Tensor<i16> * rhs = dynamic_cast<Tensor<i16> *>(other.get());
-		std::function<void(Tensor<i16> *, Tensor<i16> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
-			[&](Tensor<i16> * lhs, Tensor<i16> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i16>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i16>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-
-    void Tensor<i16>::apply_(std::function<i16(i16)> f)
-    {
-        // sort stride for cache optimization
-		DimVector cloned_stride(_strides);
-        DimVector sorted_stride(_strides.size());
-        for(int i = 0; i<_strides.size(); ++i)
-            sorted_stride[i] = i;
-        
-        for (int i = 0; i < cloned_stride.size() - 1; i++)
-            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
-                if (cloned_stride[j] < cloned_stride[j + 1])
-                {
-                    std::swap(cloned_stride[j], cloned_stride[j+1]);
-                    std::swap(sorted_stride[j], sorted_stride[j+1]);
-                }
-        
-        std::function<void(idx_type, idx_type, std::function<i16(i16)>)> apply_impl =
-        [&](idx_type dim_idx, idx_type idx, std::function<i16(i16)> f){
-            idx_type dim = sorted_stride[dim_idx];
-            idx_type dim_size = _dimensions.size();
-
-            idx_type step_len = _strides[dim];
-            idx_type step_num = _dimensions[dim];
-            
-            for(idx_type i = 0; i < step_num; ++i)
-            {
-                if(dim_idx == dim_size - 1)
-                    _rep->data[idx] = f(_rep->data[idx]);
-                else
-                    apply_impl(dim_idx + 1, idx, f);
-                idx += step_len;
-            }
-        };
-        
-        if(_dimensions.size() > 0)
-            apply_impl(0, _offset, f);
-    }
-
-    TensorInterfacePtr Tensor<i16>::clone() const
-    {
-        std::shared_ptr<Tensor<i16>> cloned_tensor(new Tensor<i16>);
-        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<i16>>(_rep->clone());
-        cloned_tensor->_dimensions = _dimensions;
-        cloned_tensor->_offset = _offset;
-        cloned_tensor->_strides = _strides;
-        
-        return cloned_tensor;
-    }
-
-    void Tensor<i16>::cos_()
-    {
-        throw std::runtime_error("No implement");
-    }
-
-    std::shared_ptr<TensorBase<f32>> Tensor<i16>::create_grad()
-    {
-        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
-    }
-
-	i16* Tensor<i16>::data_ptr()
-    {
-        return _rep->data_ptr();
-    }
-
-    const i16* Tensor<i16>::data_ptr() const
-    {
-        return _rep->data_ptr();
-    }
-
-    device_id Tensor<i16>::device() { return 0; }
-
-    DataType Tensor<i16>::dtype() const
-    {
-        return DataType::SHORT;
-    }
-
-    bool Tensor<i16>::equal(std::shared_ptr<TensorInterface> other) const
-    {
-        if(other->platform() != this->platform())
-            throw std::runtime_error("equal: Two tensors must be the same platform");
-        
-        if(other->dtype() != this->dtype())
-            return false;
-
-        if(other->size() != this->size())
-            return false;
-
-        std::shared_ptr<Tensor<i16>> other_ptr = std::dynamic_pointer_cast<Tensor<i16>>(other);
-        
-        std::function<bool(idx_type, i16*, i16*)> equal_impl =
-        [&](idx_type dim, i16* lhs_idx, i16* rhs_idx){
-            idx_type dim_size = _dimensions.size();
-            
-            for(idx_type i = 0; i < _dimensions[dim]; ++i)
-            {
-                if(dim == dim - 1)
-                {
-                    if(*lhs_idx != *rhs_idx) return false;
-                }
-                else
-                {
-                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
-                }
-                lhs_idx += _strides[dim];
-                rhs_idx += other_ptr->stride(dim);
-            }
-            return true;
-        };
-
-        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i16>::inverse() const
-	{
-		throw std::runtime_error("No implement");
-	}
-
-    void Tensor<i16>::fill_(i16 value)
-    {
-        apply_([&value](i16 a)->i16 {return value; });
-    }
-
-	i16 Tensor<i16>::item() const
-    {
-        if(_dimensions.flat_size() == 1)
-        {
-            return _rep->data[_offset];
-        }
-        else
-        {
-            throw std::runtime_error("item: only one element tensors can be converted to scalars");
-        }
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i16>::matmul(std::shared_ptr<TensorInterface> mat) const
-	{
-		auto right_matrix = std::dynamic_pointer_cast<Tensor<i16>>(mat);
-		return matmul_impl(*this, *right_matrix);
-	}
-
-    TensorInterfacePtr Tensor<i16>::mean() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i16> result(new Tensor<i16>(d));
-        auto flat_size = _dimensions.flat_size();
-        result->_rep->data[0] = reduce([](i16 a, i16 b)->i16 {return a + b; });
-        result->_rep->data[0] /= flat_size;
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-
-    void Tensor<i16>::mul_(i16 value)
-    {
-        apply_([value](i16 a)->i16 {return a*value; });
-    }
-
-    void Tensor<i16>::mul_(std::shared_ptr<TensorInterface> other)
-    {
-        // check tensor other type
-        if(other->dtype() != DataType::SHORT)
-            throw std::runtime_error("expected type short tensor");
-		// check broadcast.shape = this.shape
-        auto shape = broadcast_shape(this->size(), other->size());
-        if(shape != this->size())
-            throw std::runtime_error("The size of tensor a must match the size of tensor b");
-		// ok, get lhs, rhs
-		Tensor<i16> * lhs = this;
-		Tensor<i16> * rhs = dynamic_cast<Tensor<i16> *>(other.get());
-		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
-			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
-
-			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
-                {
-                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
-                }
-                else
-                {
-                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-                }
-
-				if(lsh_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rsh_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		mul_impl(-1, -1, lhs->offset(), rhs->offset());
-    }
-
-    idx_type Tensor<i16>::ndimension() const
-    {
-        return _dimensions.size();
-    }
-
-    void Tensor<i16>::neg_()
-    {
-        apply_([](i16 a)->i16 {return -a; });
-    }
-
-    idx_type Tensor<i16>::offset() const { return _offset; }
-
-    std::shared_ptr<TensorInterface> Tensor<i16>::permute(const DimVector& dims) const
-    {
-        // check dims
-        if(dims.size() != _strides.size())
-            throw std::runtime_error("permute dimension must have the same size");
-        std::vector<int> check_vec(dims.size(), 0);
-        for(int i = 0; i < dims.size();++i)
-            if(dims[i] >= 0 && dims[i] < dims.size())
-                check_vec[dims[i]] = 1;
-            else
-                throw std::runtime_error("permute dimension must in ndimension range");
-        
-        for(int i = 0; i < check_vec.size();++i)
-        {
-            if(check_vec[i] != 1)
-                throw std::runtime_error("permute dimension error");
-        }
-        // permute
-        std::shared_ptr<Tensor<i16>> result(new Tensor<i16>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        for(int i=0; i<dims.size(); ++i)
-        {
-            result->_dimensions[i] = _dimensions[dims[i]];
-            result->_strides[i] = _strides[dims[i]];
-        }
-
-        return result;
-    }
-
-    PlatformType Tensor<i16>::platform() const { return PlatformType::CPU; }
-
-    void Tensor<i16>::pow_(f32 exp)
-    {
-        std::int32_t exp_int = static_cast<std::int32_t>(exp);
-        apply_([&exp_int](i16 a)->i16 {return static_cast<i16>(std::pow(a, exp_int)); });
-    }
-
-	i16 Tensor<i16>::reduce(std::function<i16(i16, i16)> f) const
-    {
-		i16 result{};
-        reduce_impl(result, 0, _offset, f);
-        return result;
-    }
-    
-    TensorInterfacePtr Tensor<i16>::reduce_dim(idx_type dim, std::function<i16(i16, i16)> f) const
-    {
-        DimVector reduced_dim = _dimensions;
-        reduced_dim.erase(dim); // check dim?
-        TensorBasePtr<i16> result(new Tensor<i16>(reduced_dim));
-        TensorPtr<i16> raw_result = std::dynamic_pointer_cast<Tensor<i16>>(result);
-        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    void Tensor<i16>::reshape_(const DimVector& dims)
-    {
-
-    }
-    
-    void Tensor<i16>::resize_(const DimVector& dims)
-    {
-        _dimensions = dims;
-        _rep->resize_(dims.flat_size());
-        auto_strides();
-    }
-
-	std::shared_ptr<TensorInterface> Tensor<i16>::select(const SliceVector& slice) const
-	{
-		std::shared_ptr<Tensor<i16>> result(new Tensor<i16>);
-		result->_rep = _rep;
-
-		// dimension
-		DimVector dim;
-		std::fesetround(FE_TONEAREST);
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			auto& each = slice[i];
-			dim.push_back(
-				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
-			);
-		}
-		result->_dimensions = dim;
-
-		// offset
-		idx_type new_offset = 1;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			new_offset *= _strides[i] * slice[i].start.value_or(0);
-		}
-		result->_offset = _offset + new_offset;
-
-		// strides
-		DimVector strides;
-		for (idx_type i = 0; i < slice.size(); ++i)
-		{
-			strides.push_back(_strides[i] * slice[i].step.value_or(1));
-		}
-		result->_strides = strides;
-
-
-		return std::dynamic_pointer_cast<TensorInterface>(result);
-	}
-    
-    void Tensor<i16>::sin_()
-    {
-        throw std::runtime_error("No implement");
-    }
-    
-    DimVector Tensor<i16>::size() const { return _dimensions;}
-	
-	idx_type Tensor<i16>::size(idx_type i) const
-	{ 
-		auto shape_size = _dimensions.size();
-		if (i >= 0 && i < _dimensions.size())
-			return _dimensions[i];
-		else if (i <= -1 && i >= -_dimensions.size())
-			return _dimensions[shape_size + i];
-		else
-			throw std::runtime_error("Dimension out of range");
-	}
-    
-	std::shared_ptr<StorageBase<i16>>  Tensor<i16>::storage() const { return _rep; }
-    
-    DimVector Tensor<i16>::stride() const { return _strides; }
-	
-	idx_type Tensor<i16>::stride(idx_type i) const
-	{
-		auto stride_size = _strides.size();
-		if (i >= 0 && i < _strides.size())
-			return _strides[i];
-		else if (i <= -1 && i >= -_strides.size())
-			return _strides[stride_size + i];
-		else
-			throw std::runtime_error("Stride out of range");
-	}
-
-    void Tensor<i16>::sub_(std::shared_ptr<TensorInterface> other)
-    {
-        Tensor<i16> * lhs = this;
-		Tensor<i16> * rhs = dynamic_cast<Tensor<i16> *>(other.get());
-		std::function<void(Tensor<i16> *, Tensor<i16> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
-			[&](Tensor<i16> * lhs, Tensor<i16> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
-
-			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i16>>(lhs->storage())->data_ptr();
-			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i16>>(rhs->storage())->data_ptr();
-
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
-				return;
-			}
-
-			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
-			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
-			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
-
-			for (idx_type i = 0; i < max_shape_size; ++i)
-			{
-				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
-
-				if(lhs_shape_size > 1)
-					lhs_idx += lhs->stride(lhs_dim);
-				if (rhs_shape_size > 1)
-					rhs_idx += rhs->stride(rhs_dim);
-			}
-		};
-
-		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
-    }
-    
-    TensorInterfacePtr Tensor<i16>::sum() const
-    {
-        DimVector d(1);
-        d[0] = 1;
-
-        TensorPtr<i16> result(new Tensor<i16>(d));
-        result->_rep->data[0] = reduce([](i16 a, i16 b)->i16 {return a + b; });
-        return std::dynamic_pointer_cast<TensorInterface>(result);
-    }
-    
-    std::string Tensor<i16>::to_string() const
-    {
-        std::function<std::string(const Tensor<i16>&, idx_type, idx_type)> to_string_impl =
-			[&](const Tensor<i16>& t, idx_type dim, idx_type idx)->std::string {
-            std::string result;
-			if (dim == t.size().size())
-            {
-                result += std::to_string(t.data_ptr()[idx]);
-				return result;
-            }
-
-			for (idx_type i = 0; i < t.size(dim); ++i)
-			{
-				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
-				if(dim != t.size().size() - 1)	result += "[";
-				result += to_string_impl(t, dim + 1, idx);
-				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
-					result += ",";
-				if (dim != t.size().size() - 1) result += "]";
-
-				idx += t.stride(dim);
-			}
-
-			return result;
-		};
-
-		std::string result;
-		result += "[" + to_string_impl(*this, 0, offset()) + "]";
-		return result;
-    }
-
-    void Tensor<i16>::transpose_(idx_type dim0, idx_type dim1)
-    {
-        if(dim0 != dim1 &&
-            _dimensions.in_range(dim0) &&
-            _dimensions.in_range(dim1))
-        {
-            std::swap(_dimensions[dim0], _dimensions[dim1]);
-            std::swap(_strides[dim0], _strides[dim1]);
-        }
-    }
-
-    std::shared_ptr<TensorInterface> Tensor<i16>::transpose(idx_type dim0, idx_type dim1)
-    {
-        std::shared_ptr<Tensor<i16>> result(new Tensor<i16>);
-        result->_rep = _rep;
-        result->_dimensions = _dimensions;
-        result->_offset = _offset;
-        result->_strides = _strides;
-
-        result->transpose_(dim0, dim1);
-
-        return result;
-    }
-}

+ 479 - 66
traph/source/tensor/tensor.cpp

@@ -1,25 +1,97 @@
 #include <traph/tensor/tensor.h>
 
-
 namespace traph
 {
-    // definition
+	// definition
+    // private
+    template<typename T>
+    void Tensor<T>::auto_strides()
+    {
+        idx_type dim_num = _dimensions.size();
+        _strides.resize(dim_num);
+        idx_type stride = 1;
+        for (idx_type i = dim_num - 1; i >= 0; --i)
+        {
+            _strides[i] = stride;
+            stride *= _dimensions[i];
+        }
+    }
+
+    template<typename T>
+    void Tensor<T>::reduce_impl(T& result, idx_type dim, idx_type idx, std::function<T(T,T)> f) const
+    {
+        idx_type dim_size = _dimensions.size();
+
+        idx_type step_len = _strides[dim];
+        idx_type step_num = _dimensions[dim];
+
+        for(idx_type i = 0; i < step_num; ++i)
+        {
+            if(dim == dim_size - 1)
+                result = f(result, _rep->data[idx]);
+            else
+                reduce_impl(result, dim + 1, idx, f);
+            idx += step_len;
+        }
+    }
+
+    template<typename T>
+    T Tensor<T>::reduce_dim_kernel(idx_type begin, idx_type step_len, idx_type step_num, std::function<T(T,T)> f) const
+    {
+        T result{};
+        for(idx_type i = 0; i < step_num; ++i)
+        {
+            result = f(result, _rep->data[begin]);
+            begin += step_len;
+        }
+        return result;
+    }
+
+    template<typename T>
+    void Tensor<T>::reduce_dim_impl(Tensor<T>& result, idx_type dim, idx_type reduce_dim,
+        idx_type this_idx, idx_type result_idx,
+        std::function<T(T,T)> f) const
+    {
+        idx_type dim_size = _dimensions.size();
+
+        if(dim == dim_size)
+        {
+            result._rep->data[result_idx] = 
+                reduce_dim_kernel(this_idx, _strides[reduce_dim], _dimensions[reduce_dim], f);
+            return;
+        }
+
+        if(dim == reduce_dim)
+        {
+            reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
+        }
+        else
+        {
+            for(idx_type i = 0; i < _dimensions[dim]; ++i)
+            {
+                reduce_dim_impl(result, dim + 1, reduce_dim, this_idx,result_idx, f);
+                    
+                this_idx += _strides[dim];
+                result_idx += result._strides[dim];
+            }
+        }
+    }
     // public
     template<typename T>
     Tensor<T>::Tensor()
         :_rep(new TensorStorage<T>),
         _dimensions(), _offset(0), _strides()
     {
-        throw std::runtime_error("No implement");
     }
 
-
     template<typename T>
     Tensor<T>::Tensor(const DimVector& dimensions)
         :_rep(new TensorStorage<T>),
         _dimensions(dimensions), _offset(0), _strides()
     {
-        throw std::runtime_error("No implement");
+        auto_strides();
+        
+        _rep->resize_(_dimensions.flat_size());
     }
 
     template<typename T>
@@ -27,7 +99,9 @@ namespace traph
         :_rep(new TensorStorage<T>),
         _dimensions(dimensions), _offset(0), _strides(strides)
     {
-        throw std::runtime_error("No implement");
+        auto_strides();
+
+        _rep->resize_(_dimensions.flat_size());
     }
 
     template<typename T>
@@ -35,201 +109,540 @@ namespace traph
         :_rep(new TensorStorage<T>),
         _dimensions(), _offset(0), _strides()
     {
-        throw std::runtime_error("No implement");
+        _dimensions.resize(1);
+        auto_strides();
     }
 
     template<typename T>
     void Tensor<T>::add_(TensorInterfacePtr other)
     {
-		throw std::runtime_error("No implement");
+		// check tensor other type
+        if(other->dtype() != DataType::FLOAT)
+            throw std::runtime_error("expected type float tensor");
+		// check broadcast.shape = this.shape
+        auto shape = broadcast_shape(this->size(), other->size());
+        if(shape != this->size())
+            throw std::runtime_error("The size of tensor a must match the size of tensor b");
+		// ok, get lhs, rhs
+		Tensor<T> * lhs = this;
+		Tensor<T> * rhs = dynamic_cast<Tensor<T> *>(other.get());
+		std::function<void(idx_type, idx_type, idx_type, idx_type)> add_impl =
+			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<T>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<T>>(rhs->storage())->data_ptr();
+
+			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
+                {
+                    lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
+                }
+                else
+                {
+                    add_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+                }
+
+				if(lsh_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rsh_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		add_impl(-1, -1, lhs->offset(), rhs->offset());
     }
+
     template<typename T>
     void Tensor<T>::apply_(std::function<T(T)> f)
     {
-        throw std::runtime_error("No implement");
+        // sort stride for cache optimization
+		DimVector cloned_stride(_strides);
+        DimVector sorted_stride(_strides.size());
+        for(int i = 0; i<_strides.size(); ++i)
+            sorted_stride[i] = i;
+        
+        for (int i = 0; i < cloned_stride.size() - 1; i++)
+            for (int j = 0; j < cloned_stride.size() - 1 - i; j++)
+                if (cloned_stride[j] < cloned_stride[j + 1])
+                {
+                    std::swap(cloned_stride[j], cloned_stride[j+1]);
+                    std::swap(sorted_stride[j], sorted_stride[j+1]);
+                }
+        
+        std::function<void(idx_type, idx_type, std::function<T(T)>)> apply_impl =
+        [&](idx_type dim_idx, idx_type idx, std::function<T(T)> f){
+            idx_type dim = sorted_stride[dim_idx];
+            idx_type dim_size = _dimensions.size();
+
+            idx_type step_len = _strides[dim];
+            idx_type step_num = _dimensions[dim];
+            
+            for(idx_type i = 0; i < step_num; ++i)
+            {
+                if(dim_idx == dim_size - 1)
+                    _rep->data[idx] = f(_rep->data[idx]);
+                else
+                    apply_impl(dim_idx + 1, idx, f);
+                idx += step_len;
+            }
+        };
+
+        if(_dimensions.size() > 0)
+            apply_impl(0, _offset, f);
     }
+
     template<typename T>
     TensorInterfacePtr Tensor<T>::clone() const
     {
-        throw std::runtime_error("No implement");
+        std::shared_ptr<Tensor<T>> cloned_tensor(new Tensor<T>);
+        cloned_tensor->_rep = std::dynamic_pointer_cast<TensorStorage<T>>(_rep->clone());
+        cloned_tensor->_dimensions = _dimensions;
+        cloned_tensor->_offset = _offset;
+        cloned_tensor->_strides = _strides;
+        
+        return cloned_tensor;
     }
+
     template<typename T>
     void Tensor<T>::cos_()
     {
-        throw std::runtime_error("No implement");
+        apply_([](T a)->f32 {return std::cos(a); });
     }
+
     template<typename T>
     std::shared_ptr<TensorBase<f32>> Tensor<T>::create_grad()
     {
-        throw std::runtime_error("No implement");
+        return std::shared_ptr<TensorBase<f32>>(new Tensor<f32>(_dimensions));
     }
+
     template<typename T>
-    T* Tensor<T>::data_ptr()
+	T* Tensor<T>::data_ptr()
     {
-        throw std::runtime_error("No implement");
+        return _rep->data_ptr();
     }
+
     template<typename T>
     const T* Tensor<T>::data_ptr() const
     {
-        throw std::runtime_error("No implement");
+        return _rep->data_ptr();
     }
+
     template<typename T>
-    device_id Tensor<T>::device() { throw std::runtime_error("No implement"); }
+    device_id Tensor<T>::device() { return 0; }
 
     template<typename T>
     DataType Tensor<T>::dtype() const
     {
-        throw std::runtime_error("No implement");
+        return DataType::FLOAT;
     }
 
     template<typename T>
     bool Tensor<T>::equal(std::shared_ptr<TensorInterface> other) const
     {
-        throw std::runtime_error("No implement");
+        if(other->platform() != this->platform())
+            throw std::runtime_error("equal: Two tensors must be the same platform");
+        
+        if(other->dtype() != this->dtype())
+            return false;
+
+        if(other->size() != this->size())
+            return false;
+
+        std::shared_ptr<Tensor<T>> other_ptr = std::dynamic_pointer_cast<Tensor<T>>(other);
+        
+        std::function<bool(idx_type, T*, T*)> equal_impl =
+        [&](idx_type dim, T* lhs_idx, T* rhs_idx){
+            idx_type dim_size = _dimensions.size();
+            
+            for(idx_type i = 0; i < _dimensions[dim]; ++i)
+            {
+                if(dim == dim - 1)
+                {
+                    if(*lhs_idx != *rhs_idx) return false;
+                }
+                else
+                {
+                    if(!equal_impl(dim + 1, lhs_idx, rhs_idx)) return false;
+                }
+                lhs_idx += _strides[dim];
+                rhs_idx += other_ptr->stride(dim);
+            }
+            return true;
+        };
+
+        return equal_impl(0, _rep->data_ptr() + _offset, other_ptr->data_ptr() + other_ptr->offset());
     }
 
     template<typename T>
-    void Tensor<T>::fill_(T value)
-    {
-        throw std::runtime_error("No implement");
-    }
+	std::shared_ptr<TensorInterface> Tensor<T>::inverse() const
+	{
+		// FIX ME
+		// return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
+		return nullptr;
+	}
 
     template<typename T>
-    std::shared_ptr<TensorInterface> Tensor<T>::inverse() const
+    void Tensor<T>::fill_(T value)
     {
-        // return std::dynamic_pointer_cast<TensorInterface>(inverse_impl(*this));
-		throw std::runtime_error("No implement");
+        apply_([&value](T a)->T {return value; });
     }
 
     template<typename T>
-    T Tensor<T>::item() const
+	T Tensor<T>::item() const
     {
-        throw std::runtime_error("No implement");
+        if(_dimensions.flat_size() == 1)
+        {
+            return _rep->data[_offset];
+        }
+        else
+        {
+            throw std::runtime_error("item: only one element tensors can be converted to scalars");
+        }
     }
+
     template<typename T>
-    std::shared_ptr<TensorInterface> Tensor<T>::matmul(std::shared_ptr<TensorInterface> mat) const
-    {
-		throw std::runtime_error("No implement");
-    }
+	std::shared_ptr<TensorInterface> Tensor<T>::matmul(std::shared_ptr<TensorInterface> mat) const
+	{
+		auto right_matrix = std::dynamic_pointer_cast<Tensor<T>>(mat);
+		return matmul_impl(*this, *right_matrix);
+	}
 
     template<typename T>
     TensorInterfacePtr Tensor<T>::mean() const
     {
-        throw std::runtime_error("No implement");
+        DimVector d(1);
+        d[0] = 1;
+
+        TensorPtr<T> result(new Tensor<T>(d));
+        auto flat_size = _dimensions.flat_size();
+        result->_rep->data[0] = reduce([](T a, T b)->T {return a + b; });
+        result->_rep->data[0] /= flat_size;
+        return std::dynamic_pointer_cast<TensorInterface>(result);
     }
 
     template<typename T>
     void Tensor<T>::mul_(T value)
     {
-        throw std::runtime_error("No implement");
+        apply_([value](T a)->T {return a*value; });
     }
 
     template<typename T>
     void Tensor<T>::mul_(std::shared_ptr<TensorInterface> other)
     {
-        throw std::runtime_error("No implement");
+        // check tensor other type
+        if(other->dtype() != DataType::FLOAT)
+            throw std::runtime_error("expected type float tensor");
+		// check broadcast.shape = this.shape
+        auto shape = broadcast_shape(this->size(), other->size());
+        if(shape != this->size())
+            throw std::runtime_error("The size of tensor a must match the size of tensor b");
+		// ok, get lhs, rhs
+		Tensor<T> * lhs = this;
+		Tensor<T> * rhs = dynamic_cast<Tensor<T> *>(other.get());
+		std::function<void(idx_type, idx_type, idx_type, idx_type)> mul_impl =
+			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<T>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<T>>(rhs->storage())->data_ptr();
+
+			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
+                {
+                    lhs_storage[lhs_idx] *= rhs_storage[rhs_idx];
+                }
+                else
+                {
+                    mul_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+                }
+
+				if(lsh_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rsh_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		mul_impl(-1, -1, lhs->offset(), rhs->offset());
     }
 
     template<typename T>
     idx_type Tensor<T>::ndimension() const
     {
-        throw std::runtime_error("No implement");
+        return _dimensions.size();
     }
 
     template<typename T>
     void Tensor<T>::neg_()
     {
-        throw std::runtime_error("No implement");
+        apply_([](T a)->T {return -a; });
     }
 
     template<typename T>
-    idx_type Tensor<T>::offset() const { throw std::runtime_error("No implement"); }
+    idx_type Tensor<T>::offset() const { return _offset; }
 
     template<typename T>
     std::shared_ptr<TensorInterface> Tensor<T>::permute(const DimVector& dims) const
     {
-        throw std::runtime_error("No implement");
+        // check dims
+        if(dims.size() != _strides.size())
+            throw std::runtime_error("permute dimension must have the same size");
+        std::vector<int> check_vec(dims.size(), 0);
+        for(int i = 0; i < dims.size();++i)
+            if(dims[i] >= 0 && dims[i] < dims.size())
+                check_vec[dims[i]] = 1;
+            else
+                throw std::runtime_error("permute dimension must in ndimension range");
+        
+        for(int i = 0; i < check_vec.size();++i)
+        {
+            if(check_vec[i] != 1)
+                throw std::runtime_error("permute dimension error");
+        }
+        // permute
+        std::shared_ptr<Tensor<T>> result(new Tensor<T>);
+        result->_rep = _rep;
+        result->_dimensions = _dimensions;
+        result->_offset = _offset;
+        result->_strides = _strides;
+
+        for(int i=0; i<dims.size(); ++i)
+        {
+            result->_dimensions[i] = _dimensions[dims[i]];
+            result->_strides[i] = _strides[dims[i]];
+        }
+
+        return result;
     }
+
     template<typename T>
-    PlatformType Tensor<T>::platform() const { throw std::runtime_error("No implement"); }
+    PlatformType Tensor<T>::platform() const { return PlatformType::CPU; }
+
     template<typename T>
     void Tensor<T>::pow_(f32 exp)
     {
-        throw std::runtime_error("No implement");
+        apply_([&exp](T a)->T {return std::pow(a, exp); });
     }
+
     template<typename T>
-    T Tensor<T>::reduce(std::function<T(T,T)> f) const
+	T Tensor<T>::reduce(std::function<T(T, T)> f) const
     {
-        throw std::runtime_error("No implement");
+		T result{};
+        reduce_impl(result, 0, _offset, f);
+        return result;
     }
+    
     template<typename T>
-    TensorInterfacePtr Tensor<T>::reduce_dim(idx_type dim, std::function<T(T,T)> f) const
+    TensorInterfacePtr Tensor<T>::reduce_dim(idx_type dim, std::function<T(T, T)> f) const
     {
-        throw std::runtime_error("No implement");
+        DimVector reduced_dim = _dimensions;
+        reduced_dim.erase(dim); // check dim?
+        TensorBasePtr<T> result(new Tensor<T>(reduced_dim));
+        TensorPtr<T> raw_result = std::dynamic_pointer_cast<Tensor<T>>(result);
+        reduce_dim_impl(*(raw_result.get()), 0, dim, _offset, raw_result->_offset, f);
+        return std::dynamic_pointer_cast<TensorInterface>(result);
     }
+    
     template<typename T>
     void Tensor<T>::reshape_(const DimVector& dims)
     {
-        throw std::runtime_error("No implement");
+
     }
+    
     template<typename T>
     void Tensor<T>::resize_(const DimVector& dims)
     {
-        throw std::runtime_error("No implement");
+        _dimensions = dims;
+        _rep->resize_(dims.flat_size());
+        auto_strides();
     }
+
     template<typename T>
-    std::shared_ptr<TensorInterface> Tensor<T>::select(const SliceVector& slice) const
-    {
-        throw std::runtime_error("No implement");
-    }
+	std::shared_ptr<TensorInterface> Tensor<T>::select(const SliceVector& slice) const
+	{
+		std::shared_ptr<Tensor<T>> result(new Tensor<T>);
+		result->_rep = _rep;
+
+		// dimension
+		DimVector dim;
+		std::fesetround(FE_TONEAREST);
+		for (idx_type i = 0; i < slice.size(); ++i)
+		{
+			auto& each = slice[i];
+			dim.push_back(
+				std::lrint(std::ceil((each.end.value_or(_dimensions[i]) - each.start.value_or(0)) / (float)each.step.value_or(1)))
+			);
+		}
+		result->_dimensions = dim;
+
+		// offset
+		idx_type new_offset = 1;
+		for (idx_type i = 0; i < slice.size(); ++i)
+		{
+			new_offset *= _strides[i] * slice[i].start.value_or(0);
+		}
+		result->_offset = _offset + new_offset;
+
+		// strides
+		DimVector strides;
+		for (idx_type i = 0; i < slice.size(); ++i)
+		{
+			strides.push_back(_strides[i] * slice[i].step.value_or(1));
+		}
+		result->_strides = strides;
+
+		return std::dynamic_pointer_cast<TensorInterface>(result);
+	}
+
     template<typename T>
     void Tensor<T>::sin_()
     {
-        throw std::runtime_error("No implement");
+        apply_([](T a)->T {return std::sin(a); });
     }
+
+    template<typename T>
+    DimVector Tensor<T>::size() const { return _dimensions;}
+	
     template<typename T>
-    DimVector Tensor<T>::size() const { throw std::runtime_error("No implement"); }
-	template<typename T>
 	idx_type Tensor<T>::size(idx_type i) const
 	{ 
-		throw std::runtime_error("No implement");
+		auto shape_size = _dimensions.size();
+		if (i >= 0 && i < _dimensions.size())
+			return _dimensions[i];
+		else if (i <= -1 && i >= -_dimensions.size())
+			return _dimensions[shape_size + i];
+		else
+			throw std::runtime_error("Dimension out of range");
 	}
+
+    template<typename T>
+	std::shared_ptr<StorageBase<T>>  Tensor<T>::storage() const { return _rep; }
+
     template<typename T>
-	std::shared_ptr<StorageBase<T>>  Tensor<T>::storage() const { throw std::runtime_error("No implement"); }
+    DimVector Tensor<T>::stride() const { return _strides; }
+
     template<typename T>
-    DimVector Tensor<T>::stride() const { throw std::runtime_error("No implement"); }
-	template<typename T>
 	idx_type Tensor<T>::stride(idx_type i) const
 	{
-		throw std::runtime_error("No implement");
+		auto stride_size = _strides.size();
+		if (i >= 0 && i < _strides.size())
+			return _strides[i];
+		else if (i <= -1 && i >= -_strides.size())
+			return _strides[stride_size + i];
+		else
+			throw std::runtime_error("Stride out of range");
 	}
 
     template<typename T>
     void Tensor<T>::sub_(std::shared_ptr<TensorInterface> other)
     {
-        throw std::runtime_error("No implement");
-    }
+        Tensor<T> * lhs = this;
+		Tensor<T> * rhs = dynamic_cast<Tensor<T> *>(other.get());
+		std::function<void(Tensor<T> *, Tensor<T> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<T> * lhs, Tensor<T> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<T>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<T>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
 
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
+    
     template<typename T>
     TensorInterfacePtr Tensor<T>::sum() const
     {
-        throw std::runtime_error("No implement");
+        DimVector d(1);
+        d[0] = 1;
+
+        TensorPtr<T> result(new Tensor<T>(d));
+        result->_rep->data[0] = reduce([](T a, T b)->T {return a + b; });
+        return std::dynamic_pointer_cast<TensorInterface>(result);
     }
+
     template<typename T>
     std::string Tensor<T>::to_string() const
     {
-        throw std::runtime_error("No implement");
+        std::function<std::string(const Tensor<T>&, idx_type, idx_type)> to_string_impl =
+			[&](const Tensor<T>& t, idx_type dim, idx_type idx)->std::string {
+            std::string result;
+			if (dim == t.size().size())
+            {
+                result += std::to_string(t.data_ptr()[idx]);
+				return result;
+            }
+
+			for (idx_type i = 0; i < t.size(dim); ++i)
+			{
+				if (dim != t.size().size() - 1 && i != 0) result += ",\n";
+				if(dim != t.size().size() - 1)	result += "[";
+				result += to_string_impl(t, dim + 1, idx);
+				if (i != t.size(dim) - 1 && dim == t.size().size() - 1)
+					result += ",";
+				if (dim != t.size().size() - 1) result += "]";
+
+				idx += t.stride(dim);
+			}
+
+			return result;
+		};
+
+		std::string result;
+		result += "[" + to_string_impl(*this, 0, offset()) + "]";
+		return result;
     }
 
     template<typename T>
-    void transpose_(idx_type dim0, idx_type dim1)
+    void Tensor<T>::transpose_(idx_type dim0, idx_type dim1)
     {
-        throw std::runtime_error("No implement");
+        if(dim0 != dim1 &&
+            _dimensions.in_range(dim0) &&
+            _dimensions.in_range(dim1))
+        {
+            std::swap(_dimensions[dim0], _dimensions[dim1]);
+            std::swap(_strides[dim0], _strides[dim1]);
+        }
     }
 
     template<typename T>
-    std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1)
+    std::shared_ptr<TensorInterface> Tensor<T>::transpose(idx_type dim0, idx_type dim1)
     {
-        throw std::runtime_error("No implement");
+        std::shared_ptr<Tensor<T>> result(new Tensor<T>);
+        result->_rep = _rep;
+        result->_dimensions = _dimensions;
+        result->_offset = _offset;
+        result->_strides = _strides;
+
+        result->transpose_(dim0, dim1);
+
+        return result;
     }
 }