Kaynağa Gözat

add neg_, pow_ and MSELoss

JasonWang 6 yıl önce
ebeveyn
işleme
1506cb8974

+ 11 - 1
README.md

@@ -1,2 +1,12 @@
 # traph
-Traph is a open-source machine learning platform.
+Traph is an open-source `toy` machine learning platform. It is inspired by torch.
+
+# Installation
+To install the CPU-only traph:
+`pip install pytraph`
+
+# Compilation
+
+# License
+MIT
+

+ 6 - 0
traph/include/traph/core/tensor.h

@@ -35,10 +35,12 @@ namespace traph
         virtual device_id device() = 0;
         virtual std::shared_ptr<TensorInterface> inverse() const = 0;
         virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const = 0;
+        virtual void neg_() = 0;
         virtual idx_type offset() const = 0;
 		virtual layout_type order() const = 0;
         // virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const = 0;
         virtual platform_type platform() = 0;
+        virtual void pow_(f32 exp) = 0;
         virtual void reshape_(const DimVector& dims) = 0;
         virtual void resize_(const DimVector& dims) = 0;
         virtual std::shared_ptr<TensorInterface> select(const SliceVector& slice) const = 0;
@@ -47,6 +49,7 @@ namespace traph
 		virtual idx_type size(idx_type i) const = 0;
 		virtual DimVector stride() const = 0;
 		virtual idx_type stride(idx_type i) const = 0;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) = 0;
         virtual shared_pointer sum() const = 0;
         virtual std::string to_string() const = 0;
         virtual void transpose_(idx_type dim0, idx_type dim1) = 0;
@@ -84,9 +87,11 @@ namespace traph
         virtual std::shared_ptr<TensorInterface> inverse() const = 0;
         virtual T item() const = 0;
         virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const = 0;
+        virtual void neg_() = 0;
         virtual idx_type offset() const = 0;
 		virtual layout_type order() const = 0;
         virtual platform_type platform() = 0;
+        virtual void pow_(f32 exp) = 0;
         virtual T reduce_(std::function<T(T,T)> f) const = 0;
         virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<T(T,T)> f) const = 0;
         virtual void reshape_(const DimVector& dims) = 0;
@@ -98,6 +103,7 @@ namespace traph
         virtual std::shared_ptr<StorageBase<T>> storage() const = 0;
 		virtual DimVector stride() const = 0;
 		virtual idx_type stride(idx_type i) const = 0;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) = 0;
         virtual TensorInterfacePtr sum() const = 0;
         virtual std::string to_string() const = 0;
         virtual void transpose_(idx_type dim0, idx_type dim1) = 0;

+ 25 - 0
traph/include/traph/nn/function.h

@@ -206,6 +206,31 @@ namespace traph
 		return result;
 	}
 
+	VariableInterfacePtr sub(VariableInterfacePtr left, VariableInterfacePtr right)
+	{
+		DimVector result_dim;
+
+        VariableInterfacePtr result = left->new_empty(result_dim, true);
+		std::shared_ptr<SubOp> op(new SubOp);
+		result->data_(op->forward({ left->data(), right->data() }));
+		result->leaf_(false);
+		if (left->requires_grad() || right->requires_grad())
+		{
+			std::vector<VariableInterfacePtr> result_inputs{ left, right };
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+			result->requires_grad_(true);
+			result->grad_fn_(op);
+			result->inputs_(result_inputs);
+		}
+		else
+		{
+			result->requires_grad_(false);
+		}
+
+		return result;
+	}
+
 	VariableInterfacePtr transpose(VariableInterfacePtr input, idx_type dim0, idx_type dim1)
 	{
 		DimVector result_dim;

+ 42 - 0
traph/include/traph/nn/layers/linear.h

@@ -0,0 +1,42 @@
+#ifndef TRAPH_NN_LAYERS_LINEAR
+#define TRAPH_NN_LAYERS_LINEAR
+
+
+#include <traph/nn/module.h>
+
+namespace traph
+{
+    class Linear: public Module
+    {
+    private:
+        int _in_features;
+        int _out_features;
+        std::shared_ptr<VariableInterface> _weight;
+        std::shared_ptr<VariableInterface> _bias;
+    public:
+        Linear(int in_features, int out_features, bool bias)
+        {
+            _in_features = in_features;
+            _out_features = out_features;
+            _weight = std::shared_ptr<VariableInterface>(new FloatParameter({out_features, in_features}));
+            if(bias)
+                _bias = std::shared_ptr<VariableInterface>(new FloatParameter({out_features}));
+            
+            register_parameter("weight", std::dynamic_pointer_cast<FloatParameter>(_weight));
+            register_parameter("bias", std::dynamic_pointer_cast<FloatParameter>(_bias));
+        }
+
+        std::shared_ptr<VariableInterface> forward(std::shared_ptr<VariableInterface> input)
+        {
+            std::shared_ptr<VariableInterface> result;
+            if(_bias)
+                result = add(matmul(input, transpose(_weight, 0, 1)), _bias);
+            else
+                result = matmul(input, transpose(_weight, 0, 1));
+            
+            return result;
+        }
+    };
+}
+
+#endif // TRAPH_NN_LAYERS_LINEAR

+ 46 - 0
traph/include/traph/nn/layers/loss.h

@@ -0,0 +1,46 @@
+#ifndef TRAPH_NN_LAYERS_LOSS
+#define TRAPH_NN_LAYERS_LOSS
+
+#include <traph/nn/module.h>
+
+namespace traph
+{
+    enum class MSELossReduction
+    {
+        NONE,
+        MEAN,
+        SUM
+    };
+
+    class MSELoss: public Module
+    {
+    private:
+        MSELossReduction _reduction;
+    public:
+        MSELoss(MSELossReduction reduction = MSELossReduction::MEAN)
+            :_reduction(reduction)
+        {
+        }
+
+        std::shared_ptr<VariableInterface> forward(std::shared_ptr<VariableInterface> input, std::shared_ptr<VariableInterface> target)
+        {
+            std::shared_ptr<VariableInterface> ret;
+            if(_reduction == MSELossReduction::SUM)
+            {
+                ret = sum(sub(input, target));
+            }
+            else if(_reduction == MSELossReduction::MEAN)
+            {
+                // fixme: use mean if it impled
+                ret = sum(sub(input, target));
+            }
+            else
+            {
+                ret = sum(sub(input, target));
+            }
+            return ret;
+        }
+    };
+}
+
+#endif // TRAPH_NN_LAYERS_LOSS

+ 0 - 32
traph/include/traph/nn/module.h

@@ -40,38 +40,6 @@ namespace traph
             _parameters[name] = param;
         }
     };
-
-    class LinearModule: public Module
-    {
-    private:
-        int _in_features;
-        int _out_features;
-        std::shared_ptr<VariableInterface> _weight;
-        std::shared_ptr<VariableInterface> _bias;
-    public:
-        LinearModule(int in_features, int out_features, bool bias)
-        {
-            _in_features = in_features;
-            _out_features = out_features;
-            _weight = std::shared_ptr<VariableInterface>(new FloatParameter({out_features, in_features}));
-            if(bias)
-                _bias = std::shared_ptr<VariableInterface>(new FloatParameter({out_features}));
-            
-            register_parameter("weight", std::dynamic_pointer_cast<FloatParameter>(_weight));
-            register_parameter("bias", std::dynamic_pointer_cast<FloatParameter>(_bias));
-        }
-
-        std::shared_ptr<VariableInterface> forward(std::shared_ptr<VariableInterface> input)
-        {
-            std::shared_ptr<VariableInterface> result;
-            if(_bias)
-                result = add(matmul(input, transpose(_weight, 0, 1)), _bias);
-            else
-                result = matmul(input, transpose(_weight, 0, 1));
-            
-            return result;
-        }
-    };
 } // traph
 
 #endif

+ 24 - 0
traph/include/traph/nn/operation.h

@@ -162,6 +162,30 @@ namespace traph
 		}
 	};
 
+	class SubOp : public OpBase
+	{
+	public:
+		virtual TensorInterfacePtr forward(std::vector<TensorInterfacePtr> inputs) override
+		{
+			assert(inputs.size() == 2);
+
+			TensorInterfacePtr left_input = inputs[0];
+			TensorInterfacePtr right_input = inputs[1];
+			TensorInterfacePtr result = left_input->clone();
+            result->sub_(right_input);
+
+			return result;
+		}
+
+		virtual std::vector<TensorBasePtr<f32>> backward(TensorBasePtr<f32> output_grad) override
+		{
+			auto left = output_grad;
+			auto right = output_grad->clone();
+			right->neg_();
+			return { output_grad, std::dynamic_pointer_cast<TensorBase<f32>>(right) };
+		}
+	};
+
 	class TransposeOp : public OpBase
 	{
 	private:

+ 4 - 3
traph/include/traph/nn/variable.h

@@ -189,9 +189,10 @@ namespace traph
 			std::vector<TensorBasePtr<f32>> back_grad = cur_node->grad_fn()->backward(cur_node->grad());
 
 			assert(back_grad.size() == cur_node->inputs().size());
-			for (int i = 0; i < cur_node->inputs().size(); ++i)
+			for (int j = 0; j < cur_node->inputs().size(); ++j)
 			{
-				cur_node->inputs()[i]->grad()->add_(back_grad[i]);
+				if(cur_node->inputs()[j]->requires_grad())
+					cur_node->inputs()[j]->grad()->add_(back_grad[j]);
 			}
 		}
 
@@ -260,7 +261,7 @@ namespace traph
     template<typename T>
     bool Variable<T>::is_leaf() const
     {
-        return _leaf;
+        return !_grad_fn;
     }
 
 	template<typename T>

+ 4 - 1
traph/include/traph/tensor/byte_tensor.h

@@ -66,9 +66,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual u8 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual u8 reduce_(std::function<u8(u8, u8)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<u8(u8, u8)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -80,6 +82,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<u8>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 4 - 1
traph/include/traph/tensor/char_tensor.h

@@ -66,9 +66,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual i8 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual i8 reduce_(std::function<i8(i8, i8)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i8(i8, i8)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -80,6 +82,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<i8>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 4 - 1
traph/include/traph/tensor/double_tensor.h

@@ -66,9 +66,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual f64 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual f64 reduce_(std::function<f64(f64, f64)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<f64(f64, f64)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -80,6 +82,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<f64>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 4 - 1
traph/include/traph/tensor/float_tensor.h

@@ -67,9 +67,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual f32 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual f32 reduce_(std::function<f32(f32, f32)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<f32(f32, f32)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -81,6 +83,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<f32>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 4 - 1
traph/include/traph/tensor/int_tensor.h

@@ -66,9 +66,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual i32 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual i32 reduce_(std::function<i32(i32, i32)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i32(i32, i32)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -80,6 +82,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<i32>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 3 - 0
traph/include/traph/tensor/long_tensor.h

@@ -66,9 +66,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual i64 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
+		virtual void neg_() override;
 		virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual i64 reduce_(std::function<i64(i64, i64)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i64(i64, i64)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -80,6 +82,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<i64>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 4 - 1
traph/include/traph/tensor/short_tensor.h

@@ -66,9 +66,11 @@ namespace traph
 		virtual std::shared_ptr<TensorInterface> inverse() const override;
 		virtual i16 item() const override;
 		virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
 		virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
 		virtual i16 reduce_(std::function<i16(i16, i16)> f) const override;
 		virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<i16(i16, i16)> f) const override;
 		virtual void reshape_(const DimVector& dims) override;
@@ -80,6 +82,7 @@ namespace traph
 		virtual std::shared_ptr<StorageBase<i16>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 4 - 1
traph/include/traph/tensor/tensor.h

@@ -68,9 +68,11 @@ namespace traph
         virtual std::shared_ptr<TensorInterface> inverse() const override;
         virtual T item() const override;
         virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const override;
-		virtual idx_type offset() const override;
+		virtual void neg_() override;
+        virtual idx_type offset() const override;
 		virtual layout_type order() const override;
         virtual platform_type platform() override;
+        virtual void pow_(f32 exp) override;
         virtual T reduce_(std::function<T(T,T)> f) const override;
         virtual TensorInterfacePtr reduce_dim(idx_type dim, std::function<T(T,T)> f) const override;
         virtual void reshape_(const DimVector& dims) override;
@@ -82,6 +84,7 @@ namespace traph
         virtual std::shared_ptr<StorageBase<T>> storage() const override;
 		virtual DimVector stride() const override;
 		virtual idx_type stride(idx_type i) const override;
+        virtual void sub_(std::shared_ptr<TensorInterface> other) override;
         virtual TensorInterfacePtr sum() const override;
         virtual std::string to_string() const override;
         virtual void transpose_(idx_type dim0, idx_type dim1) override;

+ 45 - 0
traph/source/tensor/byte_tensor.cpp

@@ -257,12 +257,23 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    void Tensor<u8>::neg_()
+    {
+        apply_([](u8 a)->u8 {return -a; });
+    }
+
     idx_type Tensor<u8>::offset() const { return _offset; }
 
     layout_type Tensor<u8>::order() const { return _order; }
 
     platform_type Tensor<u8>::platform() { return platform_type::none; }
 
+    void Tensor<u8>::pow_(f32 exp)
+    {
+        std::int32_t exp_int = static_cast<std::int32_t>(exp);
+        apply_([&exp_int](u8 a)->u8 {return static_cast<u8>(std::pow(a, exp_int)); });
+    }
+
 	u8 Tensor<u8>::reduce_(std::function<u8(u8, u8)> f) const
     {
 		u8 result{};
@@ -362,6 +373,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    void Tensor<u8>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<u8> * lhs = this;
+		Tensor<u8> * rhs = dynamic_cast<Tensor<u8> *>(other.get());
+		std::function<void(Tensor<u8> *, Tensor<u8> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<u8> * lhs, Tensor<u8> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<u8>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<u8>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<u8>::sum() const
     {

+ 45 - 0
traph/source/tensor/char_tensor.cpp

@@ -257,12 +257,23 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    void Tensor<i8>::neg_()
+    {
+        apply_([](i8 a)->i8 {return -a; });
+    }
+
     idx_type Tensor<i8>::offset() const { return _offset; }
 
     layout_type Tensor<i8>::order() const { return _order; }
 
     platform_type Tensor<i8>::platform() { return platform_type::none; }
 
+    void Tensor<i8>::pow_(f32 exp)
+    {
+        std::int32_t exp_int = static_cast<std::int32_t>(exp);
+        apply_([&exp_int](i8 a)->i8 {return static_cast<i8>(std::pow(a, exp_int)); });
+    }
+
 	i8 Tensor<i8>::reduce_(std::function<i8(i8, i8)> f) const
     {
 		i8 result{};
@@ -362,6 +373,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    void Tensor<i8>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<i8> * lhs = this;
+		Tensor<i8> * rhs = dynamic_cast<Tensor<i8> *>(other.get());
+		std::function<void(Tensor<i8> *, Tensor<i8> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<i8> * lhs, Tensor<i8> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i8>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i8>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<i8>::sum() const
     {

+ 44 - 0
traph/source/tensor/double_tensor.cpp

@@ -257,12 +257,22 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    void Tensor<f64>::neg_()
+    {
+        apply_([](f64 a)->f64 {return -a; });
+    }
+
     idx_type Tensor<f64>::offset() const { return _offset; }
 
     layout_type Tensor<f64>::order() const { return _order; }
 
     platform_type Tensor<f64>::platform() { return platform_type::none; }
 
+    void Tensor<f64>::pow_(f32 exp)
+    {
+        apply_([&exp](f64 a)->f64 {return std::pow(a, exp); });
+    }
+
 	f64 Tensor<f64>::reduce_(std::function<f64(f64, f64)> f) const
     {
 		f64 result{};
@@ -362,6 +372,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    void Tensor<f64>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<f64> * lhs = this;
+		Tensor<f64> * rhs = dynamic_cast<Tensor<f64> *>(other.get());
+		std::function<void(Tensor<f64> *, Tensor<f64> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<f64> * lhs, Tensor<f64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f64>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<f64>::sum() const
     {

+ 44 - 0
traph/source/tensor/float_tensor.cpp

@@ -258,12 +258,22 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    void Tensor<f32>::neg_()
+    {
+        apply_([](f32 a)->f32 {return -a; });
+    }
+
     idx_type Tensor<f32>::offset() const { return _offset; }
 
     layout_type Tensor<f32>::order() const { return _order; }
 
     platform_type Tensor<f32>::platform() { return platform_type::none; }
 
+    void Tensor<f32>::pow_(f32 exp)
+    {
+        apply_([&exp](f32 a)->f32 {return std::pow(a, exp); });
+    }
+
 	f32 Tensor<f32>::reduce_(std::function<f32(f32, f32)> f) const
     {
 		f32 result{};
@@ -363,6 +373,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    void Tensor<f32>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<f32> * lhs = this;
+		Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
+		std::function<void(Tensor<f32> *, Tensor<f32> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<f32> * lhs, Tensor<f32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<f32>::sum() const
     {

+ 45 - 0
traph/source/tensor/int_tensor.cpp

@@ -257,12 +257,23 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    void Tensor<i32>::neg_()
+    {
+        apply_([](i32 a)->i32 {return -a; });
+    }
+
     idx_type Tensor<i32>::offset() const { return _offset; }
 
     layout_type Tensor<i32>::order() const { return _order; }
 
     platform_type Tensor<i32>::platform() { return platform_type::none; }
 
+    void Tensor<i32>::pow_(f32 exp)
+    {
+        std::int32_t exp_int = static_cast<std::int32_t>(exp);
+        apply_([&exp_int](i32 a)->i32 {return static_cast<i32>(std::pow(a, exp_int)); });
+    }
+
 	i32 Tensor<i32>::reduce_(std::function<i32(i32, i32)> f) const
     {
 		i32 result{};
@@ -362,6 +373,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    void Tensor<i32>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<i32> * lhs = this;
+		Tensor<i32> * rhs = dynamic_cast<Tensor<i32> *>(other.get());
+		std::function<void(Tensor<i32> *, Tensor<i32> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<i32> * lhs, Tensor<i32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i32>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i32>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<i32>::sum() const
     {

+ 45 - 0
traph/source/tensor/long_tensor.cpp

@@ -257,12 +257,23 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    void Tensor<i64>::neg_()
+    {
+        apply_([](i64 a)->i64 {return -a; });
+    }
+
     idx_type Tensor<i64>::offset() const { return _offset; }
 
     layout_type Tensor<i64>::order() const { return _order; }
 
     platform_type Tensor<i64>::platform() { return platform_type::none; }
 
+    void Tensor<i64>::pow_(f32 exp)
+    {
+        std::int32_t exp_int = static_cast<std::int32_t>(exp);
+        apply_([&exp_int](i64 a)->i64 {return static_cast<i64>(std::pow(a, exp_int)); });
+    }
+
 	i64 Tensor<i64>::reduce_(std::function<i64(i64, i64)> f) const
     {
 		i64 result{};
@@ -362,6 +373,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    void Tensor<i64>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<i64> * lhs = this;
+		Tensor<i64> * rhs = dynamic_cast<Tensor<i64> *>(other.get());
+		std::function<void(Tensor<i64> *, Tensor<i64> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<i64> * lhs, Tensor<i64> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i64>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i64>>(rhs->storage())->data_ptr();
+
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<i64>::sum() const
     {

+ 45 - 0
traph/source/tensor/short_tensor.cpp

@@ -257,12 +257,23 @@ namespace traph
 		return matmul_impl(*this, *right_matrix);
 	}
 
+    // In-place negation: flips the sign of every element.
+    void Tensor<i16>::neg_()
+    {
+        apply_([](i16 a)->i16 {return -a; });
+    }
+
     idx_type Tensor<i16>::offset() const { return _offset; }
 
     layout_type Tensor<i16>::order() const { return _order; }
 
     platform_type Tensor<i16>::platform() { return platform_type::none; }
 
+    // In-place element-wise power. NOTE(review): the f32 exponent is
+    // truncated to a std::int32_t before use, so fractional exponents are
+    // silently floored for integer tensors -- confirm this is intended.
+    void Tensor<i16>::pow_(f32 exp)
+    {
+        std::int32_t exp_int = static_cast<std::int32_t>(exp);
+        apply_([&exp_int](i16 a)->i16 {return static_cast<i16>(std::pow(a, exp_int)); });
+    }
+
 	i16 Tensor<i16>::reduce_(std::function<i16(i16, i16)> f) const
     {
 		i16 result{};
@@ -362,6 +373,40 @@ namespace traph
 		else
 			throw std::runtime_error("Stride out of range");
 	}
+
+    // In-place element-wise subtraction with NumPy-style broadcasting.
+    // Recursively walks both tensors from the trailing dimension (dim -1)
+    // toward the leading one; a tensor whose negative dim index runs past
+    // its rank contributes a broadcast extent of 1 and its flat index is
+    // held fixed while the other side iterates.
+    // NOTE(review): the dynamic_cast result is not null-checked -- if
+    // `other` is not a Tensor<i16>, `rhs` is nullptr and this dereferences
+    // it; confirm callers guarantee matching element types.
+    void Tensor<i16>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        Tensor<i16> * lhs = this;
+		Tensor<i16> * rhs = dynamic_cast<Tensor<i16> *>(other.get());
+		std::function<void(Tensor<i16> *, Tensor<i16> *, idx_type, idx_type,idx_type, idx_type)> sub_impl =
+			[&](Tensor<i16> * lhs, Tensor<i16> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+
+			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<i16>>(lhs->storage())->data_ptr();
+			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<i16>>(rhs->storage())->data_ptr();
+
+			// Base case: both sides ran past their rank -> one scalar update.
+			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
+			{
+				lhs_storage[lhs_idx] -= rhs_storage[rhs_idx];
+				return;
+			}
+
+			// Extent at this dim for each operand (1 when broadcast past rank).
+			idx_type lhs_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
+			idx_type rhs_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
+			idx_type max_shape_size = std::max(lhs_shape_size, rhs_shape_size);
+
+			for (idx_type i = 0; i < max_shape_size; ++i)
+			{
+				sub_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+
+				// Advance only along non-broadcast dims; broadcast dims reuse
+				// the same flat index on every iteration.
+				if(lhs_shape_size > 1)
+					lhs_idx += lhs->stride(lhs_dim);
+				if (rhs_shape_size > 1)
+					rhs_idx += rhs->stride(rhs_dim);
+			}
+		};
+
+		// Start at the last dimension of each operand, at its storage offset.
+		sub_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+    }
     
     TensorInterfacePtr Tensor<i16>::sum() const
     {

+ 19 - 0
traph/source/tensor/tensor.cpp

@@ -114,6 +114,13 @@ namespace traph
     {
 		throw std::runtime_error("No implement");
     }
+
+    // Generic fallback: only the explicitly specialized element types
+    // implement neg_; any other T reports "No implement".
+    template<typename T>
+    void Tensor<T>::neg_()
+    {
+        throw std::runtime_error("No implement");
+    }
+
     template<typename T>
     idx_type Tensor<T>::offset() const { throw std::runtime_error("No implement"); }
     template<typename T>
@@ -121,6 +128,11 @@ namespace traph
     template<typename T>
     platform_type Tensor<T>::platform() { throw std::runtime_error("No implement"); }
     template<typename T>
+    // Generic fallback: only the explicitly specialized element types
+    // implement pow_; any other T reports "No implement".
+    void Tensor<T>::pow_(f32 exp)
+    {
+        throw std::runtime_error("No implement");
+    }
+    template<typename T>
     T Tensor<T>::reduce_(std::function<T(T,T)> f) const
     {
         throw std::runtime_error("No implement");
@@ -166,6 +178,13 @@ namespace traph
 	{
 		throw std::runtime_error("No implement");
 	}
+
+    // Generic fallback: only the explicitly specialized element types
+    // implement sub_; any other T reports "No implement".
+    template<typename T>
+    void Tensor<T>::sub_(std::shared_ptr<TensorInterface> other)
+    {
+        throw std::runtime_error("No implement");
+    }
+
     template<typename T>
     TensorInterfacePtr Tensor<T>::sum() const
     {

+ 12 - 4
traph/source/test/main.cpp

@@ -1,6 +1,7 @@
 #include <algorithm>
 
-#include <traph/nn/module.h>
+#include <traph/nn/layers/linear.h>
+#include <traph/nn/layers/loss.h>
 
 #include <iostream>
 
@@ -57,10 +58,17 @@ int main()
 	*/
 
 	int batch_size = 16;
-	auto a = traph::ones<traph::f32>({ batch_size,4 });
+	auto x = traph::ones<traph::f32>({ batch_size,4 });
+	auto y = traph::zeros<traph::f32>({ batch_size,2 });
 
-	traph::LinearModule linear_model(4, 2, false);
-	auto out = linear_model.forward(a);
+	traph::Linear linear_model(4, 2, false);
+	traph::MSELoss loss;
+
+	auto out = linear_model.forward(x);
+	auto result = loss.forward(out, y);
+
+	result->backward();
+	std::cout << result->data()->to_string();
 
     return 0;
 }