JasonWang před 6 roky
rodič
revize
fe8b410046

+ 3 - 3
traph/include/traph/core/operation.h

@@ -100,9 +100,9 @@ namespace traph
 		virtual std::vector<TensorBasePtr<f32>> backward(TensorBasePtr<f32> output_grad) override
 		{
 			auto saved_tensors = context.get_saved_tensors();
-			assert(saved_tensors.size() == 1);
-			std::shared_ptr<TensorBase<f32>> left_out = std::dynamic_pointer_cast<TensorBase<f32>>(output_grad->matmul(saved_tensors[0]->inverse()));
-			std::shared_ptr<TensorBase<f32>> right_out = std::dynamic_pointer_cast<TensorBase<f32>>(saved_tensors[0]->inverse()->matmul(output_grad));
+			assert(saved_tensors.size() == 2); // indices 0 and 1 are read below; presumably forward()'s left/right operands, in that order - confirm
+			std::shared_ptr<TensorBase<f32>> left_out = std::dynamic_pointer_cast<TensorBase<f32>>(output_grad->matmul(saved_tensors[1]->transpose(0, 1))); // left_out = output_grad x transpose(saved_tensors[1])
+			std::shared_ptr<TensorBase<f32>> right_out = std::dynamic_pointer_cast<TensorBase<f32>>(saved_tensors[0]->transpose(0, 1)->matmul(output_grad)); // right_out = transpose(saved_tensors[0]) x output_grad
 			return { left_out, right_out };
 		}
 	};

+ 4 - 0
traph/include/traph/core/tensor.h

@@ -48,6 +48,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const = 0;
         virtual shared_pointer sum() const = 0;
         virtual std::string to_string() const = 0;
+        virtual void transpose_(idx_type dim0, idx_type dim1) = 0; // returns nothing; presumably swaps dimensions dim0/dim1 of this tensor in place - TODO confirm once implemented
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) = 0; // hands the result back as a TensorInterface pointer; presumably leaves this tensor unchanged - TODO confirm
     };
 
     using TensorInterfacePtr = std::shared_ptr<TensorInterface>;
@@ -97,6 +99,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const = 0;
         virtual TensorInterfacePtr sum() const = 0;
         virtual std::string to_string() const = 0;
+        virtual void transpose_(idx_type dim0, idx_type dim1) = 0; // returns nothing; presumably swaps dimensions dim0/dim1 of this tensor in place - TODO confirm once implemented
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) = 0; // hands the result back as a TensorInterface pointer; presumably leaves this tensor unchanged - TODO confirm
     };
 
     using DoubleTensorBase = TensorBase<f64>;

+ 26 - 0
traph/include/traph/nn/arithmetic.h

@@ -66,6 +66,32 @@ namespace traph
 		return result;
 	}
 
+	// Matrix product of two variables, computed by MatmulOp::forward on their data.
+	// If either operand requires grad, the result joins the graph: it gets a
+	// zero-filled grad, the op as its grad_fn and both operands as inputs.
+	template<class T>
+	VariablePtr<T> matmul(VariablePtr<T> left, VariablePtr<T> right)
+	{
+		VariablePtr<T> out(new Variable<T>);
+		std::shared_ptr<MatmulOp> matmul_op(new MatmulOp);
+		const bool track_grad = left->_requires_grad || right->_requires_grad;

+		// The forward product and the non-leaf flag do not depend on grad tracking.
+		out->_data = std::dynamic_pointer_cast<TensorBase<T>>(matmul_op->forward({ left->_data, right->_data }));
+		out->_leaf = false;
+		out->_requires_grad = track_grad;
+		if (track_grad)
+		{
+			// Graph bookkeeping, only set up when a gradient is required.
+			out->_grad = out->_data->create_grad();
+			out->_grad->fill_(0);
+			out->_grad_fn = matmul_op;
+			out->_inputs = std::vector<VariableInterfacePtr>{ left, right };
+		}

+		return out;
+	}
+
 	
 	template<class T>
 	VariablePtr<T> select(VariablePtr<T> input, const SliceVector& slice)

+ 3 - 0
traph/include/traph/nn/variable.h

@@ -52,6 +52,9 @@ namespace traph
 		template<class T>
 		friend std::shared_ptr<Variable<T>> add(std::shared_ptr<Variable<T>> left, std::shared_ptr<Variable<T>> right);
 
+		template<class T>
+		friend std::shared_ptr<Variable<T>> matmul(std::shared_ptr<Variable<T>> left, std::shared_ptr<Variable<T>> right);
+
 		template<class T>
 		friend std::shared_ptr<Variable<T>> select(std::shared_ptr<Variable<T>> input, const SliceVector& slice);
 

+ 2 - 0
traph/include/traph/tensor/byte_tensor.h

@@ -82,6 +82,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using ByteTensor = Tensor<u8>;

+ 2 - 0
traph/include/traph/tensor/char_tensor.h

@@ -82,6 +82,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using CharTensor = Tensor<i8>;

+ 2 - 0
traph/include/traph/tensor/double_tensor.h

@@ -82,6 +82,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using DoubleTensor = Tensor<f64>;

+ 2 - 0
traph/include/traph/tensor/float_tensor.h

@@ -83,6 +83,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using FloatTensor = Tensor<f32>;

+ 2 - 0
traph/include/traph/tensor/int_tensor.h

@@ -82,6 +82,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using IntTensor = Tensor<i32>;

+ 2 - 0
traph/include/traph/tensor/long_tensor.h

@@ -82,6 +82,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using LongTensor = Tensor<i64>;

+ 2 - 0
traph/include/traph/tensor/short_tensor.h

@@ -82,6 +82,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
 		virtual TensorInterfacePtr sum() const override;
 		virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
     using ShortTensor = Tensor<i16>;

+ 2 - 0
traph/include/traph/tensor/tensor.h

@@ -84,6 +84,8 @@ namespace traph
 		virtual idx_type stride(idx_type i) const override;
         virtual TensorInterfacePtr sum() const override;
         virtual std::string to_string() const override;
+        virtual void transpose_(idx_type dim0, idx_type dim1) override;
+        virtual std::shared_ptr<TensorInterface> transpose(idx_type dim0, idx_type dim1) override;
     };
 
 	template<typename T>

+ 29 - 8
traph/source/tensor/arithmetic.cpp

@@ -23,7 +23,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<u8>> result(new Tensor<u8>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<u8>> result(new Tensor<u8>(dim));
 
 		// copy data
 		Eigen::Map<const Eigen::Matrix<u8, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data_ptr() + a.offset(), a.size()[0], a.size()[1]);
@@ -40,7 +43,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<i8>> result(new Tensor<i8>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<i8>> result(new Tensor<i8>(dim));
 
 		// copy data
 		Eigen::Map<const Eigen::Matrix<i8, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data_ptr() + a.offset(), a.size()[0], a.size()[1]);
@@ -57,7 +63,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<i16>> result(new Tensor<i16>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<i16>> result(new Tensor<i16>(dim));
 
 		// copy data
 		Eigen::Map<const Eigen::Matrix<i16, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data_ptr() + a.offset(), a.size()[0], a.size()[1]);
@@ -74,7 +83,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<i32>> result(new Tensor<i32>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<i32>> result(new Tensor<i32>(dim));
 
 		// copy data
 		Eigen::Map<const Eigen::Matrix<i32, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data_ptr() + a.offset(), a.size()[0], a.size()[1]);
@@ -91,7 +103,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<i64>> result(new Tensor<i64>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<i64>> result(new Tensor<i64>(dim));
 
 		// copy data
 		Eigen::Map<const Eigen::Matrix<i64, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data_ptr() + a.offset(), a.size()[0], a.size()[1]);
@@ -108,7 +123,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<f32>> result(new Tensor<f32>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<f32>> result(new Tensor<f32>(dim));
 
 #ifdef TRAPH_BUILD_EIGEN
 		// copy data
@@ -144,7 +162,10 @@ namespace traph
 		// check
 		matmul_check(a, b);
 		// result
-		std::shared_ptr<Tensor<f64>> result(new Tensor<f64>(a.size()[0], b.size()[1]));
+		DimVector dim;
+		dim.push_back(a.size()[0]);
+		dim.push_back(b.size()[1]);
+		std::shared_ptr<Tensor<f64>> result(new Tensor<f64>(dim));
 
 #ifdef TRAPH_BUILD_EIGEN
 		// copy data
@@ -153,7 +174,7 @@ namespace traph
 
 		Eigen::Matrix<f64, Eigen::Dynamic, Eigen::Dynamic> eigen_c = eigen_a * eigen_b;
 		// copy to result
-		std::copy(eigen_c.data(), eigen_c.data() + a.size()[0] * b.size()[1], result.data_ptr());
+		std::copy(eigen_c.data(), eigen_c.data() + a.size()[0] * b.size()[1], result->data_ptr());
 #elif defined TRAPH_BUILD_MKL
 		CBLAS_LAYOUT a_layout = a.order() == layout_type::column_major ? CBLAS_LAYOUT::CblasColMajor : CBLAS_LAYOUT::CblasRowMajor;
 

+ 10 - 0
traph/source/tensor/byte_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<u8>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<u8>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 10 - 0
traph/source/tensor/char_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<i8>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<i8>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 10 - 0
traph/source/tensor/double_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<f64>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<f64>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 10 - 0
traph/source/tensor/float_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<f32>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<f32>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 10 - 0
traph/source/tensor/int_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<i32>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<i32>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 10 - 0
traph/source/tensor/long_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<i64>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<i64>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 10 - 0
traph/source/tensor/short_tensor.cpp

@@ -403,4 +403,14 @@ namespace traph
 		result += "[" + to_string_impl(*this, 0, offset()) + "]";
 		return result;
     }
+
+    void Tensor<i16>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        // TODO: not implemented yet - this stub ignores dim0/dim1 and leaves the tensor untouched.
+    }
+
+    std::shared_ptr<TensorInterface> Tensor<i16>::transpose(idx_type dim0, idx_type dim1)
+    {
+        return nullptr; // TODO: not implemented yet - returns an empty pointer (callers must check) instead of flowing off the end of a non-void function, which is undefined behavior
+    }
 }

+ 12 - 0
traph/source/tensor/tensor.cpp

@@ -176,4 +176,16 @@ namespace traph
     {
         throw std::runtime_error("No implement");
     }
+
+    template<typename T>
+    void Tensor<T>::transpose_(idx_type dim0, idx_type dim1)
+    {
+        throw std::runtime_error("No implement"); // generic Tensor<T> has no transpose_ implementation; always throws
+    }
+
+    template<typename T>
+    std::shared_ptr<TensorInterface> Tensor<T>::transpose(idx_type dim0, idx_type dim1)
+    {
+        throw std::runtime_error("No implement"); // generic Tensor<T> has no transpose implementation; always throws
+    }
 }

+ 7 - 5
traph/source/test/main.cpp

@@ -49,10 +49,12 @@ int main()
 	std::cout << a->grad()->to_string();
 */
 	auto a = traph::ones<traph::f32>({ 2,3 });
-	traph::SliceVector slice;
-	slice.push_back(traph::Slice(0, 1, 1));
-	slice.push_back(traph::Slice(0, 1, 2));
-	auto b = traph::select(a, slice);
-	std::cout << b->data()->to_string();
+	a->requires_grad_(true);
+	auto b = traph::ones<traph::f32>({ 3,2 });
+	b->requires_grad_(true);
+	auto c = traph::matmul(a, b);
+	auto d = traph::sum(c);
+	d->backward();
+	std::cout << a->grad()->to_string();
     return 0;
 }