View Source

Add linear model and Optimizer

JasonWang 6 years ago
parent
commit
308b53a4fe

+ 1 - 0
traph/include/traph/core/tensor.h

@@ -37,6 +37,7 @@ namespace traph
         virtual std::shared_ptr<TensorInterface> matmul(std::shared_ptr<TensorInterface> mat) const = 0;
         virtual idx_type offset() const = 0;
 		virtual layout_type order() const = 0;
+        // virtual std::shared_ptr<TensorInterface> permute(const DimVector& dims) const = 0;
         virtual platform_type platform() = 0;
         virtual void reshape_(const DimVector& dims) = 0;
         virtual void resize_(const DimVector& dims) = 0;

+ 10 - 0
traph/include/traph/core/variable.h

@@ -20,12 +20,17 @@ namespace traph
     public:
         virtual void backward() = 0;
         virtual TensorInterfacePtr data() = 0;
+        virtual void data_(TensorInterfacePtr d) = 0;
         virtual device_id device() = 0;
         virtual TensorBasePtr<f32> grad() = 0;
+        virtual void grad_(TensorInterfacePtr g) = 0;
         virtual std::shared_ptr<OpBase> grad_fn() = 0;
+        virtual void grad_fn_(std::shared_ptr<OpBase> fn) = 0;
         virtual std::vector<VariableInterfacePtr>& inputs() = 0;
+        virtual void inputs_(const std::vector<VariableInterfacePtr>& i) = 0;
         virtual bool is_leaf() const = 0;
 		virtual void leaf_(bool state) = 0;
+        virtual std::shared_ptr<VariableInterface> new_empty(const DimVector& size, bool requires_grad) const = 0;
         virtual idx_type offset() const = 0;
 		virtual layout_type order() const = 0;
         virtual platform_type platform() = 0;
@@ -59,14 +64,19 @@ namespace traph
     public:
         virtual void backward() = 0;
         virtual TensorInterfacePtr data() = 0;
+        virtual void data_(TensorInterfacePtr d) = 0;
         virtual device_id device() = 0;
         virtual void fill_(T value) = 0;
         virtual TensorBasePtr<f32> grad() = 0;
+        virtual void grad_(TensorInterfacePtr g) = 0;
         virtual std::shared_ptr<OpBase> grad_fn() = 0;
+        virtual void grad_fn_(std::shared_ptr<OpBase> fn) = 0;
         virtual std::vector<VariableInterfacePtr>& inputs() = 0;
+        virtual void inputs_(const std::vector<VariableInterfacePtr>& i) = 0;
         virtual bool is_leaf() const = 0;
         virtual T item() const = 0;
 		virtual void leaf_(bool state) = 0;
+        virtual std::shared_ptr<VariableInterface> new_empty(const DimVector& size, bool requires_grad) const = 0;
         virtual idx_type offset() const = 0;
 		virtual layout_type order() const = 0;
         virtual platform_type platform() = 0;

+ 129 - 72
traph/include/traph/nn/function.h

@@ -16,162 +16,219 @@
 namespace traph
 {
 	// creation function
-	template<class T>
-	VariablePtr<T> zeros(std::initializer_list<idx_type> l, bool requires_grad = false)
+	template<typename T>
+	VariableInterfacePtr empty(std::initializer_list<idx_type> l, bool requires_grad = false)
 	{
 		DimVector dim;
 		for (auto i : l)
 			dim.push_back(i);
 
-		std::shared_ptr<Variable<T>> result(new Variable<T>(dim, false));
+		std::shared_ptr<VariableInterface> result(new Variable<T>(dim, false));
 		result->leaf_(true);
-		result->fill_(0);
 
 		return result;
 	}
 
-	template<class T>
-	VariablePtr<T> ones(std::initializer_list<idx_type> l, bool requires_grad = false)
+	template<typename T>
+	VariableInterfacePtr zeros(std::initializer_list<idx_type> l, bool requires_grad = false)
 	{
 		DimVector dim;
 		for (auto i : l)
 			dim.push_back(i);
 
-		std::shared_ptr<Variable<T>> result(new Variable<T>(dim, false));
+		std::shared_ptr<VariableInterface> result(new Variable<T>(dim, false));
+		result->leaf_(true);
+		std::dynamic_pointer_cast<TensorBase<T>>(result->data())->fill_(0);
+
+		return result;
+	}
+
+	template<typename T>
+	VariableInterfacePtr ones(std::initializer_list<idx_type> l, bool requires_grad = false)
+	{
+		DimVector dim;
+		for (auto i : l)
+			dim.push_back(i);
+
+		std::shared_ptr<VariableInterface> result(new Variable<T>(dim, false));
+		result->leaf_(true);
+		std::dynamic_pointer_cast<TensorBase<T>>(result->data())->fill_(1);
+
+		return result;
+	}
+
+	template<typename T>
+	VariableInterfacePtr empty_like(VariableInterfacePtr input, bool requires_grad = false)
+	{
+		std::shared_ptr<VariableInterface> result(new Variable<T>(input->size(), false));
 		result->leaf_(true);
-		result->fill_(1);
 
 		return result;
 	}
 
 	// arithmetic function
-    template<class T>
-	VariablePtr<T> sum(VariablePtr<T> input)
+	VariableInterfacePtr sum(VariableInterfacePtr input)
     {
-        VariablePtr<T> result(new Variable<T>);
+		DimVector result_dim(1);
+		result_dim[0] = 1;
+
+        VariableInterfacePtr result = input->new_empty(result_dim, true);
         std::shared_ptr<SumOp> op(new SumOp);
-        if(input->_requires_grad)
+        if(input->requires_grad())
         {
-			std::vector<VariableInterfacePtr> result_inputs { std::dynamic_pointer_cast<VariableInterface>(input) };
-            result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ input->_data }));
-			result->_grad = result->_data->create_grad();
-            result->_requires_grad = true;
-            result->_leaf = false;
-            result->_grad_fn = op;
-            result->_inputs = result_inputs;
+			std::vector<VariableInterfacePtr> result_inputs { input };
+            result->data_(op->forward({ input->data() }));
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+            result->requires_grad_(true);
+            result->leaf_(false);
+            result->grad_fn_(op);
+            result->inputs_(result_inputs);
         }
         else
         {
-            result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ input->_data }));
-            result->_requires_grad = false;
-            result->_leaf = false;
+            result->data_(op->forward({ input->data() }));
+            result->requires_grad_(false);
+            result->leaf_(false);
         }
 
         return result;
     }
 
-	template<class T>
-	VariablePtr<T> add(VariablePtr<T> left, VariablePtr<T> right)
+	VariableInterfacePtr add(VariableInterfacePtr left, VariableInterfacePtr right)
 	{
-		VariablePtr<T> result(new Variable<T>);
+		DimVector result_dim;
+
+        VariableInterfacePtr result = left->new_empty(result_dim, true);
 		std::shared_ptr<AddOp> op(new AddOp);
-		if (left->_requires_grad || right->_requires_grad)
+		if (left->requires_grad() || right->requires_grad())
 		{
 			std::vector<VariableInterfacePtr> result_inputs{ left, right };
-			result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ left->_data, right->_data }));
-			result->_grad = result->_data->create_grad();
-			result->_grad->fill_(0);
-			result->_requires_grad = true;
-			result->_leaf = false;
-			result->_grad_fn = op;
-			result->_inputs = result_inputs;
+			result->data_(op->forward({ left->data(), right->data() }));
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+			result->requires_grad_(true);
+			result->leaf_(false);
+			result->grad_fn_(op);
+			result->inputs_(result_inputs);
 		}
 		else
 		{
-			result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ left->_data, right->_data }));
-			result->_requires_grad = false;
-			result->_leaf = false;
+			result->data_(op->forward({ left->data(), right->data() }));
+			result->requires_grad_(false);
+			result->leaf_(false);
 		}
 
 		return result;
 	}
 
-	template<class T>
-	VariablePtr<T> matmul(VariablePtr<T> left, VariablePtr<T> right)
+	VariableInterfacePtr matmul(VariableInterfacePtr left, VariableInterfacePtr right)
 	{
-		VariablePtr<T> result(new Variable<T>);
+		DimVector result_dim;
+
+        VariableInterfacePtr result = left->new_empty(result_dim, true);
 		std::shared_ptr<MatmulOp> op(new MatmulOp);
-		if (left->_requires_grad || right->_requires_grad)
+		if (left->requires_grad() || right->requires_grad())
 		{
 			std::vector<VariableInterfacePtr> result_inputs{ left, right };
-			result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ left->_data, right->_data }));
-			result->_grad = result->_data->create_grad();
-			result->_grad->fill_(0);
-			result->_requires_grad = true;
-			result->_leaf = false;
-			result->_grad_fn = op;
-			result->_inputs = result_inputs;
+			result->data_(op->forward({ left->data(), right->data() }));
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+			result->requires_grad_(true);
+			result->leaf_(false);
+			result->grad_fn_(op);
+			result->inputs_(result_inputs);
 		}
 		else
 		{
-			result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ left->_data, right->_data }));
-			result->_requires_grad = false;
-			result->_leaf = false;
+			result->data_(op->forward({ left->data(), right->data() }));
+			result->requires_grad_(false);
+			result->leaf_(false);
 		}
 
 		return result;
 	}
 
 	
-	template<class T>
-	VariablePtr<T> select(VariablePtr<T> input, const SliceVector& slice)
+	VariableInterfacePtr select(VariableInterfacePtr input, const SliceVector& slice)
 	{
-		VariablePtr<T> result(new Variable<T>);
+		DimVector result_dim;
+
+        VariableInterfacePtr result = input->new_empty(result_dim, true);
 		std::shared_ptr<SelectOp> op(new SelectOp);
 		op->set_slice(slice);
 
 		std::vector<VariableInterfacePtr> result_inputs{ input };
-		result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ input->_data }));
-		result->_leaf = false;
+		result->data_(op->forward({ input->data() }));
+		result->leaf_(false);
 
 		if (input->requires_grad())
 		{
-			result->_grad = result->_data->create_grad();
-			result->_grad->fill_(0);
-			result->_requires_grad = true;
-			result->_grad_fn = op;
-			result->_inputs = result_inputs;
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+			result->requires_grad_(true);
+			result->grad_fn_(op);
+			result->inputs_(result_inputs);
 		}
 		else
 		{
-			result->_requires_grad = false;
+			result->requires_grad_(false);
 		}
 
 		return result;
 	}
 
 
-	template<class T>
-	VariablePtr<T> sin(VariablePtr<T> input)
+	VariableInterfacePtr sin(VariableInterfacePtr input)
 	{
-		VariablePtr<T> result(new Variable<T>);
+		DimVector result_dim;
+
+        VariableInterfacePtr result = input->new_empty(result_dim, true);
 		std::shared_ptr<SinOp> op(new SinOp);
 
 		std::vector<VariableInterfacePtr> result_inputs{ input };
-		result->_data = std::dynamic_pointer_cast<TensorBase<T>>(op->forward({ input->_data }));
-		result->_leaf = false;
+		result->data_(op->forward({ input->data() }));
+		result->leaf_(false);
+
+		if (input->requires_grad())
+		{
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+			result->requires_grad_(true);
+			result->grad_fn_(op);
+			result->inputs_(result_inputs);
+		}
+		else
+		{
+			result->requires_grad_(false);
+		}
+
+		return result;
+	}
+
+	VariableInterfacePtr transpose(VariableInterfacePtr input, idx_type dim0, idx_type dim1)
+	{
+		DimVector result_dim;
+
+        VariableInterfacePtr result = input->new_empty(result_dim, true);
+		std::shared_ptr<TransposeOp> op(new TransposeOp);
+		op->set_dim(dim0, dim1);
+
+		std::vector<VariableInterfacePtr> result_inputs{ input };
+		result->data_(op->forward({ input->data() }));
+		result->leaf_(false);
 
 		if (input->requires_grad())
 		{
-			result->_grad = result->_data->create_grad();
-			result->_grad->fill_(0);
-			result->_requires_grad = true;
-			result->_grad_fn = op;
-			result->_inputs = result_inputs;
+			result->grad_(result->data()->create_grad());
+			result->grad()->fill_(0);
+			result->requires_grad_(true);
+			result->grad_fn_(op);
+			result->inputs_(result_inputs);
 		}
 		else
 		{
-			result->_requires_grad = false;
+			result->requires_grad_(false);
 		}
 
 		return result;

+ 55 - 2
traph/include/traph/nn/module.h

@@ -3,20 +3,73 @@
 
 #include <memory>
 #include <vector>
+#include <map>
 
 #include <traph/nn/variable.h>
 #include <traph/nn/operation.h>
+#include <traph/nn/parameter.h>
+#include <traph/nn/function.h>
 
 namespace traph
 {
     class Module
     {
     private:
-        std::vector<std::shared_ptr<VariableInterface>> parameters;
+        std::map<std::string, std::shared_ptr<ParameterInterface>> _parameters;
+        std::vector<std::shared_ptr<Module>> _children;
     public:
-        virtual std::vector<TensorBasePtr<f32>> backward(TensorBasePtr<f32> output_grad)
+        std::vector<std::shared_ptr<ParameterInterface>> parameters(bool recurse)
         {
+            std::vector<std::shared_ptr<ParameterInterface>> result;
+            if(recurse)
+            {
+                // fixme: children params recurse
+                for (const auto &p : _parameters)
+                    result.push_back(p.second);
+            }
+            else
+            {
+                for (const auto &p : _parameters)
+                    result.push_back(p.second);
+            }
+            return result;
+        }
+
+        void register_parameter(const std::string& name, std::shared_ptr<ParameterInterface> param)
+        {
+            _parameters[name] = param;
+        }
+    };
+
+    class LinearModule: public Module
+    {
+    private:
+        int _in_features;
+        int _out_features;
+        std::shared_ptr<VariableInterface> _weight;
+        std::shared_ptr<VariableInterface> _bias;
+    public:
+        LinearModule(int in_features, int out_features, bool bias)
+        {
+            _in_features = in_features;
+            _out_features = out_features;
+            _weight = std::shared_ptr<VariableInterface>(new FloatParameter({out_features, in_features}));
+            if(bias)
+                _bias = std::shared_ptr<VariableInterface>(new FloatParameter({out_features}));
+            
+            register_parameter("weight", std::dynamic_pointer_cast<FloatParameter>(_weight));
+            register_parameter("bias", std::dynamic_pointer_cast<FloatParameter>(_bias));
+        }
+
+        std::shared_ptr<VariableInterface> forward(std::shared_ptr<VariableInterface> input)
+        {
+            std::shared_ptr<VariableInterface> result;
+            if(_bias)
+                result = add(matmul(input, transpose(_weight, 0, 1)), _bias);
+            else
+                result = matmul(input, transpose(_weight, 0, 1));
             
+            return result;
         }
     };
 } // traph

+ 28 - 0
traph/include/traph/nn/operation.h

@@ -161,6 +161,34 @@ namespace traph
 			return { result };
 		}
 	};
+
+	class TransposeOp : public OpBase
+	{
+	private:
+		idx_type dim0, dim1;
+	public:
+		void set_dim(idx_type d0, idx_type d1)
+		{
+			dim0 = d0;
+			dim1 = d1;
+		}
+
+		virtual TensorInterfacePtr forward(std::vector<TensorInterfacePtr> inputs) override
+		{
+			assert(inputs.size() == 1);
+
+			TensorInterfacePtr input = inputs[0];
+			TensorInterfacePtr result = input->transpose(dim0, dim1);
+
+			return result;
+		}
+
+		virtual std::vector<TensorBasePtr<f32>> backward(TensorBasePtr<f32> output_grad) override
+		{
+			TensorBasePtr<f32> result = std::dynamic_pointer_cast<TensorBase<f32>>(output_grad->transpose(dim0, dim1));
+			return { result };
+		}
+	};
 }
 
 #endif

+ 29 - 0
traph/include/traph/nn/optim.h

@@ -0,0 +1,29 @@
+#ifndef TRAPH_NN_OPTIM_H_
+#define TRAPH_NN_OPTIM_H_
+
+#include <memory>
+#include <vector>
+
+#include <traph/nn/parameter.h>
+
+namespace traph
+{
+    class Optimizer
+    {
+    private:
+        std::vector<std::shared_ptr<ParameterInterface>> _params;
+    public:
+        Optimizer(std::vector<std::shared_ptr<ParameterInterface>> params)
+            :_params(params)
+        {
+        }
+
+        virtual void step() = 0;
+
+        void zero_grad()
+        {
+        }
+    };
+}
+
+#endif

+ 9 - 2
traph/include/traph/nn/parameter.h

@@ -5,8 +5,12 @@
 
 namespace traph
 {
+    class ParameterInterface
+    {
+    };
+
     template<typename T>
-    class Parameter:public Variable<T>
+    class Parameter:public Variable<T>, public ParameterInterface
     {
     public:
         Parameter();
@@ -52,25 +56,28 @@ namespace traph
 	Parameter<T>::Parameter()
 		:Variable<T>()
 	{
-
+        this->requires_grad_(true);
 	}
 
 	template<typename T>
 	Parameter<T>::Parameter(std::shared_ptr<TensorBase<T>> data)
 		:Variable<T>(data)
 	{
+        this->requires_grad_(true);
 	}
 
 	template<typename T>
 	Parameter<T>::Parameter(const DimVector& dim)
 		:Variable<T>(dim)
 	{
+        this->requires_grad_(true);
 	}
 
 	template<typename T>
 	Parameter<T>::Parameter(std::initializer_list<idx_type> l)
 		:Variable<T>(l)
 	{
+        this->requires_grad_(true);
 	}
 
 	template<typename T>

+ 37 - 0
traph/include/traph/nn/variable.h

@@ -63,14 +63,19 @@ namespace traph
 
         virtual void backward() override;
         virtual TensorInterfacePtr data() override;
+		virtual void data_(TensorInterfacePtr d) override;
         virtual device_id device() override;
         virtual void fill_(T value) override;
         virtual TensorBasePtr<f32> grad() override;
+		virtual void grad_(TensorInterfacePtr g) override;
         virtual std::shared_ptr<OpBase> grad_fn() override;
+		virtual void grad_fn_(std::shared_ptr<OpBase> fn) override;
         virtual std::vector<VariableInterfacePtr>& inputs() override;
+		virtual void inputs_(const std::vector<VariableInterfacePtr>& i) override;
         virtual bool is_leaf() const override;
         virtual T item() const override;
 		virtual void leaf_(bool state) override;
+		virtual std::shared_ptr<VariableInterface> new_empty(const DimVector& size, bool requires_grad) const override;
         virtual idx_type offset() const override;
 		virtual layout_type order() const override;
         virtual platform_type platform() override;
@@ -198,6 +203,12 @@ namespace traph
         return std::dynamic_pointer_cast<TensorInterface>(_data);
     }
 
+	template<typename T>
+	void Variable<T>::data_(TensorInterfacePtr d)
+	{
+		this->_data = std::dynamic_pointer_cast<TensorBase<T>>(d);
+	}
+
 	template<typename T>
 	device_id Variable<T>::device()
 	{
@@ -216,18 +227,36 @@ namespace traph
 		return _grad;
 	}
 
+	template<typename T>
+	void Variable<T>::grad_(TensorInterfacePtr g)
+	{
+		this->_grad = std::dynamic_pointer_cast<TensorBase<f32>>(g);
+	}
+
 	template<typename T>
 	std::shared_ptr<OpBase> Variable<T>::grad_fn()
 	{
 		return _grad_fn;
 	}
 
+	template<typename T>
+	void Variable<T>::grad_fn_(std::shared_ptr<OpBase> fn)
+	{
+		this->_grad_fn = fn;
+	}
+
 	template<typename T>
 	std::vector<VariableInterfacePtr>& Variable<T>::inputs()
 	{
 		return _inputs;
 	}
 
+	template<typename T>
+	void Variable<T>::inputs_(const std::vector<VariableInterfacePtr>& i)
+	{
+		this->_inputs = i;
+	}
+
     template<typename T>
     bool Variable<T>::is_leaf() const
     {
@@ -246,6 +275,14 @@ namespace traph
 		_leaf = state;
 	}
 
+	template<typename T>
+	std::shared_ptr<VariableInterface> Variable<T>::new_empty(const DimVector& size, bool requires_grad) const
+	{
+		std::shared_ptr<VariableInterface> ret = std::shared_ptr<Variable<T>>(new Variable<T>(size));
+		ret->requires_grad_(requires_grad);
+		return ret;
+	}
+
 	template<typename T>
 	idx_type Variable<T>::offset() const
 	{

+ 1 - 1
traph/source/nn/CMakeLists.txt

@@ -11,7 +11,7 @@ SET(NN_LIST
 	${HEADER_PATH}/graph.h
 	${HEADER_PATH}/executor.h
 	${SOURCE_PATH}/executor.cpp
-	${HEADER_PATH}/arithmetic.h
+	${HEADER_PATH}/function.h
 	${HEADER_PATH}/operation.h
 	${SOURCE_PATH}/operation.cpp
 )

+ 2 - 1
traph/source/tensor/float_tensor.cpp

@@ -192,7 +192,8 @@ namespace traph
 
     void Tensor<f32>::apply_(std::function<f32(f32)> f)
     {
-        apply_impl(0, _offset, f);
+		if(_dimensions.size() > 0)
+			apply_impl(0, _offset, f);
     }
 
     TensorInterfacePtr Tensor<f32>::clone() const

+ 13 - 7
traph/source/test/main.cpp

@@ -1,10 +1,6 @@
 #include <algorithm>
 
-#include <traph/core/tensor.h>
-#include <traph/tensor/tensor.h>
-#include <traph/nn/function.h>
-#include <traph/core/variable.h>
-#include <traph/nn/variable.h>
+#include <traph/nn/module.h>
 
 #include <iostream>
 
@@ -35,7 +31,7 @@ int main()
 	std::cout << b;
 	*/
 	// auto a = traph::Variable<traph::f32>({ 2, 3 });
-/*
+	/*
 	auto a = traph::ones<traph::f32>({ 2,3,2 });
 	a->requires_grad_(true);
 	auto b = traph::sin<traph::f32>(a);
@@ -47,7 +43,9 @@ int main()
 	e->backward();
 
 	std::cout << a->grad()->to_string();
-*/
+	*/
+
+	/*
 	auto a = traph::ones<traph::f32>({ 2,3 });
 	a->requires_grad_(true);
 	auto b = traph::ones<traph::f32>({ 3,2 });
@@ -56,5 +54,13 @@ int main()
 	auto d = traph::sum(c);
 	d->backward();
 	std::cout << a->grad()->to_string();
+	*/
+
+	int batch_size = 16;
+	auto a = traph::ones<traph::f32>({ batch_size,4 });
+
+	traph::LinearModule linear_model(4, 2, false);
+	auto out = linear_model.forward(a);
+
     return 0;
 }