ソースを参照

powop bug fix

JasonWang 6 年 前
コミット
96506adeb3

+ 2 - 2
traph/include/traph/nn/layers/linear.h

@@ -18,9 +18,9 @@ namespace traph
         {
             _in_features = in_features;
             _out_features = out_features;
-            _weight = randn<f32>({out_features, in_features}, true);
+            _weight = zeros<f32>({out_features, in_features}, true);
             if(bias)
-                _bias = randn<f32>({out_features}, true);
+                _bias = zeros<f32>({out_features}, true);
             
             register_parameter("weight", _weight);
             register_parameter("bias", _bias);

+ 2 - 2
traph/include/traph/nn/layers/loss.h

@@ -27,12 +27,12 @@ namespace traph
             std::shared_ptr<VariableInterface> ret;
             if(_reduction == MSELossReduction::SUM)
             {
-                ret = sum(pow(sub(input, target), 2));
+                ret = sum(pow(sub(input, target), 2.f));
             }
             else if(_reduction == MSELossReduction::MEAN)
             {
                // fixme: use mean once it is implemented
-                ret = sum(pow(sub(input, target), 2));
+                ret = sum(pow(sub(input, target), 2.f));
             }
             else
             {

+ 9 - 3
traph/include/traph/nn/operation.h

@@ -124,15 +124,21 @@ namespace traph
 			TensorInterfacePtr input = inputs[0];
 			auto output = input->clone();
 			output->pow_(_exp);
+
+			context.save(input);
 			
 			return output;
 		}
 
 		virtual std::vector<TensorBasePtr<f32>> backward(TensorBasePtr<f32> output_grad) override
 		{
-			auto output = std::dynamic_pointer_cast<TensorBase<f32>>(output_grad->clone());
-			output->mul_(_exp);
-			return { output };
+			auto saved_tensors = context.get_saved_tensors();
+			assert(saved_tensors.size() == 1);
+			auto cloned_x = std::dynamic_pointer_cast<TensorBase<f32>>(saved_tensors[0]->clone());
+			
+			//FIXME gradient should use the power rule: d/dx x^n = n*x^(n-1), i.e. multiply by _exp * x^(_exp-1), not _exp alone
+			cloned_x->mul_(_exp);
+			return { cloned_x };
 		}
 	};
 

+ 1 - 1
traph/include/traph/nn/optim.h

@@ -49,7 +49,7 @@ namespace traph
                 auto d_p = each->grad();
 
                 auto cloned_d_p = std::dynamic_pointer_cast<TensorBase<f32>>(d_p->clone());
-                cloned_d_p->mul_(_lr);
+                cloned_d_p->mul_(-_lr);
                 each->data()->add_(cloned_d_p);
             }
         }

+ 1 - 1
traph/include/traph/nn/variable.h

@@ -166,11 +166,11 @@ namespace traph
 	template<typename T>
 	void Variable<T>::clear_graph()
 	{
-		_grad_fn = nullptr;
 		for(auto &each:_inputs)
 		{
 			each->clear_graph();
 		}
+		_grad_fn = nullptr;
 		_inputs.clear();
 	}
 

+ 11 - 10
traph/source/tensor/float_tensor.cpp

@@ -146,25 +146,26 @@ namespace traph
 		// ok, get lhs, rhs
 		Tensor<f32> * lhs = this;
 		Tensor<f32> * rhs = dynamic_cast<Tensor<f32> *>(other.get());
-		std::function<void(Tensor<f32> *, Tensor<f32> *, idx_type, idx_type,idx_type, idx_type)> add_impl =
-			[&](Tensor<f32> * lhs, Tensor<f32> * rhs, idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
+		std::function<void(idx_type, idx_type, idx_type, idx_type)> add_impl =
+			[&](idx_type lhs_dim, idx_type rhs_dim, idx_type lhs_idx, idx_type rhs_idx) {
 
 			auto lhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(lhs->storage())->data_ptr();
 			auto rhs_storage = std::dynamic_pointer_cast<TensorStorage<f32>>(rhs->storage())->data_ptr();
 
-			if (lhs_dim < -(lhs->size().size()) && rhs_dim < -(rhs->size().size()))
-			{
-				lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
-				return;
-			}
-
 			idx_type lsh_shape_size = lhs_dim >= -(lhs->size().size())? lhs->size(lhs_dim) : 1;
 			idx_type rsh_shape_size = rhs_dim >= -(rhs->size().size()) ? rhs->size(rhs_dim) : 1;
 			idx_type max_shape_size = std::max(lsh_shape_size, rsh_shape_size);
 
 			for (idx_type i = 0; i < max_shape_size; ++i)
 			{
-				add_impl(lhs, rhs, lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+                if (lhs_dim <= -(lhs->size().size()) && rhs_dim <= -(rhs->size().size()))
+                {
+                    lhs_storage[lhs_idx] += rhs_storage[rhs_idx];
+                }
+                else
+                {
+                    add_impl(lhs_dim - 1, rhs_dim - 1, lhs_idx, rhs_idx);
+                }
 
 				if(lsh_shape_size > 1)
 					lhs_idx += lhs->stride(lhs_dim);
@@ -173,7 +174,7 @@ namespace traph
 			}
 		};
 
-		add_impl(lhs, rhs, -1, -1, lhs->offset(), rhs->offset());
+		add_impl(-1, -1, lhs->offset(), rhs->offset());
     }
 
     void Tensor<f32>::apply_(std::function<f32(f32)> f)

+ 12 - 3
traph/source/test/main.cpp

@@ -67,12 +67,12 @@ int main()
 
 	traph::Linear linear_model(4, 2, false);
 	traph::MSELoss criterion;
-	traph::SGD optimizer(linear_model.parameters(), 0.001f);
+	traph::SGD optimizer(linear_model.parameters(), 0.0001f);
 	std::cout << y->data()->to_string() << std::endl;
 
 	std::cout << "Start Training..." << std::endl;
 
-	for (int epoch = 0; epoch < 100; ++epoch)
+	for (int epoch = 0; epoch < 10000; ++epoch)
 	{
 		float loss100 = 0.f;
 
@@ -82,9 +82,18 @@ int main()
 		loss->backward();
 		optimizer.step();
 		// loss100 += loss->item();
-		std::cout << loss->data()->to_string()<<std::endl;
+		// std::cout << linear_model.parameters()[0]->data()->to_string()<<std::endl;
+		std::cout << loss->data()->to_string() << std::endl;
 	}
 	
+	//auto a = traph::ones<traph::f32>({ 2,3 });
+	//a->requires_grad_(true);
+	//auto b = traph::ones<traph::f32>({ 3,4 });
+	//b->requires_grad_(true);
+	//auto c = matmul(a, b);
+	//auto d = sum(c);
+	//d->backward();
+	//std::cout << a->grad()->to_string();
 	
     return 0;
 }