Ver Fonte

add cuda cmake

jstzwj há 7 anos atrás
pai
commit
1fc940fd91

+ 21 - 3
CMakeLists.txt

@@ -21,12 +21,24 @@ if(Boost_FOUND)
 	message(STATUS Boost found: ${Boost_INCLUDE_DIRS})
 endif()
 
+
+# Language bindings (cache switch, OFF by default).
+set(ENABLE_PYTHON FALSE CACHE BOOL "Language: Python")
+
+# GPGPU backends (cache switches, OFF by default); each one is tested by an IF(ENABLE_<backend>) block below.
 set(ENABLE_CUDA FALSE CACHE BOOL "Feature: CUDA")
 set(ENABLE_OPENCL FALSE CACHE BOOL "Feature: OPENCL")
 
 IF(ENABLE_CUDA)
 	SET(TRAPH_ENABLE_CUDA TRUE)
 	ADD_DEFINITIONS(-DTRAPH_ENABLE_CUDA)
+	# Locate the CUDA toolkit, then append the sm_61 code generation and C++11 flags to the nvcc flag list.
+	FIND_PACKAGE(CUDA REQUIRED)
+	LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_61,code=sm_61 -std=c++11)
+	IF(CUDA_FOUND)
+		include_directories(${CUDA_INCLUDE_DIRS})
+		message(STATUS "cuda found") # quoted: unquoted words would be joined without a separator
+	ENDIF()
 ENDIF()
 
 IF(ENABLE_OPENCL)
@@ -40,13 +52,19 @@ IF(ENABLE_OPENCL)
 	endif()
 ENDIF()
 
-
-SET(TRAPH_ACCELERATE 1 CACHE STRING
+# blas feature
+SET(TRAPH_ACCELERATE 0 CACHE STRING
 "Specify the feature Possible values:
+0 - only eigen
 1 - mkl
 2 - openblas")
 
-IF(TRAPH_ACCELERATE EQUAL 1)
+IF(TRAPH_ACCELERATE EQUAL 0)
+	# Eigen-only backend: record the choice for CMake and define TRAPH_BUILD_EIGEN for the C++ sources.
+	SET(TRAPH_BUILD_EIGEN TRUE)
+	ADD_DEFINITIONS(-DTRAPH_BUILD_EIGEN)
+	message(STATUS "using eigen blas")
+ELSEIF(TRAPH_ACCELERATE EQUAL 1)
 	SET(TRAPH_BUILD_MKL TRUE)
 	ADD_DEFINITIONS(-DTRAPH_BUILD_MKL)
 

+ 19 - 0
traph/include/traph/cutensor/cucontext.h

@@ -0,0 +1,19 @@
+#ifndef TRAPH_CUTENSOR_CUCONTEXT_H_
+#define TRAPH_CUTENSOR_CUCONTEXT_H_
+
+#include <stdexcept>
+#include <vector>
+
+// Declares traph::CUContext. The class is an empty skeleton: it has no members yet.
+namespace traph
+{
+    class CUContext
+    {
+    public:
+        // TODO: no public interface declared yet.
+    private:
+        // TODO: no data members declared yet.
+    };
+}
+
+#endif // TRAPH_CUTENSOR_CUCONTEXT_H_

+ 19 - 0
traph/include/traph/cutensor/cudevice.h

@@ -0,0 +1,19 @@
+#ifndef TRAPH_CUTENSOR_CUDEVICE_H_
+#define TRAPH_CUTENSOR_CUDEVICE_H_
+
+#include <stdexcept>
+#include <vector>
+
+// Declares traph::CUDevice. The class is an empty skeleton: it has no members yet.
+namespace traph
+{
+    class CUDevice
+    {
+    public:
+        // TODO: no public interface declared yet.
+    private:
+        // TODO: no data members declared yet.
+    };
+}
+
+#endif // TRAPH_CUTENSOR_CUDEVICE_H_

+ 67 - 0
traph/include/traph/cutensor/cutensor.h

@@ -0,0 +1,67 @@
+#ifndef TRAPH_CUTENSOR_CUTENSOR_H_
+#define TRAPH_CUTENSOR_CUTENSOR_H_
+
+#include <cuda.h>
+// NOTE(review): f32/f64/i8..i64/u8, idx_type and size_type are used below but not declared by any header included here - confirm.
+namespace traph
+{
+    // The real representation of all tensors. Skeleton only: the members below are empty stubs.
+    template<typename T>
+    class CUTensorStorage
+    {
+    public:
+        using CUDoubleStorage = CUTensorStorage<f64>;
+        using CUFloatStorage = CUTensorStorage<f32>;
+        using CULongStorage = CUTensorStorage<i64>;
+        using CUIntStorage = CUTensorStorage<i32>;
+        using CUShortStorage = CUTensorStorage<i16>;
+        using CUCharStorage = CUTensorStorage<i8>;
+        using CUByteStorage = CUTensorStorage<u8>;
+        // using CUHalfStorage = CUTensorStorage<f16>;
+        CUTensorStorage()
+        {
+        }
+
+        CUTensorStorage(const CUTensorStorage& other)
+        {
+        }
+
+        CUTensorStorage(CUTensorStorage&& other)
+        {
+        }
+
+        CUTensorStorage& operator=(const CUTensorStorage& other)
+        {
+            return *this; // no state to copy yet; returning *this avoids falling off a non-void function
+        }
+
+        CUTensorStorage& operator=(CUTensorStorage&& other)
+        {
+            return *this; // no state to move yet; returning *this avoids falling off a non-void function
+        }
+
+        // size - NOTE(review): stubs; neither function returns a value yet, so they must not be called until implemented.
+        idx_type size() const {}
+        size_type element_size() const {}
+
+        void resize_(idx_type size)
+        {
+        }
+
+        // type cast - NOTE(review): FloatStorage/DoubleStorage are not declared in this header (CUFloatStorage/CUDoubleStorage may be intended); both bodies are stubs that return nothing.
+        FloatStorage to_float() const
+        {
+        }
+
+        DoubleStorage to_double() const
+        {
+        }
+    };
+
+    class CUTensor
+    {
+        // TODO: empty placeholder; no members declared yet.
+    };
+}
+
+#endif // TRAPH_CUTENSOR_CUTENSOR_H_

+ 0 - 19
traph/source/core/CMakeLists.txt

@@ -14,23 +14,4 @@ ADD_LIBRARY(${LIB_OUTNAME} ${CORE_LIST})
 
 
 
-IF(TRAPH_ACCELERATE EQUAL 1)
-	if (BLAS_FOUND)
-		target_link_libraries(${LIB_OUTNAME} ${BLAS_LIBRARIES})
-	endif()
-ELSEIF(TRAPH_ACCELERATE EQUAL 2)
-	if (BLAS_FOUND)
-		target_link_libraries(${LIB_OUTNAME} ${BLAS_LIBRARIES})
-	endif()
-ELSEIF(TRAPH_ACCELERATE EQUAL 3)
-	find_package(clBLAS CONFIG REQUIRED)
-	target_link_libraries(main PRIVATE clBLAS)
-ELSEIF(TRAPH_ACCELERATE EQUAL 4)
-	# find_package(clBLAS CONFIG REQUIRED)
-	# target_link_libraries(${LIB_OUTNAME} PRIVATE clBLAS)
-ELSE()
-	MESSAGE(FATAL_ERROR "Unsupported build platform: " ${OCTOON_BUILD_PLATFORM})
-ENDIF()
-
-
 

+ 11 - 10
traph/source/cutensor/CMakeLists.txt

@@ -4,17 +4,24 @@ SET(LIB_OUTNAME traph-${LIB_NAME})
 SET(HEADER_PATH ${TRAPH_PATH_HEADER}/${LIB_NAME})
 SET(SOURCE_PATH ${TRAPH_PATH_SOURCE}/${LIB_NAME})
 
-SET(CORE_LIST
-	${HEADER_PATH}/utils.h
+SET(CUTENSOR_LIST
+	${HEADER_PATH}/cutensor.h
+	${SOURCE_PATH}/cutensor.cpp
 )
 
-ADD_LIBRARY(${LIB_OUTNAME} ${CORE_LIST})
+ADD_LIBRARY(${LIB_OUTNAME} ${CUTENSOR_LIST})
+IF(TARGET traph-cutensor-cuda) # the cuda/ subdirectory is not added here yet (add_subdirectory(cuda) is pending); link only once that target exists
+	target_link_libraries(${LIB_OUTNAME} traph-cutensor-cuda)
+ENDIF()
 
 IF(Boost_FOUND)
 	target_link_libraries(${LIB_OUTNAME} ${Boost_LIBRARIES})
 ENDIF()
 
-IF(TRAPH_ACCELERATE EQUAL 1)
+
+IF(TRAPH_ACCELERATE EQUAL 0)
+	# do something
+ELSEIF(TRAPH_ACCELERATE EQUAL 1)
 	if (BLAS_FOUND)
 		target_link_libraries(${LIB_OUTNAME} ${BLAS_LIBRARIES})
 	endif()
@@ -22,12 +29,6 @@ ELSEIF(TRAPH_ACCELERATE EQUAL 2)
 	if (BLAS_FOUND)
 		target_link_libraries(${LIB_OUTNAME} ${BLAS_LIBRARIES})
 	endif()
-ELSEIF(TRAPH_ACCELERATE EQUAL 3)
-	find_package(clBLAS CONFIG REQUIRED)
-	target_link_libraries(main PRIVATE clBLAS)
-ELSEIF(TRAPH_ACCELERATE EQUAL 4)
-	# find_package(clBLAS CONFIG REQUIRED)
-	# target_link_libraries(${LIB_OUTNAME} PRIVATE clBLAS)
 ELSE()
 	MESSAGE(FATAL_ERROR "Unsupported build platform: " ${OCTOON_BUILD_PLATFORM})
 ENDIF()

+ 12 - 0
traph/source/cutensor/cuda/CMakeLists.txt

@@ -0,0 +1,12 @@
+# Static library for the CUDA sources of the cutensor module (target name: traph-cutensor-cuda).
+SET(LIB_NAME cutensor)
+SET(LIB_OUTNAME traph-${LIB_NAME}-cuda)
+SET(HEADER_PATH ${TRAPH_PATH_HEADER}/${LIB_NAME}/cuda)
+SET(SOURCE_PATH ${TRAPH_PATH_SOURCE}/${LIB_NAME}/cuda)
+
+# NOTE(review): the source list is still empty - add the CUDA sources before enabling this directory.
+SET(CUDA_LIST
+)
+# Append instead of overwriting, so nvcc flags already present in CUDA_NVCC_FLAGS (set by the parent scope) are kept.
+LIST(APPEND CUDA_NVCC_FLAGS -O3 -G -g)
+cuda_add_library(${LIB_OUTNAME} STATIC ${CUDA_LIST})

+ 0 - 0
traph/source/cutensor/cutensor.cpp


+ 10 - 6
traph/source/interface/CMakeLists.txt

@@ -6,15 +6,19 @@ SET(SOURCE_PATH ${TRAPH_PATH_SOURCE}/${LIB_NAME})
 FIND_PACKAGE(SWIG REQUIRED)
 INCLUDE(${SWIG_USE_FILE})
 
-FIND_PACKAGE(PythonLibs)
-INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_PATH})
-
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 
 SET(CMAKE_SWIG_FLAGS "")
 
 SET_SOURCE_FILES_PROPERTIES(traph_tensor.i PROPERTIES CPLUSPLUS ON)
 SET_SOURCE_FILES_PROPERTIES(traph_tensor.i PROPERTIES SWIG_FLAGS "-includeall")
-SWIG_ADD_LIBRARY(traph_tensor LANGUAGE python SOURCES traph_tensor.i)
-SWIG_LINK_LIBRARIES(traph_tensor ${PYTHON_LIBRARIES})
-SWIG_LINK_LIBRARIES(traph_tensor traph-core)
+
+IF(ENABLE_PYTHON)
+    # Python bindings: REQUIRED stops configuration early if the Python development files are missing,
+    # because PYTHON_INCLUDE_PATH and PYTHON_LIBRARIES are used unconditionally below.
+    FIND_PACKAGE(PythonLibs REQUIRED)
+    INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_PATH})
+    SWIG_ADD_LIBRARY(traph_tensor LANGUAGE python SOURCES traph_tensor.i)
+    SWIG_LINK_LIBRARIES(traph_tensor ${PYTHON_LIBRARIES} traph-core)
+ENDIF()
+

+ 3 - 7
traph/source/tensor/CMakeLists.txt

@@ -20,7 +20,9 @@ IF(Boost_FOUND)
 	target_link_libraries(${LIB_OUTNAME} ${Boost_LIBRARIES})
 ENDIF()
 
-IF(TRAPH_ACCELERATE EQUAL 1)
+IF(TRAPH_ACCELERATE EQUAL 0)
+	# link eigen?
+ELSEIF(TRAPH_ACCELERATE EQUAL 1)
 	if (BLAS_FOUND)
 		target_link_libraries(${LIB_OUTNAME} ${BLAS_LIBRARIES})
 	endif()
@@ -28,12 +30,6 @@ ELSEIF(TRAPH_ACCELERATE EQUAL 2)
 	if (BLAS_FOUND)
 		target_link_libraries(${LIB_OUTNAME} ${BLAS_LIBRARIES})
 	endif()
-ELSEIF(TRAPH_ACCELERATE EQUAL 3)
-	find_package(clBLAS CONFIG REQUIRED)
-	target_link_libraries(main PRIVATE clBLAS)
-ELSEIF(TRAPH_ACCELERATE EQUAL 4)
-	# find_package(clBLAS CONFIG REQUIRED)
-	# target_link_libraries(${LIB_OUTNAME} PRIVATE clBLAS)
 ELSE()
 	MESSAGE(FATAL_ERROR "Unsupported build platform: " ${OCTOON_BUILD_PLATFORM})
 ENDIF()

+ 38 - 11
traph/source/tensor/arithmetic.cpp

@@ -4,16 +4,15 @@
 
 #include <traph/tensor/arithmetic.h>
 
-#include <eigen3/Eigen/Dense>
-
-#ifdef TRAPH_BUILD_OPENBLAS
-#include <traph/core/openblas_backend.h>
-#endif
 
-#ifdef TRAPH_BUILD_MKL
+#ifdef TRAPH_BUILD_EIGEN
+#include <eigen3/Eigen/Dense>
+#elif defined TRAPH_BUILD_MKL
 #include <mkl.h>
 #include <mkl_blas.h>
 #include <mkl_cblas.h>
+#elif defined TRAPH_BUILD_OPENBLAS
+#include <traph/core/openblas_backend.h>
 #endif
 
 namespace traph
@@ -21,7 +20,13 @@ namespace traph
 	Tensor<f32> add(const Tensor<f32> &t, f32 v)
 	{
 		Tensor<f32> result(t.size());
-#ifdef TRAPH_BUILD_MKL
+#ifdef TRAPH_BUILD_EIGEN
+		idx_type flat_size_end = t.size().flat_size();
+		for (idx_type i = 0; i < flat_size_end; ++i)
+		{
+			result.data()[i] = t.data()[i] + v;
+		}
+#elif defined TRAPH_BUILD_MKL
 		result.fill_(v);
 		cblas_saxpy(t.size().flat_size(), 1.f, t.data(), 1, result.data(), 1);
 #endif
@@ -31,7 +36,13 @@ namespace traph
 	Tensor<f64> add(const Tensor<f64> &t, f64 v)
 	{
 		Tensor<f64> result(t.size());
-#ifdef TRAPH_BUILD_MKL
+#ifdef TRAPH_BUILD_EIGEN
+		idx_type flat_size_end = t.size().flat_size();
+		for (idx_type i = 0; i < flat_size_end; ++i)
+		{
+			result.data()[i] = t.data()[i] + v;
+		}
+#elif defined TRAPH_BUILD_MKL
 		result.fill_(v);
 		cblas_daxpy(t.size().flat_size(), 1.f, t.data(), 1, result.data(), 1);
 #endif
@@ -130,7 +141,15 @@ namespace traph
 		// result
 		Tensor<f32> result = zeros<f32>({ a.size()[0], b.size()[1] });
 
-	#ifdef TRAPH_BUILD_MKL
+#ifdef TRAPH_BUILD_EIGEN
+		// copy data
+		Eigen::Map<const Eigen::Matrix<f32, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data() + a.offset(), a.size()[0], a.size()[1]);
+		Eigen::Map<const Eigen::Matrix<f32, Eigen::Dynamic, Eigen::Dynamic>> eigen_b(b.data() + b.offset(), b.size()[0], b.size()[1]);
+
+		Eigen::Matrix<f32, Eigen::Dynamic, Eigen::Dynamic> eigen_c = eigen_a * eigen_b;
+		// copy to result
+		std::copy(eigen_c.data(), eigen_c.data() + a.size()[0] * b.size()[1], result.data());
+#elif defined TRAPH_BUILD_MKL
 		CBLAS_LAYOUT a_layout = a.layout() == layout_type::column_major ? CBLAS_LAYOUT::CblasColMajor : CBLAS_LAYOUT::CblasRowMajor;
 
 		cblas_sgemm(a_layout,
@@ -147,7 +166,7 @@ namespace traph
 			0.f,
 			result.data(),
 			result.size()[0]);
-	#endif
+#endif
 		return result;
 	}
 
@@ -158,7 +177,15 @@ namespace traph
 		// result
 		Tensor<f64> result = zeros<f64>({ a.size()[0], b.size()[1] });
 
-#ifdef TRAPH_BUILD_MKL
+#ifdef TRAPH_BUILD_EIGEN
+		// copy data
+		Eigen::Map<const Eigen::Matrix<f64, Eigen::Dynamic, Eigen::Dynamic>> eigen_a(a.data() + a.offset(), a.size()[0], a.size()[1]);
+		Eigen::Map<const Eigen::Matrix<f64, Eigen::Dynamic, Eigen::Dynamic>> eigen_b(b.data() + b.offset(), b.size()[0], b.size()[1]);
+
+		Eigen::Matrix<f64, Eigen::Dynamic, Eigen::Dynamic> eigen_c = eigen_a * eigen_b;
+		// copy to result
+		std::copy(eigen_c.data(), eigen_c.data() + a.size()[0] * b.size()[1], result.data());
+#elif defined TRAPH_BUILD_MKL
 		CBLAS_LAYOUT a_layout = a.layout() == layout_type::column_major ? CBLAS_LAYOUT::CblasColMajor : CBLAS_LAYOUT::CblasRowMajor;
 
 		cblas_dgemm(a_layout,