Add CUDA compile definition and CMake setting
This commit is contained in:
@@ -4,9 +4,14 @@ project(Aurora)
|
|||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||||
|
|
||||||
|
set(Aurora_USE_CUDA ON)
|
||||||
|
|
||||||
|
if (Aurora_USE_CUDA)
|
||||||
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
||||||
enable_language(CUDA)
|
enable_language(CUDA)
|
||||||
find_package(CUDAToolkit REQUIRED)
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
add_definitions(-DUSE_CUDA)
|
||||||
|
endif(Aurora_USE_CUDA)
|
||||||
|
|
||||||
find_package (OpenMP REQUIRED)
|
find_package (OpenMP REQUIRED)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
||||||
@@ -30,13 +35,14 @@ target_include_directories(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_IN
|
|||||||
target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
|
target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
|
||||||
target_link_libraries(Aurora PUBLIC OpenMP::OpenMP_CXX)
|
target_link_libraries(Aurora PUBLIC OpenMP::OpenMP_CXX)
|
||||||
target_link_libraries(Aurora PUBLIC matio)
|
target_link_libraries(Aurora PUBLIC matio)
|
||||||
|
if (Aurora_USE_CUDA)
|
||||||
target_include_directories(Aurora PRIVATE ./src /usr/local/cuda/include)
|
target_include_directories(Aurora PRIVATE ./src /usr/local/cuda/include)
|
||||||
set_target_properties(Aurora PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
set_target_properties(Aurora PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||||
-arch=sm_75
|
-arch=sm_75
|
||||||
>)
|
>)
|
||||||
target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||||
|
endif(Aurora_USE_CUDA)
|
||||||
|
|
||||||
find_package(GTest REQUIRED)
|
find_package(GTest REQUIRED)
|
||||||
INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIRS})
|
INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIRS})
|
||||||
@@ -55,11 +61,13 @@ target_link_libraries(Aurora_Test PUBLIC OpenMP::OpenMP_CXX)
|
|||||||
target_link_libraries(Aurora_Test PUBLIC matio)
|
target_link_libraries(Aurora_Test PUBLIC matio)
|
||||||
target_link_libraries(Aurora_Test PUBLIC ${GTEST_BOTH_LIBRARIES} )
|
target_link_libraries(Aurora_Test PUBLIC ${GTEST_BOTH_LIBRARIES} )
|
||||||
|
|
||||||
|
if (Aurora_USE_CUDA)
|
||||||
target_include_directories(Aurora_Test PRIVATE ./src /usr/local/cuda/include)
|
target_include_directories(Aurora_Test PRIVATE ./src /usr/local/cuda/include)
|
||||||
set_target_properties(Aurora_Test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
set_target_properties(Aurora_Test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
target_compile_options(Aurora_Test PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
target_compile_options(Aurora_Test PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||||
-arch=sm_75
|
-arch=sm_75
|
||||||
>)
|
>)
|
||||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
target_link_libraries(Aurora_Test PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||||
|
endif(Aurora_USE_CUDA)
|
||||||
gtest_discover_tests(Aurora_Test )
|
gtest_discover_tests(Aurora_Test )
|
||||||
#target_link_libraries(CreateMatchedFilter PRIVATE TBB::tbb)
|
#target_link_libraries(CreateMatchedFilter PRIVATE TBB::tbb)
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
#ifdef USE_CUDA
|
||||||
#include "CudaMatrix.h"
|
#include "CudaMatrix.h"
|
||||||
|
|
||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
@@ -218,6 +219,7 @@ CudaMatrix CudaMatrix::block(int aDim,int aBeginIndex, int aEndIndex) const
|
|||||||
return CudaMatrix::fromRawData(dataOutput,getDimSize(0),dimLength,getDimSize(2),getValueType());
|
return CudaMatrix::fromRawData(dataOutput,getDimSize(0),dimLength,getDimSize(2),getValueType());
|
||||||
}
|
}
|
||||||
case 2:
|
case 2:
|
||||||
|
default:
|
||||||
{
|
{
|
||||||
int copySize = dimLength*sliceStride;
|
int copySize = dimLength*sliceStride;
|
||||||
cudaMemcpy(dataOutput,
|
cudaMemcpy(dataOutput,
|
||||||
@@ -235,4 +237,6 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
|
|||||||
std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
|
std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
}
|
||||||
|
#endif // USE_CUDA
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
#ifndef CUDAMATRIX_H
|
#ifndef CUDAMATRIX_H
|
||||||
#define CUDAMATRIX_H
|
#define CUDAMATRIX_H
|
||||||
|
#ifdef USE_CUDA
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
|
|
||||||
|
|
||||||
@@ -235,5 +235,5 @@ namespace Aurora
|
|||||||
std::vector<int> mInfo;
|
std::vector<int> mInfo;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
#endif // USE_CUDA
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -13,7 +13,10 @@
|
|||||||
#include <Eigen/Core>
|
#include <Eigen/Core>
|
||||||
#include <Eigen/Eigen>
|
#include <Eigen/Eigen>
|
||||||
#include <Eigen/Dense>
|
#include <Eigen/Dense>
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Aurora {
|
namespace Aurora {
|
||||||
|
|
||||||
@@ -26,9 +29,10 @@ namespace Aurora {
|
|||||||
void free(void* ptr){
|
void free(void* ptr){
|
||||||
mkl_free(ptr);
|
mkl_free(ptr);
|
||||||
}
|
}
|
||||||
|
#ifdef USE_CUDA
|
||||||
void gpuFree(void* ptr)
|
void gpuFree(void* ptr)
|
||||||
{
|
{
|
||||||
cudaFree(ptr);
|
cudaFree(ptr);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
@@ -17,7 +17,9 @@
|
|||||||
#include "Eigen/src/Core/Matrix.h"
|
#include "Eigen/src/Core/Matrix.h"
|
||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Aurora{
|
namespace Aurora{
|
||||||
typedef void(*CalcFuncD)(const MKL_INT n, const float a[], const MKL_INT inca, const float b[],
|
typedef void(*CalcFuncD)(const MKL_INT n, const float a[], const MKL_INT inca, const float b[],
|
||||||
@@ -394,6 +396,7 @@ namespace Aurora {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
CudaMatrix Matrix::toDeviceMatrix() const
|
CudaMatrix Matrix::toDeviceMatrix() const
|
||||||
{
|
{
|
||||||
float* deviceData = nullptr;
|
float* deviceData = nullptr;
|
||||||
@@ -401,7 +404,7 @@ namespace Aurora {
|
|||||||
cudaMemcpy(deviceData, mData.get(), sizeof(float) * getDataSize() * getValueType(), cudaMemcpyHostToDevice);
|
cudaMemcpy(deviceData, mData.get(), sizeof(float) * getDataSize() * getValueType(), cudaMemcpyHostToDevice);
|
||||||
return CudaMatrix::fromRawData(deviceData, mInfo[0], mInfo[1], mInfo[2], getValueType());
|
return CudaMatrix::fromRawData(deviceData, mInfo[0], mInfo[1], mInfo[2], getValueType());
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
Matrix Matrix::New(float *data, const Matrix &shapeMatrix) {
|
Matrix Matrix::New(float *data, const Matrix &shapeMatrix) {
|
||||||
return New(data,
|
return New(data,
|
||||||
shapeMatrix.getDimSize(0),
|
shapeMatrix.getDimSize(0),
|
||||||
|
|||||||
Reference in New Issue
Block a user