Add std, and fix fft, ifft bug for cuda
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
set(MKL_INTERFACE_FULL intel_lp64)
|
||||
find_package(OpenMP REQUIRED)
|
||||
find_package(MKL CONFIG REQUIRED)
|
||||
enable_language(CUDA)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(Aurora_MAJOR_VERSION 1)
|
||||
set(Aurora_MINOR_VERSION 0)
|
||||
@@ -9,12 +11,11 @@ set(Aurora_BUILD_VERSION 0)
|
||||
get_filename_component(Aurora_DIR "${CMAKE_CURRENT_LIST_DIR}/" PATH)
|
||||
|
||||
message("Aurora_DIR: ${Aurora_DIR}")
|
||||
file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/*.cpp")
|
||||
|
||||
file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/[AFSC]*.cpp" "${Aurora_DIR}/src/Matrix*.cpp" "${Aurora_DIR}/src/*.cu")
|
||||
message( ${Aurora_Source})
|
||||
set(Aurora_INCLUDE_DIRS "${Aurora_DIR}/src" "${Aurora_DIR}/thirdparty/include" $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
|
||||
set(Aurora_Complie_Options $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS> )
|
||||
set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX)
|
||||
set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX ${CUDA_cublas_LIBRARY} ${CUDA_cusolver_LIBRARY})
|
||||
set(Aurora_FOUND TRUE)
|
||||
message(Aurora Found)
|
||||
@@ -156,7 +156,6 @@ CudaMatrix Aurora::max(const CudaMatrix &aMatrix, FunctionDirection direction, l
|
||||
CudaMatrix vxmMax(CudaMatrix aVec, CudaMatrix aMat) {
|
||||
//col-vec x mat
|
||||
if (aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0)) {
|
||||
std::cout<<"max mat and col-vec "<<std::endl;
|
||||
size_t size = aMat.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
@@ -175,7 +174,6 @@ CudaMatrix vxmMax(CudaMatrix aVec, CudaMatrix aMat) {
|
||||
// row-vec x mat
|
||||
else if (aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1))
|
||||
{
|
||||
std::cout<<"max mat and row-vec "<<std::endl;
|
||||
size_t size = aMat.getDataSize() ;
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
@@ -376,7 +374,6 @@ CudaMatrix Aurora::min(const CudaMatrix &aMatrix, FunctionDirection direction, l
|
||||
CudaMatrix vxmMin(CudaMatrix aVec, CudaMatrix aMat) {
|
||||
//col-vec x mat
|
||||
if (aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0)) {
|
||||
std::cout<<"min mat and col-vec "<<std::endl;
|
||||
size_t size = aMat.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
@@ -395,7 +392,6 @@ CudaMatrix vxmMin(CudaMatrix aVec, CudaMatrix aMat) {
|
||||
// row-vec x mat
|
||||
else if (aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1))
|
||||
{
|
||||
std::cout<<"min mat and row-vec "<<std::endl;
|
||||
size_t size = aMat.getDataSize() ;
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
@@ -682,7 +678,6 @@ CudaMatrix Aurora::sum(const CudaMatrix &aMatrix, FunctionDirection direction ){
|
||||
case Column:
|
||||
default:
|
||||
{
|
||||
std::cout<<"Column sum"<<std::endl;
|
||||
float* matData = aMatrix.getData();
|
||||
float* retData = nullptr;
|
||||
int colElementCount = aMatrix.getDimSize(0);
|
||||
@@ -793,6 +788,21 @@ CudaMatrix Aurora::mean(const CudaMatrix &aMatrix, FunctionDirection direction )
|
||||
return CudaMatrix();
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::std(const CudaMatrix &aMatrix){
|
||||
if (aMatrix.getDimSize(2) > 1 || aMatrix.isComplex()) {
|
||||
std::cerr
|
||||
<< (aMatrix.getDimSize(2) > 1 ? "std() not support 3D data!" : "std() not support complex value type!")
|
||||
<< std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
auto src = aMatrix.isComplex() ? Aurora::abs(aMatrix) : aMatrix.deepCopy();
|
||||
int calc_size = src.getDimSize(0) == 1 ? src.getDimSize(1) : src.getDimSize(0);
|
||||
auto meanM = Aurora::mean(src);
|
||||
return sqrt(Aurora::sum((src-meanM)^2.0)/((float)calc_size-1.0f));
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
class RowElementIterator:public thrust::iterator_facade<
|
||||
RowElementIterator<ValueType>,
|
||||
@@ -1294,12 +1304,13 @@ __global__ void complexFillKernel(float* aInputData, float* aOutput,unsigned int
|
||||
for (int offset = 0; offset < aDesColEleCount; offset+=blockDim.x)
|
||||
{
|
||||
if(threadIdx.x + offset< aCopySize){
|
||||
aOutput[2*idx_d] = aInputData[idx_s];
|
||||
aOutput[2*idx_d + 1] = 0;
|
||||
aOutput[2 * idx_d + offset * 2] = aInputData[idx_s + offset];
|
||||
aOutput[2 * idx_d + offset * 2 + 1] = 0;
|
||||
|
||||
}
|
||||
else if(threadIdx.x + offset< aDesColEleCount){
|
||||
aOutput[2*idx_d] = 0;
|
||||
aOutput[2*idx_d + 1] = 0;
|
||||
aOutput[2 * idx_d + offset * 2] = 0;
|
||||
aOutput[2 * idx_d + offset * 2 + 1] = 0;
|
||||
}
|
||||
else{
|
||||
return;
|
||||
@@ -1316,12 +1327,12 @@ __global__ void complexCopyKernel(float* aInputData, float* aOutput,unsigned int
|
||||
for (int offset = 0; offset < aDesColEleCount; offset+=blockDim.x)
|
||||
{
|
||||
if(threadIdx.x + offset< aCopySize){
|
||||
aOutput[2*idx_d] = aInputData[idx_s*2];
|
||||
aOutput[2*idx_d + 1] = aInputData[idx_s*2+1];
|
||||
aOutput[2*idx_d + offset * 2 ] = aInputData[idx_s*2 + offset*2];
|
||||
aOutput[2*idx_d + offset*2+ 1] = aInputData[idx_s*2+ offset*2+1];
|
||||
}
|
||||
else if(threadIdx.x + offset< aDesColEleCount){
|
||||
aOutput[2*idx_d] = 0;
|
||||
aOutput[2*idx_d + 1] = 0;
|
||||
aOutput[2*idx_d + offset*2] = 0;
|
||||
aOutput[2*idx_d + offset*2+ 1] = 0;
|
||||
}
|
||||
else{
|
||||
return;
|
||||
@@ -1344,7 +1355,9 @@ if (aMatrix.isComplex()){
|
||||
complexFillKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
||||
}
|
||||
auto ret = Aurora::CudaMatrix::fromRawData(data,ColEleCount,aMatrix.getDimSize(1),1,Complex);
|
||||
auto mm = ret.toHostMatrix();
|
||||
ExecFFT(ret,0);
|
||||
mm = ret.toHostMatrix();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,14 @@ namespace Aurora
|
||||
*/
|
||||
CudaMatrix mean(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
|
||||
|
||||
/**
|
||||
* @brief 标准差,只支持列方向
|
||||
*
|
||||
* @param aMatrix
|
||||
* @return CudaMatrix
|
||||
*/
|
||||
CudaMatrix std(const CudaMatrix &aMatrix);
|
||||
|
||||
CudaMatrix sort(const CudaMatrix &aMatrix,FunctionDirection direction = Column);
|
||||
CudaMatrix sort(CudaMatrix &&aMatrix,FunctionDirection direction = Column);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -39,7 +39,7 @@ TEST_F(Function2D_Cuda_Test, min)
|
||||
B = Aurora::Matrix::fromRawData(dataB, 4096, 41472);
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
|
||||
// column
|
||||
auto ret1 = Aurora::min(B, Aurora::Column,r,c);
|
||||
|
||||
auto ret2 = Aurora::min(dB, Aurora::Column,r,c);
|
||||
@@ -53,7 +53,7 @@ TEST_F(Function2D_Cuda_Test, min)
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
|
||||
|
||||
//row
|
||||
ret1 = Aurora::min(B, Aurora::FunctionDirection::Row,r,c);
|
||||
|
||||
ret2 = Aurora::min(dB, Aurora::FunctionDirection::Row,r,c);
|
||||
@@ -66,6 +66,23 @@ TEST_F(Function2D_Cuda_Test, min)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
|
||||
//ALL
|
||||
long r1,c1;
|
||||
ret1 = Aurora::min(B, Aurora::All,r,c);
|
||||
|
||||
ret2 = Aurora::min(dB, Aurora::All,r1,c1);
|
||||
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
ASSERT_EQ(r, r1);
|
||||
ASSERT_EQ(c, c1);
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
// different size speed
|
||||
// Aurora::Matrix Aurora::min(const Aurora::Matrix &aMatrix,
|
||||
@@ -100,6 +117,22 @@ TEST_F(Function2D_Cuda_Test, min)
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
//ALL
|
||||
long r1,c1;
|
||||
ret1 = Aurora::min(B, Aurora::All,r,c);
|
||||
|
||||
ret2 = Aurora::min(dB, Aurora::All,r1,c1);
|
||||
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
ASSERT_EQ(r, r1);
|
||||
ASSERT_EQ(c, c1);
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
@@ -254,6 +287,22 @@ TEST_F(Function2D_Cuda_Test, max)
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
//ALL
|
||||
long r1,c1;
|
||||
ret1 = Aurora::max(B, Aurora::All,r,c);
|
||||
|
||||
ret2 = Aurora::max(dB, Aurora::All,r1,c1);
|
||||
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
ASSERT_EQ(r, r1);
|
||||
ASSERT_EQ(c, c1);
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
@@ -296,6 +345,23 @@ TEST_F(Function2D_Cuda_Test, max)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
|
||||
//ALL
|
||||
long r1,c1;
|
||||
ret1 = Aurora::max(B, Aurora::All,r,c);
|
||||
|
||||
ret2 = Aurora::max(dB, Aurora::All,r1,c1);
|
||||
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
ASSERT_EQ(r, r1);
|
||||
ASSERT_EQ(c, c1);
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
// test
|
||||
// Aurora::Matrix Aurora::max(const Aurora::Matrix &aMatrix, float aValue)
|
||||
@@ -907,4 +973,14 @@ TEST_F(Function2D_Cuda_Test, ifft_symmetric) {
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function2D_Cuda_Test, std){
|
||||
float *dataMA= new float [9]{1, 2, 3, 2, 2, 6, 3, 3, 6};
|
||||
auto A = Aurora::Matrix::fromRawData(dataMA,3,3);
|
||||
auto D= Aurora::std(A.toDeviceMatrix());
|
||||
EXPECT_FLOAT_EQ(1.0, D.getValue(0));
|
||||
EXPECT_FLOAT_EQ(2.3094, fourDecimalRound(D.getValue(1)));
|
||||
EXPECT_FLOAT_EQ(1.7321, fourDecimalRound(D.getValue(2)));
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user