From cca2358825e3cb028670beee6571e30e4ff2572d Mon Sep 17 00:00:00 2001 From: sunwen Date: Tue, 12 Dec 2023 15:20:26 +0800 Subject: [PATCH] Add cuda sub2ind and unitest. --- src/Function2D.cu | 54 +++++++++++++++++++++++++++++++++++ src/Function2D.cuh | 10 +++++++ test/Function2D_Cuda_Test.cpp | 22 ++++++++++++-- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/Function2D.cu b/src/Function2D.cu index dccba5a..4cc3e95 100644 --- a/src/Function2D.cu +++ b/src/Function2D.cu @@ -1444,4 +1444,58 @@ void Aurora::ifftshift(CudaMatrix &aMatrix){ aMatrix.getDimSize(0) * aMatrix.getValueType()); cudaFree(data); } +} + +__global__ void sub2indKernel(float* aVMatrixSize, float** aindexMatrix, float* aOutputData, unsigned int aRowSize, unsigned int aColumnSize) +{ + unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; + if(idx < aRowSize) + { + aOutputData[idx] = 0; + for(unsigned int i=aColumnSize; i>0; --i) + { + unsigned int subSize = 1; + for(unsigned int j=0; j aSliceIdxs) +{ + if (aSliceIdxs.size() != aVMatrixSize.getDataSize()) + { + std::cerr<<"cuda sub2ind size not match"<getDataSize(); + unsigned int indexMatrixColumns = aSliceIdxs.size(); + float** indexMatrixData = nullptr; + float** tempPointer = new float*[indexMatrixColumns]; + cudaMalloc((void **)&indexMatrixData, sizeof(float*) * indexMatrixColumns); + for(unsigned int i=0; i>>(aVMatrixSize.getData(), indexMatrixData, data, indexMatrixRows, indexMatrixColumns); + cudaDeviceSynchronize(); + cudaFree(indexMatrixData); + delete[] tempPointer; + return CudaMatrix::fromRawData(data, indexMatrixRows); } \ No newline at end of file diff --git a/src/Function2D.cuh b/src/Function2D.cuh index d9f39a6..8abaa45 100644 --- a/src/Function2D.cuh +++ b/src/Function2D.cuh @@ -59,6 +59,16 @@ namespace Aurora void fftshift(CudaMatrix &aMatrix); void ifftshift(CudaMatrix &aMatrix); + + /** + * 转换下标为索引值 + * @attention 索引值按照其实为1与matlab对应,在C++中使用需要-1 + * @param aVMatrixSize + * @param aSliceIdxs + * @return + */ + CudaMatrix sub2ind(const CudaMatrix &aVMatrixSize, std::vector aSliceIdxs); + } #endif // __FUNCTION2D_CUDA_H__ \ No newline at end of file diff --git a/test/Function2D_Cuda_Test.cpp b/test/Function2D_Cuda_Test.cpp index 5cc3bd6..a68c455 100644 --- a/test/Function2D_Cuda_Test.cpp +++ b/test/Function2D_Cuda_Test.cpp @@ -843,8 +843,8 @@ TEST_F(Function2D_Cuda_Test, prod) { EXPECT_FLOAT_AE(result1[i], result2[i]); } - auto matrixHostComplex = Aurora::Matrix::fromRawData(new float[40], 4,5, 1,Aurora::Complex); - for(unsigned int i=0; i<40;++i) + auto matrixHostComplex = Aurora::Matrix::fromRawData(new float[20], 2,5, 1,Aurora::Complex); + for(unsigned int i=0; i<20;++i) { matrixHost[i] = i + 1; } @@ -858,4 +858,22 @@ TEST_F(Function2D_Cuda_Test, prod) { { EXPECT_FLOAT_AE(result1[i], result2[i]); } +} + +TEST_F(Function2D_Cuda_Test, sub2ind) { + float* dI1= new float[4]{1,2,1,2}; + Aurora::Matrix I1(std::shared_ptr(dI1,std::default_delete()),std::vector{4}); + float* dI2= new float[4]{2,2,1,1}; + Aurora::Matrix I2(std::shared_ptr(dI2,std::default_delete()),std::vector{4}); + float* dI4= new float[4]{1,1,2,2}; + Aurora::Matrix I3(std::shared_ptr(dI4,std::default_delete()),std::vector{4}); + float* dsz= new float[3]{2,2,2}; + Aurora::Matrix sz(std::shared_ptr(dsz,std::default_delete()),std::vector{3}); + auto result1 = Aurora::sub2ind(sz, {I1, I2, I3}); + auto result2 = Aurora::sub2ind(sz.toDeviceMatrix(), {I1.toDeviceMatrix(), I2.toDeviceMatrix(), I3.toDeviceMatrix()}).toHostMatrix(); + EXPECT_FLOAT_EQ(result1.getDataSize(), result2.getDataSize()); + for(unsigned int i=0; i