From 3b8dbe0f31da8d484a37c52d8feefc65026f496d Mon Sep 17 00:00:00 2001
From: sunwen
Date: Tue, 19 Dec 2023 13:31:05 +0800
Subject: [PATCH] Add cuda complex real + imag and unittest.

---
 src/Function1D.cu             | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 src/Function1D.cuh            |  2 ++
 test/Function1D_Cuda_Test.cpp |  9 +++++++++
 3 files changed, 57 insertions(+)

diff --git a/src/Function1D.cu b/src/Function1D.cu
index 1991706..c99e1ba 100644
--- a/src/Function1D.cu
+++ b/src/Function1D.cu
@@ -39,6 +39,21 @@ __global__ void complexKernel(float* aInputData, float* aOutput, unsigned int aS
     }
 }
 
+// Interleaves separate real and imaginary arrays into one output array:
+// aOutput[2*i] = aInputRealData[i] and aOutput[2*i + 1] = aInputImagData[i] for i < aSize.
+// Uses only the x dimension of the launch; threads whose index is >= aSize do nothing.
+__global__ void complexKernel(float* aInputRealData, float* aInputImagData, float* aOutput, unsigned int aSize)
+{
+    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx < aSize)
+    {
+        // Widen before doubling so that 2*idx cannot wrap around in 32-bit arithmetic.
+        size_t outIdx = 2 * static_cast<size_t>(idx);
+        aOutput[outIdx] = aInputRealData[idx];
+        aOutput[outIdx + 1] = aInputImagData[idx];
+    }
+}
+
 CudaMatrix Aurora::complex(const CudaMatrix& aMatrix)
 {
     if(aMatrix.isComplex())
@@ -55,6 +70,37 @@ CudaMatrix Aurora::complex(const CudaMatrix& aMatrix)
     return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), Aurora::Complex);
 }
 
+// Builds a complex matrix from a real-part matrix and an imaginary-part matrix.
+// Returns a default-constructed CudaMatrix when either input is already complex,
+// when the two data sizes differ, or when a CUDA call fails.
+CudaMatrix Aurora::complex(const CudaMatrix& aReal, const CudaMatrix& aImag)
+{
+    if(aReal.isComplex() || aImag.isComplex() || aReal.getDataSize() != aImag.getDataSize())
+    {
+        return CudaMatrix();
+    }
+
+    size_t size = aReal.getDataSize();
+    float* data = nullptr;
+    if(cudaMalloc((void**)&data, sizeof(float) * size * Aurora::Complex) != cudaSuccess)
+    {
+        return CudaMatrix();
+    }
+    if(size > 0)
+    {
+        // A grid of zero blocks is an invalid launch configuration, so empty inputs skip the kernel.
+        int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+        complexKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aReal.getData(), aImag.getData(), data, size);
+        // Launch failures are reported by cudaGetLastError, execution failures by the synchronize.
+        if(cudaGetLastError() != cudaSuccess || cudaDeviceSynchronize() != cudaSuccess)
+        {
+            cudaFree(data);
+            return CudaMatrix();
+        }
+    }
+    return Aurora::CudaMatrix::fromRawData(data, aReal.getDimSize(0), aReal.getDimSize(1), aReal.getDimSize(2), Aurora::Complex);
+}
+
 __global__ void realKernel(float* aInputData, float* aOutput, unsigned int aSize)
 {
     unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
diff --git a/src/Function1D.cuh b/src/Function1D.cuh
index 
a0f3380..24761bc 100644 --- a/src/Function1D.cuh +++ b/src/Function1D.cuh @@ -8,6 +8,8 @@ namespace Aurora { CudaMatrix complex(const CudaMatrix& aMatrix); + CudaMatrix complex(const CudaMatrix& aReal, const CudaMatrix& aImag); + CudaMatrix real(const CudaMatrix& aMatrix); CudaMatrix imag(const CudaMatrix& aMatrix); diff --git a/test/Function1D_Cuda_Test.cpp b/test/Function1D_Cuda_Test.cpp index 89f7142..688fe79 100644 --- a/test/Function1D_Cuda_Test.cpp +++ b/test/Function1D_Cuda_Test.cpp @@ -36,6 +36,15 @@ TEST_F(Function1D_Cuda_Test, complex) { EXPECT_EQ(result1[i], result2[i]); } + + Aurora::Matrix hostMatrix2 = Aurora::Matrix::fromRawData(new float[8]{1,2,3,4,5,6,7,8}, 2,2,2); + Aurora::CudaMatrix deviceMatrix2 = hostMatrix.toDeviceMatrix(); + result2 = Aurora::complex(deviceMatrix, deviceMatrix2).toHostMatrix(); + for(size_t i=0; i