Add CUDA complex(real, imag) and its unit test.
This commit is contained in:
@@ -39,6 +39,16 @@ __global__ void complexKernel(float* aInputData, float* aOutput, unsigned int aS
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Interleave separate real and imaginary arrays into one complex array.
 *
 * For every element i < aSize the kernel writes
 *   aOutput[2*i]     = aInputRealData[i]
 *   aOutput[2*i + 1] = aInputImagData[i]
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element; only the
 * .x components of blockIdx/blockDim/threadIdx are used. Threads whose
 * index is >= aSize do nothing, so the grid may overshoot aSize.
 *
 * @param aInputRealData  real parts, at least aSize floats are read
 * @param aInputImagData  imaginary parts, at least aSize floats are read
 * @param aOutput         destination, 2 * aSize floats are written
 * @param aSize           number of elements to convert
 */
__global__ void complexKernel(float* aInputRealData, float* aInputImagData, float* aOutput, unsigned int aSize)
{
    // Index math is done in size_t: in 32-bit unsigned arithmetic both
    // blockIdx.x * blockDim.x and 2 * idx can wrap for very large inputs,
    // which would silently scatter writes to the wrong output slots.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx < aSize)
    {
        const size_t outIdx = 2 * idx;
        aOutput[outIdx]     = aInputRealData[idx];
        aOutput[outIdx + 1] = aInputImagData[idx];
    }
}
|
||||||
|
|
||||||
CudaMatrix Aurora::complex(const CudaMatrix& aMatrix)
|
CudaMatrix Aurora::complex(const CudaMatrix& aMatrix)
|
||||||
{
|
{
|
||||||
if(aMatrix.isComplex())
|
if(aMatrix.isComplex())
|
||||||
@@ -55,6 +65,22 @@ CudaMatrix Aurora::complex(const CudaMatrix& aMatrix)
|
|||||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), Aurora::Complex);
|
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), Aurora::Complex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build a complex device matrix from separate real and imaginary parts.
 *
 * The result takes its dimensions from aReal and stores each element as an
 * interleaved (real, imag) pair of floats in newly allocated device memory.
 *
 * @param aReal  real parts; must not be complex
 * @param aImag  imaginary parts; must not be complex and must hold the same
 *               number of elements as aReal
 * @return the complex matrix, or an empty CudaMatrix() when the inputs are
 *         rejected, the element count does not fit the kernel's unsigned int
 *         size parameter, the device allocation fails, or the kernel fails.
 *
 * NOTE(review): only the element counts are compared, not the individual
 * dimensions; the shape of aImag is ignored. Confirm this is intended.
 */
CudaMatrix Aurora::complex(const CudaMatrix& aReal, const CudaMatrix& aImag)
{
    if (aReal.isComplex() || aImag.isComplex() || aReal.getDataSize() != aImag.getDataSize())
    {
        return CudaMatrix();
    }

    const size_t size = aReal.getDataSize();

    // complexKernel receives the element count as unsigned int; refuse sizes
    // that would be silently truncated and leave part of the output unwritten.
    const unsigned int count = static_cast<unsigned int>(size);
    if (static_cast<size_t>(count) != size)
    {
        return CudaMatrix();
    }

    float* data = nullptr;
    if (cudaMalloc((void**)&data, sizeof(float) * size * Aurora::Complex) != cudaSuccess)
    {
        // Without this check the kernel would write through a null pointer.
        return CudaMatrix();
    }

    // A launch with zero blocks is an invalid configuration, so skip the
    // kernel entirely for empty inputs.
    if (count > 0)
    {
        const size_t blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
        complexKernel<<<static_cast<unsigned int>(blocksPerGrid), THREADS_PER_BLOCK>>>(aReal.getData(), aImag.getData(), data, count);

        // cudaGetLastError reports launch-configuration errors; the
        // synchronize call reports faults raised while the kernel ran.
        cudaError_t status = cudaGetLastError();
        if (status == cudaSuccess)
        {
            status = cudaDeviceSynchronize();
        }
        if (status != cudaSuccess)
        {
            // Do not leak the buffer or hand back uninitialised data.
            cudaFree(data);
            return CudaMatrix();
        }
    }

    return Aurora::CudaMatrix::fromRawData(data, aReal.getDimSize(0), aReal.getDimSize(1), aReal.getDimSize(2), Aurora::Complex);
}
|
||||||
|
|
||||||
__global__ void realKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
__global__ void realKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||||
{
|
{
|
||||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ namespace Aurora
|
|||||||
{
|
{
|
||||||
CudaMatrix complex(const CudaMatrix& aMatrix);
|
CudaMatrix complex(const CudaMatrix& aMatrix);
|
||||||
|
|
||||||
|
CudaMatrix complex(const CudaMatrix& aReal, const CudaMatrix& aImag);
|
||||||
|
|
||||||
CudaMatrix real(const CudaMatrix& aMatrix);
|
CudaMatrix real(const CudaMatrix& aMatrix);
|
||||||
|
|
||||||
CudaMatrix imag(const CudaMatrix& aMatrix);
|
CudaMatrix imag(const CudaMatrix& aMatrix);
|
||||||
|
|||||||
@@ -36,6 +36,15 @@ TEST_F(Function1D_Cuda_Test, complex)
|
|||||||
{
|
{
|
||||||
EXPECT_EQ(result1[i], result2[i]);
|
EXPECT_EQ(result1[i], result2[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Aurora::Matrix hostMatrix2 = Aurora::Matrix::fromRawData(new float[8]{1,2,3,4,5,6,7,8}, 2,2,2);
|
||||||
|
Aurora::CudaMatrix deviceMatrix2 = hostMatrix.toDeviceMatrix();
|
||||||
|
result2 = Aurora::complex(deviceMatrix, deviceMatrix2).toHostMatrix();
|
||||||
|
for(size_t i=0; i<result2.getDataSize(); ++i)
|
||||||
|
{
|
||||||
|
EXPECT_EQ(hostMatrix[i], result2[2 * i]);
|
||||||
|
EXPECT_EQ(hostMatrix2[i], result2[2 * i + 1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(Function1D_Cuda_Test, real)
|
TEST_F(Function1D_Cuda_Test, real)
|
||||||
|
|||||||
Reference in New Issue
Block a user