Add cuda ifft_symmetric and unittest.
This commit is contained in:
@@ -1337,12 +1337,12 @@ CudaMatrix Aurora::fft(const CudaMatrix &aMatrix, long aFFTSize){
|
|||||||
float* data = nullptr;
|
float* data = nullptr;
|
||||||
|
|
||||||
cudaMalloc((void**)&data, sizeof(float)*2*bufferSize);
|
cudaMalloc((void**)&data, sizeof(float)*2*bufferSize);
|
||||||
if (aMatrix.isComplex()){
|
if (aMatrix.isComplex()){
|
||||||
complexCopyKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
complexCopyKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
complexFillKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
complexFillKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
||||||
}
|
}
|
||||||
auto ret = Aurora::CudaMatrix::fromRawData(data,ColEleCount,aMatrix.getDimSize(1),1,Complex);
|
auto ret = Aurora::CudaMatrix::fromRawData(data,ColEleCount,aMatrix.getDimSize(1),1,Complex);
|
||||||
ExecFFT(ret,0);
|
ExecFFT(ret,0);
|
||||||
return ret;
|
return ret;
|
||||||
@@ -1503,4 +1503,35 @@ CudaMatrix Aurora::sub2ind(const CudaMatrix &aVMatrixSize, std::vector<CudaMatri
|
|||||||
cudaFree(indexMatrixData);
|
cudaFree(indexMatrixData);
|
||||||
delete[] tempPointer;
|
delete[] tempPointer;
|
||||||
return CudaMatrix::fromRawData(data, indexMatrixRows);
|
return CudaMatrix::fromRawData(data, indexMatrixRows);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Mirrors part of an interleaved (real, imaginary) float buffer in place.
 *
 * For every idx in [0, aMatrixDataSize) the pair stored at complex index
 * (aMatrixDataSize - idx) is copied to complex index
 * (idx + aMatrixDataSize + 2) with the sign of its second component flipped,
 * i.e. the destination receives the complex conjugate of the source.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per mirrored element;
 * threads whose global index is >= aMatrixDataSize do nothing.
 *
 * @param aMatrix          buffer of interleaved float pairs; must hold at
 *                         least 2 * (2 * aMatrixDataSize + 2) floats
 * @param aMatrixDataSize  number of pairs to mirror
 */
__global__ void ifft_symmetricKernel(float* aMatrix, unsigned int aMatrixDataSize)
{
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= aMatrixDataSize)
    {
        return;
    }

    // Float offsets of the source pair and of its mirrored destination.
    const unsigned int src = 2 * (aMatrixDataSize - tid);
    const unsigned int dst = (tid + aMatrixDataSize + 2) * 2;

    // Source and destination ranges never overlap, so both components can be
    // loaded before anything is stored.
    const float re = aMatrix[src];
    const float im = aMatrix[src + 1];

    aMatrix[dst]     = re;
    aMatrix[dst + 1] = -im;
}
|
||||||
|
|
||||||
|
/**
 * Inverse FFT of a conjugate-symmetric spectrum, returning the real part.
 *
 * A zero-initialised complex work buffer of aLength elements is allocated on
 * the device. The first aLength floats of aMatrix (aLength / 2 complex
 * values) are copied to its start, and ifft_symmetricKernel fills the upper
 * part with the conjugates of elements 1 .. aLength/2 - 1 in reverse order.
 * The element at complex index aLength/2 is left at zero. The buffer is then
 * handed to ifft() and the real part of the result is returned.
 *
 * NOTE(review): the mirroring indices match the usual symmetric layout only
 * for even aLength - confirm whether odd lengths are expected by callers.
 * NOTE(review): sizeof(float) * aLength bytes are read from aMatrix without
 * checking its size; presumably callers pass at least aLength / 2 complex
 * elements - verify against callers.
 *
 * @param aMatrix  complex input vector (non-vector input is rejected)
 * @param aLength  number of complex elements in the transform buffer
 * @return real part of the inverse FFT, or an empty CudaMatrix on error
 */
CudaMatrix Aurora::ifft_symmetric(const CudaMatrix &aMatrix, long aLength)
{
    if(!aMatrix.isVector())
    {
        std::cerr<<"cuda ifft_symmetric only support vector!"<<std::endl;
        return CudaMatrix();
    }
    if(aLength <= 0)
    {
        // A non-positive length would wrap around in the unsigned size math below.
        std::cerr<<"cuda ifft_symmetric requires a positive length!"<<std::endl;
        return CudaMatrix();
    }

    // Interleaved complex buffer: two floats per element.
    const size_t bufferBytes = sizeof(float) * 2 * static_cast<size_t>(aLength);
    float* data = nullptr;
    cudaError_t status = cudaMalloc((void **)&data, bufferBytes);
    if(status != cudaSuccess || data == nullptr)
    {
        std::cerr<<"cuda ifft_symmetric cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    // cudaMemset counts BYTES; the whole buffer must be cleared because the
    // element at complex index aLength/2 is written by neither the copy nor
    // the kernel.
    cudaMemset(data, 0, bufferBytes);
    cudaMemcpy(data, aMatrix.getData(), sizeof(float) * aLength, cudaMemcpyDeviceToDevice);

    // Number of elements to mirror; computed in signed arithmetic so that
    // short lengths do not wrap to a huge unsigned count.
    const long mirrorCount = aLength / 2 - 1;
    if(mirrorCount > 0)
    {
        int blocksPerGrid = static_cast<int>((mirrorCount + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK);
        ifft_symmetricKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(data, static_cast<unsigned int>(mirrorCount));
        cudaDeviceSynchronize();
    }

    return real(ifft(CudaMatrix::fromRawData(data,aLength,1,1,Complex)));
}
|
}
|
||||||
@@ -69,6 +69,13 @@ namespace Aurora
|
|||||||
*/
|
*/
|
||||||
CudaMatrix sub2ind(const CudaMatrix &aVMatrixSize, std::vector<CudaMatrix> aSliceIdxs);
|
CudaMatrix sub2ind(const CudaMatrix &aVMatrixSize, std::vector<CudaMatrix> aSliceIdxs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Symmetric inverse FFT. The input must be complex and the output is always real.
* NOTE(review): the original note states that input of up to 2 dimensions is
* supported, but the CUDA implementation added in this change rejects
* non-vector input - confirm which is intended.
|
||||||
|
* @param aMatrix complex input matrix
* @param aLength number of complex elements in the transform buffer that is
*                passed to ifft()
|
||||||
|
* @return real matrix produced by the inverse FFT
|
||||||
|
*/
|
||||||
|
CudaMatrix ifft_symmetric(const CudaMatrix &aMatrix,long aLength);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __FUNCTION2D_CUDA_H__
|
#endif // __FUNCTION2D_CUDA_H__
|
||||||
@@ -894,4 +894,17 @@ TEST_F(Function2D_Cuda_Test, sub2ind) {
|
|||||||
{
|
{
|
||||||
EXPECT_FLOAT_EQ(result1[i], result2[i]);
|
EXPECT_FLOAT_EQ(result1[i], result2[i]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that the CUDA ifft_symmetric produces the same values as the host
// implementation for a 9-element complex vector transformed with length 18.
TEST_F(Function2D_Cuda_Test, ifft_symmetric) {
    // 9 complex values stored as interleaved (real, imaginary) pairs.
    // NOTE(review): the buffer is presumably owned by the Matrix created from
    // it (it is never deleted here) - confirm against Matrix::fromRawData.
    float *input = new float[18]{10,2,1,3,4,4,16,3,1,2,15,-2,1,-3,4,-4,1,-3};
    auto matrixHost = Aurora::Matrix::fromRawData(input,9,1,1,Aurora::Complex);
    auto matrixDevice = matrixHost.toDeviceMatrix();

    auto result1 = Aurora::ifft_symmetric(matrixHost,18);
    auto result2 = Aurora::ifft_symmetric(matrixDevice,18).toHostMatrix();

    // Sizes are integers: compare them exactly, and stop the test on a
    // mismatch so the loop below cannot index result2 out of range.
    ASSERT_EQ(result1.getDataSize(),result2.getDataSize());
    for(unsigned int i=0; i<result1.getDataSize(); ++i)
    {
        EXPECT_FLOAT_AE(result1[i], result2[i]);
    }
}
|
}
|
||||||
Reference in New Issue
Block a user