Add fft,ifft,fftshift,ifftshift and their unittest
This commit is contained in:
@@ -74,7 +74,7 @@ if (Aurora_USE_CUDA)
|
||||
target_include_directories(Aurora_Test PRIVATE ./src /usr/local/cuda/include)
|
||||
set_target_properties(Aurora_Test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_compile_options(Aurora_Test PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
-arch=sm_75 --expt-extended-lambda -Icub/
|
||||
-arch=sm_75 --expt-extended-lambda
|
||||
>)
|
||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_cublas_LIBRARY})
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include "Function1D.cuh"
|
||||
#include "Matrix.h"
|
||||
|
||||
#include "cufft.h"
|
||||
#include <cufftXt.h>
|
||||
using namespace Aurora;
|
||||
|
||||
namespace
|
||||
@@ -1051,3 +1053,189 @@ CudaMatrix Aurora::inv(const CudaMatrix &aMatrix)
|
||||
|
||||
return CudaMatrix::fromRawData(data, n, n);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
*
|
||||
* @param aMatrix
|
||||
* @param aDirection 0 FORWARD, 1 BACKWARD
|
||||
*/
|
||||
void ExecFFT(CudaMatrix& aMatrix,int aDirection){
|
||||
cufftHandle plan;
|
||||
cudaStream_t stream = NULL;
|
||||
int batch_size = aMatrix.getDimSize(1);
|
||||
cufftComplex *d_data = (cufftComplex *)aMatrix.getData();
|
||||
cufftCreate(&plan);
|
||||
// int gpus[1] ={0};
|
||||
// cufftXtSetGPUs(plan,1,gpus);
|
||||
cufftPlan1d(&plan, aMatrix.getDimSize(0), CUFFT_C2C, batch_size);
|
||||
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
cufftSetStream(plan, stream);
|
||||
cufftExecC2C(plan, d_data, d_data, aDirection==0?CUFFT_FORWARD:CUFFT_INVERSE);
|
||||
cudaStreamSynchronize(stream);
|
||||
cufftDestroy(plan);
|
||||
cudaStreamDestroy(stream);
|
||||
}
|
||||
|
||||
__global__ void complexFillKernel(float* aInputData, float* aOutput,unsigned int aCopySize,
|
||||
unsigned int aSrcColEleCount, unsigned int aDesColEleCount)
|
||||
{
|
||||
unsigned int idx_d = blockIdx.x * aDesColEleCount + threadIdx.x;
|
||||
unsigned int idx_s = blockIdx.x * aSrcColEleCount + threadIdx.x;
|
||||
|
||||
for (int offset = 0; offset < aDesColEleCount; offset+=blockDim.x)
|
||||
{
|
||||
if(threadIdx.x + offset< aCopySize){
|
||||
aOutput[2*idx_d] = aInputData[idx_s];
|
||||
aOutput[2*idx_d + 1] = 0;
|
||||
}
|
||||
else if(threadIdx.x + offset< aDesColEleCount){
|
||||
aOutput[2*idx_d] = 0;
|
||||
aOutput[2*idx_d + 1] = 0;
|
||||
}
|
||||
else{
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void complexCopyKernel(float* aInputData, float* aOutput,unsigned int aCopySize,
|
||||
unsigned int aSrcColEleCount, unsigned int aDesColEleCount)
|
||||
{
|
||||
unsigned int idx_d = blockIdx.x * aDesColEleCount + threadIdx.x;
|
||||
unsigned int idx_s = blockIdx.x * aSrcColEleCount + threadIdx.x;
|
||||
|
||||
for (int offset = 0; offset < aDesColEleCount; offset+=blockDim.x)
|
||||
{
|
||||
if(threadIdx.x + offset< aCopySize){
|
||||
aOutput[2*idx_d] = aInputData[idx_s*2];
|
||||
aOutput[2*idx_d + 1] = aInputData[idx_s*2+1];
|
||||
}
|
||||
else if(threadIdx.x + offset< aDesColEleCount){
|
||||
aOutput[2*idx_d] = 0;
|
||||
aOutput[2*idx_d + 1] = 0;
|
||||
}
|
||||
else{
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::fft(const CudaMatrix &aMatrix, long aFFTSize){
|
||||
size_t ColEleCount = (aFFTSize>0)?aFFTSize:aMatrix.getDimSize(0);
|
||||
//实际需要copy赋值的非0值
|
||||
size_t needCopySize = (ColEleCount<aMatrix.getDimSize(0))?ColEleCount:aMatrix.getDimSize(0);
|
||||
size_t bufferSize = ColEleCount*aMatrix.getDimSize(1);
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float)*2*bufferSize);
|
||||
|
||||
complexFillKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
||||
auto ret = Aurora::CudaMatrix::fromRawData(data,ColEleCount,aMatrix.getDimSize(1),1,Complex);
|
||||
ExecFFT(ret,0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::ifft(const CudaMatrix &aMatrix, long aFFTSize){
|
||||
if (!aMatrix.isComplex()){
|
||||
std::cerr<<"ifft input must be complex value"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
size_t ColEleCount = (aFFTSize>0)?aFFTSize:aMatrix.getDimSize(0);
|
||||
//实际需要copy赋值的非0值
|
||||
size_t needCopySize = (ColEleCount<aMatrix.getDimSize(0))?ColEleCount:aMatrix.getDimSize(0);
|
||||
size_t bufferSize = ColEleCount*aMatrix.getDimSize(1);
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float)*2*bufferSize);
|
||||
|
||||
complexCopyKernel<<<aMatrix.getDimSize(1), 256>>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount);
|
||||
auto ret = Aurora::CudaMatrix::fromRawData(data,ColEleCount,aMatrix.getDimSize(1),1,Complex);
|
||||
ExecFFT(ret,1);
|
||||
float colEleCountf = 1.f/ColEleCount;
|
||||
auto lambda = [=]__device__(const float& v){
|
||||
return v*colEleCountf;
|
||||
};
|
||||
thrust::transform(thrust::device,ret.getData(),ret.getData()+ret.getDataSize()*2,ret.getData(),lambda);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__global__ void fftshiftSwapKernel(float* aData, unsigned int aColEleCount){
|
||||
unsigned int idx = blockIdx.x *aColEleCount + threadIdx.x;
|
||||
unsigned int stride = aColEleCount/2;
|
||||
for (int i = 0; i < stride; i+=blockDim.x)
|
||||
{
|
||||
if (threadIdx.x + i < stride)
|
||||
{
|
||||
float temp = aData[idx];
|
||||
aData[idx] = aData[idx+stride];
|
||||
aData[idx+stride] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void memcpyColKernel(float* aInputData,float* aOutputData, unsigned int aCopySize,
|
||||
unsigned int aSrcColEleCount,unsigned int aDesColEleCount){
|
||||
unsigned int idx = blockIdx.x *aSrcColEleCount + threadIdx.x;
|
||||
unsigned int idx2 = blockIdx.x *aDesColEleCount + threadIdx.x;
|
||||
for (int i = 0; i < aCopySize; i+=blockDim.x)
|
||||
{
|
||||
if (threadIdx.x + i < aCopySize)
|
||||
{
|
||||
aOutputData[idx2+i] = aInputData[idx+i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Aurora::fftshift(CudaMatrix &aMatrix){
|
||||
if (aMatrix.getDimSize(0) % 2 == 0) {
|
||||
fftshiftSwapKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
aMatrix.getData(), aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
} else {
|
||||
int copySize = aMatrix.getDimSize(0) / 2 + 1;
|
||||
float *data = nullptr;
|
||||
cudaMalloc((void **)&data, copySize * aMatrix.getValueType());
|
||||
memcpyColKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
aMatrix.getData(), data, copySize * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType(),
|
||||
copySize * aMatrix.getValueType());
|
||||
memcpyColKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
aMatrix.getData() + copySize* aMatrix.getValueType(), aMatrix.getData(),
|
||||
(copySize - 1) * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
memcpyColKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
data, aMatrix.getData() + (copySize - 1) * aMatrix.getValueType(),
|
||||
copySize * aMatrix.getValueType(),
|
||||
copySize * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
cudaFree(data);
|
||||
}
|
||||
}
|
||||
|
||||
void Aurora::ifftshift(CudaMatrix &aMatrix){
|
||||
if (aMatrix.getDimSize(0) % 2 == 0) {
|
||||
fftshiftSwapKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
aMatrix.getData(), aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
} else {
|
||||
int copySize = aMatrix.getDimSize(0) / 2 + 1;
|
||||
float *data = nullptr;
|
||||
cudaMalloc((void **)&data, copySize * aMatrix.getValueType());
|
||||
memcpyColKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
aMatrix.getData()+(copySize-1)* aMatrix.getValueType(),
|
||||
data, copySize * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType(),
|
||||
copySize * aMatrix.getValueType());
|
||||
memcpyColKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
aMatrix.getData(), aMatrix.getData() + (copySize) * aMatrix.getValueType(),
|
||||
(copySize-1) * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
memcpyColKernel<<<aMatrix.getDimSize(1), 256>>>(
|
||||
data, aMatrix.getData(),
|
||||
copySize * aMatrix.getValueType(),
|
||||
copySize * aMatrix.getValueType(),
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
cudaFree(data);
|
||||
}
|
||||
}
|
||||
@@ -25,6 +25,7 @@ namespace Aurora
|
||||
* @return CudaMatrix
|
||||
*/
|
||||
CudaMatrix mean(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
|
||||
|
||||
CudaMatrix sort(const CudaMatrix &aMatrix,FunctionDirection direction = Column);
|
||||
CudaMatrix sort(CudaMatrix &&aMatrix,FunctionDirection direction = Column);
|
||||
|
||||
@@ -44,6 +45,11 @@ namespace Aurora
|
||||
|
||||
CudaMatrix inv(CudaMatrix &&aMatrix);
|
||||
|
||||
CudaMatrix fft(const CudaMatrix &aMatrix, long aFFTSize = -1);
|
||||
CudaMatrix ifft(const CudaMatrix &aMatrix, long aFFTSize = -1);
|
||||
|
||||
void fftshift(CudaMatrix &aMatrix);
|
||||
void ifftshift(CudaMatrix &aMatrix);
|
||||
}
|
||||
|
||||
#endif // __FUNCTION2D_CUDA_H__
|
||||
@@ -40,9 +40,9 @@ TEST_F(Function2D_Cuda_Test, min)
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
|
||||
auto ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
|
||||
auto ret1 = Aurora::min(B, Aurora::Column,r,c);
|
||||
|
||||
auto ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
|
||||
auto ret2 = Aurora::min(dB, Aurora::Column,r,c);
|
||||
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
@@ -615,6 +615,120 @@ TEST_F(Function2D_Cuda_Test, sort)
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function2D_Cuda_Test, fft)
|
||||
{
|
||||
{
|
||||
float *input = new float[20]{1,1,0,2,2,0,1,1,0,2,1,1,0,2,2,0,1,1,0,2};
|
||||
auto ma = Aurora::Matrix::fromRawData(input,10,2).toDeviceMatrix();
|
||||
auto fftrett = Aurora::fft(ma);
|
||||
auto fftrettH = fftrett.toHostMatrix();
|
||||
std::complex<float>* result = (std::complex<float>*)fftrettH.getData();
|
||||
//检验fft结果与matlab是否对应
|
||||
EXPECT_FLOAT_EQ(0.0729, fourDecimalRound(result[1].real()));
|
||||
EXPECT_FLOAT_EQ(2.4899, fourDecimalRound(result[2].imag()));
|
||||
EXPECT_FLOAT_EQ(0.0729, fourDecimalRound(result[11].real()));
|
||||
EXPECT_FLOAT_EQ(2.4899, fourDecimalRound(result[12].imag()));
|
||||
//检验fft的结果是否共轭
|
||||
EXPECT_FLOAT_EQ(0, result[4].imag()+result[6].imag());
|
||||
EXPECT_FLOAT_EQ(0, result[4].real()-result[6].real());
|
||||
|
||||
|
||||
auto ret= Aurora::ifft(fftrett);
|
||||
auto retH = ret.toHostMatrix();
|
||||
std::complex<float>* ifftResult = (std::complex<float>*)retH.getData();
|
||||
EXPECT_FLOAT_EQ(fourDecimalRound(ifftResult[1].real()),1.0);
|
||||
EXPECT_FLOAT_EQ(fourDecimalRound(ifftResult[3].real()),2.0);
|
||||
EXPECT_FLOAT_EQ(fourDecimalRound(ifftResult[11].real()),1.0);
|
||||
EXPECT_FLOAT_EQ(fourDecimalRound(ifftResult[13].real()),2.0);
|
||||
// Aurora::fftshift(fftrett);
|
||||
// EXPECT_FLOAT_EQ(0.0729, fourDecimalRound(result[6].real()));
|
||||
// EXPECT_FLOAT_EQ(2.4899, fourDecimalRound(result[7].imag()));
|
||||
// EXPECT_FLOAT_EQ(-2., fourDecimalRound(result[10].real()));
|
||||
// EXPECT_FLOAT_EQ(-0.2245, fourDecimalRound(result[11].imag()));
|
||||
|
||||
auto fftrett2 = Aurora::fft(ma,5);
|
||||
auto fftrett2H =fftrett2.toHostMatrix();
|
||||
|
||||
auto result2 = (std::complex<float>*)fftrett2H.getData();
|
||||
//检验fft结果与matlab是否对应
|
||||
EXPECT_FLOAT_EQ(0.3090, fourDecimalRound(result2[1].real()));
|
||||
EXPECT_FLOAT_EQ(-1.3143, fourDecimalRound(result2[2].imag()));
|
||||
EXPECT_FLOAT_EQ(-0.8090, fourDecimalRound(result2[7].real()));
|
||||
EXPECT_FLOAT_EQ(1.3143, fourDecimalRound(result2[8].imag()));
|
||||
|
||||
auto fftrett3 = Aurora::fft(ma,12);
|
||||
auto fftrett3H = fftrett3.toHostMatrix();
|
||||
|
||||
auto result3 = (std::complex<float>*)fftrett3H.getData();
|
||||
//检验fft结果与matlab是否对应
|
||||
EXPECT_FLOAT_EQ(-1.0, fourDecimalRound(result3[1].real()));
|
||||
EXPECT_FLOAT_EQ(-3.4641, fourDecimalRound(result3[4].imag()));
|
||||
EXPECT_FLOAT_EQ(-1.0, fourDecimalRound(result3[13].real()));
|
||||
EXPECT_FLOAT_EQ(-3.4641, fourDecimalRound(result3[16].imag()));
|
||||
}
|
||||
{
|
||||
float *input = new float[20]{1,1,0,2,2,0,1,1,0,2,1,1,0,2,2,0,1,1,0,2};
|
||||
auto ma = Aurora::Matrix::fromRawData(input,10,2);
|
||||
auto maD = ma.toDeviceMatrix();
|
||||
auto ret1 = Aurora::ifft(Aurora::fft(ma),12);
|
||||
auto ret2 = Aurora::ifft(Aurora::fft(maD),12);
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
EXPECT_FLOAT_AE(ret1[i], ret2.getValue(i));
|
||||
}
|
||||
ret1 = Aurora::ifft(Aurora::fft(ma),8);
|
||||
ret2 = Aurora::ifft(Aurora::fft(maD),8);
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
EXPECT_FLOAT_AE(ret1[i], ret2.getValue(i));
|
||||
}
|
||||
|
||||
}
|
||||
{
|
||||
float * data = new float[250000];
|
||||
for (size_t i = 0; i < 250000; i++)
|
||||
{
|
||||
data[i] = i%500;
|
||||
}
|
||||
auto fm = Aurora::Matrix::fromRawData(data,500,250,1,Aurora::Complex);
|
||||
auto fmD = fm.toDeviceMatrix();
|
||||
Aurora::fftshift(fm);
|
||||
Aurora::fftshift(fmD);
|
||||
for (size_t i = 0; i < fm.getDataSize()*2; i++)
|
||||
{
|
||||
EXPECT_FLOAT_AE(fm[i], fmD.getValue(i));
|
||||
}
|
||||
Aurora::ifftshift(fm);
|
||||
Aurora::ifftshift(fmD);
|
||||
for (size_t i = 0; i < fm.getDataSize()*2; i++)
|
||||
{
|
||||
EXPECT_FLOAT_AE(fm[i], fmD.getValue(i));
|
||||
}
|
||||
}
|
||||
{
|
||||
float * data = new float[333*500];
|
||||
for (size_t i = 0; i < 333*500; i++)
|
||||
{
|
||||
data[i] = i%333;
|
||||
}
|
||||
auto fm = Aurora::Matrix::fromRawData(data,333,250,1,Aurora::Complex);
|
||||
auto fmD = fm.toDeviceMatrix();
|
||||
Aurora::fftshift(fm);
|
||||
Aurora::fftshift(fmD);
|
||||
for (size_t i = 0; i < fm.getDataSize()*2; i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(fm[i], fmD.getValue(i))<<"index:"<<i;
|
||||
}
|
||||
Aurora::ifftshift(fm);
|
||||
Aurora::ifftshift(fmD);
|
||||
for (size_t i = 0; i < fm.getDataSize(); i++)
|
||||
{
|
||||
EXPECT_FLOAT_AE(fm[i], fmD.getValue(i));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function2D_Cuda_Test, immse) {
|
||||
auto matrixHost1 = Aurora::Matrix::fromRawData(Aurora::random(10000), 50,200);
|
||||
auto matrixHost2 = Aurora::Matrix::fromRawData(Aurora::random(10000), 50,200);
|
||||
|
||||
Reference in New Issue
Block a user