Add cuda reshape, xcorr and unittest.
This commit is contained in:
@@ -1382,3 +1382,54 @@ CudaMatrix Aurora::intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatr
|
|||||||
aIa = CudaMatrix::fromRawData(iaResult,size);
|
aIa = CudaMatrix::fromRawData(iaResult,size);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build a CudaMatrix with the requested dimensions from the elements of aMatrix.
 *
 * The element count must stay the same: aRows * aColumns * aSlices has to equal
 * aMatrix.getDataSize(). The result is produced by CudaMatrix::copyFromRawData
 * on aMatrix's data pointer (presumably a device-side copy -- confirm against
 * CudaMatrix), and aMatrix itself is not modified here.
 *
 * @param aMatrix  source matrix
 * @param aRows    requested size of dimension 0
 * @param aColumns requested size of dimension 1
 * @param aSlices  requested size of dimension 2
 * @return the reshaped matrix, or a default-constructed CudaMatrix (after
 *         logging to std::cerr) when aMatrix is null, a dimension is negative,
 *         or the element counts differ.
 */
CudaMatrix Aurora::reshape(const CudaMatrix& aMatrix, int aRows, int aColumns, int aSlices)
{
    bool shapeMatches = false;
    // Negative dimensions are rejected explicitly: two negative values would
    // otherwise multiply to a positive count and slip through the size check.
    if (!aMatrix.isNull() && aRows >= 0 && aColumns >= 0 && aSlices >= 0)
    {
        const unsigned long long total = static_cast<unsigned long long>(aMatrix.getDataSize());
        // Product of two non-negative ints is below 2^62, so it cannot overflow.
        const unsigned long long plane = static_cast<unsigned long long>(aRows) * static_cast<unsigned long long>(aColumns);
        const unsigned long long slices = static_cast<unsigned long long>(aSlices);
        if (plane == 0 || slices == 0)
        {
            // The requested element count is zero.
            shapeMatches = (total == 0);
        }
        else
        {
            // Equivalent to total == plane * slices, without forming a product
            // that could overflow.
            shapeMatches = (total % plane == 0) && (total / plane == slices);
        }
    }

    if (!shapeMatches)
    {
        std::cerr<<"reshape diffirent size with cudamatrix"<<std::endl;
        return CudaMatrix();
    }
    return CudaMatrix::copyFromRawData(aMatrix.getData(),aRows,aColumns,aSlices);
}
|
||||||
|
|
||||||
|
/**
 * Full cross-correlation of two equally sized real vectors.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least aOutputSize threads in
 * total; thread `idx` produces aOutput[idx]. No shared memory is used.
 *
 * Precondition: aOutputSize == 2 * aInputSize - 1 (all index arithmetic below
 * relies on it). With half = aOutputSize / 2, output element idx holds
 *   sum_i aInputData1[i + max(idx - half, 0)] * aInputData2[i + max(half - idx, 0)]
 * over the overlapping range of the two vectors.
 *
 * Each thread accumulates in a register and stores its result exactly once, so
 * aOutput does not have to be zero-initialised by the caller.
 *
 * @param aInputData1 first input vector (aInputSize floats, read only)
 * @param aInputData2 second input vector (aInputSize floats, read only)
 * @param aInputSize  number of elements in each input vector
 * @param aOutput     result buffer (aOutputSize floats, overwritten)
 * @param aOutputSize number of elements in aOutput
 */
__global__ void xcorrKernel(float* aInputData1, float* aInputData2,unsigned int aInputSize, float* aOutput, unsigned int aOutputSize)
{
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= aOutputSize)
    {
        // Tail threads of the last block have nothing to compute.
        return;
    }

    const unsigned int half = aOutputSize / 2;
    unsigned int start1 = 0;   // first element of aInputData1 in the overlap
    unsigned int start2 = 0;   // first element of aInputData2 in the overlap
    unsigned int count = 0;    // number of overlapping elements
    if (idx <= half)
    {
        // Lower half (including the centre): aInputData2 is shifted.
        start2 = aInputSize - idx - 1;
        count = idx + 1;
    }
    else
    {
        // Upper half: aInputData1 is shifted.
        start1 = idx - half;
        count = aOutputSize - idx;
    }

    float sum = 0.0f;
    for (unsigned int i = 0; i < count; ++i)
    {
        sum += aInputData1[start1 + i] * aInputData2[start2 + i];
    }
    aOutput[idx] = sum;
}
|
||||||
|
|
||||||
|
/**
 * Full cross-correlation of two real matrices treated as flat vectors.
 *
 * Both inputs must be non-null, non-complex and hold the same number of
 * elements N; the result holds 2 * N - 1 elements computed on the device by
 * xcorrKernel.
 *
 * @param aMatrix1 first input
 * @param aMatrix2 second input
 * @return the correlation result wrapped with CudaMatrix::fromRawData
 *         (presumably taking ownership of the device buffer -- confirm against
 *         CudaMatrix), or a default-constructed CudaMatrix after logging to
 *         std::cerr when the inputs are invalid or a CUDA call fails.
 */
CudaMatrix Aurora::xcorr(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2)
{
    if (aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.getDataSize() != aMatrix2.getDataSize() || aMatrix1.isComplex() || aMatrix2.isComplex())
    {
        std::cerr<<"xcorr not surpport with diffirent input size or complex cudamatrix"<<std::endl;
        return CudaMatrix();
    }

    const size_t inputSize = aMatrix1.getDataSize();
    // The kernel takes its sizes as unsigned int, so 2 * N - 1 must fit in one;
    // N == 0 would make 2 * N - 1 wrap around.
    const size_t maxInputSize = static_cast<unsigned int>(-1) / 2 + 1u;
    if (inputSize == 0 || inputSize > maxInputSize)
    {
        std::cerr<<"xcorr input size is empty or too large for cudamatrix"<<std::endl;
        return CudaMatrix();
    }

    const size_t size = inputSize * 2 - 1;
    const size_t bytes = sizeof(float) * size;
    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, bytes);
    if (status == cudaSuccess)
    {
        // cudaMemset counts bytes, not elements: clear the whole buffer.
        status = cudaMemset(data, 0, bytes);
    }
    if (status == cudaSuccess)
    {
        int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
        xcorrKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix1.getData(), aMatrix2.getData(), static_cast<unsigned int>(inputSize), data, static_cast<unsigned int>(size));
        // Launch-configuration errors are only visible through cudaGetLastError.
        status = cudaGetLastError();
    }
    if (status == cudaSuccess)
    {
        // Errors raised while the kernel runs surface at this synchronisation.
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess)
    {
        std::cerr<<"xcorr cuda error: "<<cudaGetErrorString(status)<<std::endl;
        // Do not leak the buffer on failure; cudaFree(nullptr) is a no-op.
        cudaFree(data);
        return CudaMatrix();
    }

    return CudaMatrix::fromRawData(data, size);
}
|
||||||
|
|||||||
@@ -77,7 +77,13 @@ namespace Aurora
|
|||||||
|
|
||||||
CudaMatrix intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
CudaMatrix intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
||||||
|
|
||||||
CudaMatrix intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2, CudaMatrix& aIa); /**
|
CudaMatrix intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2, CudaMatrix& aIa);
|
||||||
|
|
||||||
|
/**
 * Return a matrix with dimensions aRows x aColumns x aSlices built from the
 * data of aMatrix.
 * The implementation returns an empty CudaMatrix (and logs to std::cerr) when
 * aMatrix is null or aRows * aColumns * aSlices differs from its data size.
 * @param aMatrix  source matrix
 * @param aRows    requested size of dimension 0
 * @param aColumns requested size of dimension 1
 * @param aSlices  requested size of dimension 2
 * @return the reshaped matrix, or an empty CudaMatrix on invalid input
 */
CudaMatrix reshape(const CudaMatrix& aMatrix, int aRows, int aColumns, int aSlices);
|
||||||
|
|
||||||
|
/**
 * Cross-correlate two real matrices of identical data size.
 * The implementation returns an empty CudaMatrix (and logs to std::cerr) when
 * either input is null or complex, or when the data sizes differ.
 * @param aMatrix1 first input
 * @param aMatrix2 second input
 * @return a matrix holding 2 * N - 1 values, where N is the data size of each
 *         input, or an empty CudaMatrix on invalid input
 */
CudaMatrix xcorr(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
||||||
|
|
||||||
|
/**
|
||||||
* 将所有nan值设置为特定值
|
* 将所有nan值设置为特定值
|
||||||
* @attention 直接在原数据上进行修改!
|
* @attention 直接在原数据上进行修改!
|
||||||
* @param aMatrix 向量
|
* @param aMatrix 向量
|
||||||
|
|||||||
@@ -1065,3 +1065,41 @@ TEST_F(Function1D_Cuda_Test, intersect) {
|
|||||||
EXPECT_FLOAT_AE(iaHost.getData()[1],3);
|
EXPECT_FLOAT_AE(iaHost.getData()[1],3);
|
||||||
EXPECT_FLOAT_AE(iaHost.getData()[2],9);
|
EXPECT_FLOAT_AE(iaHost.getData()[2],9);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reshapes a 9-element device matrix into every 3/3/1 permutation, checks the
// reported dimensions each time, and finally verifies that the element values
// of the last result match the source buffer.
TEST_F(Function1D_Cuda_Test, reshape) {
    float* source = new float[9]{3,3,2,2,2,1,4,4,7};
    auto hostInput = Aurora::Matrix::fromRawData(source, 9,1,1);
    auto deviceInput = hostInput.toDeviceMatrix();

    // Target shapes, in the order they are applied.
    const int shapes[3][3] = {{3,3,1}, {3,1,3}, {1,3,3}};
    decltype(Aurora::reshape(deviceInput,3,3,1)) reshaped;
    for(const auto& shape : shapes)
    {
        reshaped = Aurora::reshape(deviceInput,shape[0],shape[1],shape[2]);
        for(int dim=0; dim<3; ++dim)
        {
            EXPECT_FLOAT_AE(reshaped.getDimSize(dim),shape[dim]);
        }
    }

    // Only the last reshape result is compared element by element.
    auto reshapedHost = reshaped.toHostMatrix();
    for(int index=0; index<9; ++index)
    {
        EXPECT_FLOAT_AE(reshapedHost[index], source[index]);
    }
}
|
||||||
|
|
||||||
|
// Cross-correlates {7,8,9,10} with {2,3,6,7} on the device and compares all
// seven output values with the expected sequence.
TEST_F(Function1D_Cuda_Test, xcorr) {
    auto lhsHost = Aurora::Matrix::fromRawData(new float[4]{7,8,9,10}, 4,1,1);
    auto rhsHost = Aurora::Matrix::fromRawData(new float[4]{2,3,6,7}, 4,1,1);
    auto lhsDevice = lhsHost.toDeviceMatrix();
    auto rhsDevice = rhsHost.toDeviceMatrix();

    auto correlation = Aurora::xcorr(lhsDevice,rhsDevice).toHostMatrix();

    const int expected[7] = {49, 98, 132, 162, 103, 48, 20};
    for(int index=0; index<7; ++index)
    {
        EXPECT_FLOAT_AE(correlation[index], expected[index]);
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user