Add CUDA linspace and unit tests.

This commit is contained in:
sunwen
2023-12-01 16:15:51 +08:00
parent 7db741502e
commit 65c78cd878
3 changed files with 32 additions and 0 deletions

View File

@@ -1212,3 +1212,23 @@ CudaMatrix Aurora::vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, in
cudaDeviceSynchronize();
return Aurora::CudaMatrix::fromRawData(data,column);
}
/**
 * Fills aOutput with an arithmetic progression: element i receives
 * aStartNum + i * aStepNum.
 *
 * Each thread writes at most one element, selected by its flat global index
 * computed from the x components of blockIdx, blockDim and threadIdx.
 * Threads whose index is not below aOutputSize leave the buffer untouched.
 *
 * @param aOutput      destination buffer, written at indices [0, aOutputSize)
 * @param aOutputSize  number of elements to write
 * @param aStartNum    value stored in element 0
 * @param aStepNum     increment between consecutive elements
 */
__global__ void linspaceKernel(float* aOutput, unsigned int aOutputSize, float aStartNum, float aStepNum)
{
    const unsigned int element = threadIdx.x + blockDim.x * blockIdx.x;
    // Surplus threads of the last block have nothing to write.
    if (element >= aOutputSize)
    {
        return;
    }
    aOutput[element] = aStartNum + element * aStepNum;
}
/**
 * Creates a device-side vector of aNum evenly spaced samples starting at aStart.
 *
 * For aNum >= 2, element i is aStart + i * step with
 * step = (aEnd - aStart) / (aNum - 1), so the last element reaches aEnd up to
 * float rounding.
 *
 * @param aStart value of the first element
 * @param aEnd   value the last element is aimed at
 * @param aNum   number of samples to generate
 * @return the CudaMatrix built by fromRawData over the device buffer. When
 *         aNum <= 0, or when the allocation or the kernel fails, fromRawData
 *         is called with a null pointer and length 0 instead.
 */
CudaMatrix Aurora::linspaceCuda(float aStart, float aEnd, int aNum)
{
    float* data = nullptr;
    if (aNum <= 0)
    {
        // Nothing to generate: skip the allocation and the launch, which
        // would otherwise be issued with an invalid size / empty grid.
        return Aurora::CudaMatrix::fromRawData(data, 0);
    }

    // A single sample has no spacing; dividing by (aNum - 1) == 0 would turn
    // the only element into NaN.
    // NOTE(review): with step 0 the single element is aStart; MATLAB's
    // linspace returns aEnd for n == 1 — confirm which one Aurora::linspace uses.
    const float step = (aNum > 1) ? (aEnd - aStart) / static_cast<float>(aNum - 1) : 0.0f;

    if (cudaMalloc((void**)&data, sizeof(float) * aNum) != cudaSuccess)
    {
        // Never launch the kernel on a buffer that was not allocated.
        data = nullptr;
        return Aurora::CudaMatrix::fromRawData(data, 0);
    }

    // Ceil-division written so that it cannot overflow int for large aNum.
    const int blocksPerGrid = (aNum - 1) / THREADS_PER_BLOCK + 1;
    linspaceKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(data, aNum, aStart, step);
    if (cudaDeviceSynchronize() != cudaSuccess)
    {
        // The buffer content is not trustworthy; release it here because it
        // is not handed over to a matrix.
        cudaFree(data);
        data = nullptr;
        return Aurora::CudaMatrix::fromRawData(data, 0);
    }

    // Presumably fromRawData takes ownership of the device buffer — confirm.
    return Aurora::CudaMatrix::fromRawData(data, aNum);
}

View File

@@ -71,6 +71,8 @@ namespace Aurora
CudaMatrix vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, int aDim);
// Returns aNum evenly spaced values as a CudaMatrix; for aNum >= 2 element i is
// aStart + i * (aEnd - aStart) / (aNum - 1).
CudaMatrix linspaceCuda(float aStart, float aEnd, int aNum);
// ------compareSet----------------------------------------------------

View File

@@ -940,3 +940,13 @@ TEST_F(Function1D_Cuda_Test, vecnorm) {
EXPECT_FLOAT_AE(result.getData()[0],9.5394);
EXPECT_FLOAT_AE(result.getData()[1],43.3474);
}
// Compares linspaceCuda (copied back with toHostMatrix) against
// Aurora::linspace for the same arguments, element by element.
TEST_F(Function1D_Cuda_Test, linspace) {
    auto result1 = Aurora::linspace(-5,5,7);
    auto result2 = Aurora::linspaceCuda(-5,5,7).toHostMatrix();
    // Sizes are element counts, so compare them exactly rather than as floats.
    // ASSERT (fatal) keeps the loop below from indexing past result2 when the
    // sizes differ.
    ASSERT_EQ(result1.getDataSize(), result2.getDataSize());
    for(int i=0; i<result1.getDataSize(); ++i)
    {
        EXPECT_FLOAT_AE(result1[i], result2[i]);
    }
}