Add CUDA linspace and unit test.

This commit is contained in:
sunwen
2023-12-01 16:15:51 +08:00
parent 7db741502e
commit 65c78cd878
3 changed files with 32 additions and 0 deletions

View File

@@ -1212,3 +1212,23 @@ CudaMatrix Aurora::vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, in
cudaDeviceSynchronize();
return Aurora::CudaMatrix::fromRawData(data,column);
}
// Fills aOutput with an arithmetic sequence: element i receives
// aStartNum + i * aStepNum.
//
// Each thread handles at most one element. Its element index is derived
// from blockIdx.x, blockDim.x and threadIdx.x (1D launch); threads whose
// index is >= aOutputSize write nothing.
//
// aOutput     - destination buffer, written at indices [0, aOutputSize)
// aOutputSize - number of elements to produce
// aStartNum   - value stored at index 0
// aStepNum    - increment between consecutive elements
__global__ void linspaceKernel(float* aOutput, unsigned int aOutputSize, float aStartNum, float aStepNum)
{
    const unsigned int element = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads in the tail of the last block have no element to write.
    if (element >= aOutputSize)
    {
        return;
    }

    aOutput[element] = aStartNum + element * aStepNum;
}
// Generates aNum evenly spaced float values on the GPU and wraps them in a
// CudaMatrix via CudaMatrix::fromRawData.
//
// For aNum >= 2, element i is aStart + i * step with
// step = (aEnd - aStart) / (aNum - 1).
// For aNum == 1 the single element is aEnd (the convention MATLAB's linspace
// uses); the step formula would otherwise divide by zero and produce NaN.
// For aNum <= 0 no memory is allocated and no kernel is launched; the result
// is fromRawData(nullptr, 0).
//
// If the device allocation or the kernel execution fails, the buffer (if any)
// is released and fromRawData(nullptr, 0) is returned instead of a matrix
// backed by invalid or unfilled memory.
//
// aStart - first value of the sequence
// aEnd   - target last value of the sequence
// aNum   - number of points to generate
//
// NOTE(review): presumably fromRawData takes ownership of the device buffer -
// confirm against CudaMatrix.
CudaMatrix Aurora::linspaceCuda(float aStart, float aEnd, int aNum)
{
    float* data = nullptr;

    // Empty request: avoid a bogus allocation size and a zero/negative grid.
    if (aNum <= 0)
    {
        return Aurora::CudaMatrix::fromRawData(data, 0);
    }

    // A single point has no spacing; emit aEnd with a zero step so the
    // kernel never sees the NaN/inf that dividing by (aNum - 1) == 0 gives.
    float first = aStart;
    float step = 0.0f;
    if (aNum == 1)
    {
        first = aEnd;
    }
    else
    {
        step = (aEnd - aStart) / (aNum - 1);
    }

    // Never launch on a pointer that was not successfully allocated.
    if (cudaMalloc((void**)&data, sizeof(float) * aNum) != cudaSuccess)
    {
        data = nullptr;
        return Aurora::CudaMatrix::fromRawData(data, 0);
    }

    // Ceil-division written so that it cannot overflow int for large aNum.
    const int blocksPerGrid = (aNum - 1) / THREADS_PER_BLOCK + 1;
    linspaceKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(data, aNum, first, step);

    // The kernel runs asynchronously; execution errors surface here.
    if (cudaDeviceSynchronize() != cudaSuccess)
    {
        cudaFree(data);
        data = nullptr;
        return Aurora::CudaMatrix::fromRawData(data, 0);
    }

    return Aurora::CudaMatrix::fromRawData(data, aNum);
}

View File

@@ -71,6 +71,8 @@ namespace Aurora
CudaMatrix vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, int aDim);
// Returns a CudaMatrix holding aNum float values computed on the GPU:
// element i is aStart + i * step, with step = (aEnd - aStart) / (aNum - 1).
// Callers should pass aNum >= 2, since the step formula divides by (aNum - 1).
CudaMatrix linspaceCuda(float aStart, float aEnd, int aNum);
// ------compareSet----------------------------------------------------