Add CUDA linspace and unit test.
This commit is contained in:
@@ -1212,3 +1212,23 @@ CudaMatrix Aurora::vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, in
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data,column);
|
||||
}
|
||||
|
||||
/**
 * Device kernel that writes an arithmetic progression into aOutput.
 *
 * Each thread derives one flat index from the x components of its block and
 * thread coordinates and, when that index is inside the buffer, stores
 * aStartNum + index * aStepNum at that position.
 *
 * @param aOutput      destination buffer; at least aOutputSize floats are written to
 * @param aOutputSize  number of elements to produce
 * @param aStartNum    value stored at index 0
 * @param aStepNum     increment between consecutive elements
 */
__global__ void linspaceKernel(float* aOutput, unsigned int aOutputSize, float aStartNum, float aStepNum)
{
    const unsigned int element = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads past the end of the buffer (last, partially used block) have nothing to do.
    if (element >= aOutputSize)
    {
        return;
    }

    aOutput[element] = aStartNum + element * aStepNum;
}
|
||||
|
||||
/**
 * Generates aNum evenly spaced float values on the device, starting at aStart
 * and stepping by (aEnd - aStart) / (aNum - 1), and wraps the device buffer in
 * a CudaMatrix via CudaMatrix::fromRawData.
 *
 * @param aStart  value of the first element
 * @param aEnd    target value of the last element (reached up to float rounding)
 * @param aNum    number of elements to generate
 * @return        result of CudaMatrix::fromRawData(buffer, aNum)
 *                (presumably the matrix takes ownership of the buffer - confirm)
 *
 * Edge cases:
 *  - aNum == 1: the step is defined as 0, so the single element is aStart
 *    instead of NaN from a division by zero.
 *    NOTE(review): MATLAB's linspace returns the end point for n == 1; confirm
 *    which convention the CPU counterpart uses.
 *  - aNum <= 0: nothing is allocated or launched; a null pointer and aNum are
 *    passed through to fromRawData unchanged.
 *  - allocation failure: the kernel is not launched and a null pointer is
 *    passed to fromRawData.
 */
CudaMatrix Aurora::linspaceCuda(float aStart, float aEnd, int aNum)
{
    float* data = nullptr;

    if (aNum > 0)
    {
        // With a single sample there is no spacing; avoid dividing by (aNum - 1) == 0.
        float step = (aNum > 1) ? (aEnd - aStart) / (aNum - 1) : 0.0f;

        if (cudaMalloc((void**)&data, sizeof(float) * aNum) == cudaSuccess)
        {
            // Ceil-division written so that it cannot overflow int for large aNum.
            int blocksPerGrid = (aNum - 1) / THREADS_PER_BLOCK + 1;
            linspaceKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(data, aNum, aStart, step);
            // Wait for the fill to finish before the buffer is handed out.
            cudaDeviceSynchronize();
        }
        else
        {
            // Never hand out (or launch a kernel on) a pointer from a failed allocation.
            data = nullptr;
        }
    }

    return Aurora::CudaMatrix::fromRawData(data,aNum);
}
|
||||
|
||||
@@ -71,6 +71,8 @@ namespace Aurora
|
||||
|
||||
CudaMatrix vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, int aDim);
|
||||
|
||||
/**
 * Builds a CudaMatrix of aNum evenly spaced floats computed on the GPU.
 * For aNum >= 2, element i is aStart + i * (aEnd - aStart) / (aNum - 1).
 * NOTE(review): behaviour for aNum < 2 is determined by the definition - check it before relying on it.
 *
 * @param aStart  value of the first element
 * @param aEnd    target value of the last element
 * @param aNum    number of elements to generate
 */
CudaMatrix linspaceCuda(float aStart, float aEnd, int aNum);
|
||||
|
||||
// ------compareSet----------------------------------------------------
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user