diff --git a/src/Function1D.cu b/src/Function1D.cu
index 13a1af0..df54c33 100644
--- a/src/Function1D.cu
+++ b/src/Function1D.cu
@@ -1212,3 +1212,36 @@ CudaMatrix Aurora::vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, in
     cudaDeviceSynchronize();
     return Aurora::CudaMatrix::fromRawData(data,column);
 }
+
+// Writes aOutput[i] = aStartNum + i * aStepNum for every i < aOutputSize.
+// Indexes with the x dimension only, so it expects a 1D launch whose total
+// thread count is at least aOutputSize; extra threads are masked by the bounds check.
+__global__ void linspaceKernel(float* aOutput, unsigned int aOutputSize, float aStartNum, float aStepNum)
+{
+    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx < aOutputSize)
+    {
+        aOutput[idx] = aStartNum + idx * aStepNum;
+    }
+}
+
+// Generates aNum evenly spaced values starting at aStart with step
+// (aEnd - aStart) / (aNum - 1), computed on the device.
+// Returns the matrix that CudaMatrix::fromRawData builds from the device buffer.
+// For aNum <= 0 nothing is allocated or launched and fromRawData receives nullptr.
+CudaMatrix Aurora::linspaceCuda(float aStart, float aEnd, int aNum)
+{
+    // A single point has no interval to divide; without this guard the step
+    // would be 0/0 (NaN). The lone element becomes aStart.
+    // NOTE(review): confirm this matches Aurora::linspace for aNum == 1.
+    float step = (aNum > 1) ? (aEnd - aStart) / (aNum - 1) : 0.0f;
+    float* data = nullptr;
+    if (aNum > 0)
+    {
+        cudaMalloc((void**)&data, sizeof(float) * aNum);
+        int blocksPerGrid = (aNum + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+        linspaceKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(data, aNum, aStart, step);
+        cudaDeviceSynchronize();
+    }
+    return Aurora::CudaMatrix::fromRawData(data,aNum);
+}
diff --git a/src/Function1D.cuh b/src/Function1D.cuh
index 1ee223c..491471c 100644
--- a/src/Function1D.cuh
+++ b/src/Function1D.cuh
@@ -71,6 +71,8 @@ namespace Aurora
     CudaMatrix vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, int aDim);
 
+    CudaMatrix linspaceCuda(float aStart, float aEnd, int aNum);
+
     // ------compareSet----------------------------------------------------
diff --git a/test/Function1D_Cuda_Test.cpp b/test/Function1D_Cuda_Test.cpp
index 10bb610..07824b8 100644
--- a/test/Function1D_Cuda_Test.cpp
+++ b/test/Function1D_Cuda_Test.cpp
@@ -940,3 +940,13 @@ TEST_F(Function1D_Cuda_Test, vecnorm) {
     EXPECT_FLOAT_AE(result.getData()[0],9.5394);
     EXPECT_FLOAT_AE(result.getData()[1],43.3474);
 }
+
+TEST_F(Function1D_Cuda_Test, linspace) {
+    auto result1 = Aurora::linspace(-5,5,7);
+    auto result2 = Aurora::linspaceCuda(-5,5,7).toHostMatrix();
+    EXPECT_FLOAT_EQ(result1.getDataSize(), result2.getDataSize());
+    for(int i=0; i