From a65ee38196c5fa3a7c0c4b6f0c24004974d39c49 Mon Sep 17 00:00:00 2001 From: kradchen Date: Thu, 7 Dec 2023 15:46:36 +0800 Subject: [PATCH] Fix sort and min Unit test --- src/Function2D.cu | 13 ++++++++++--- test/Function2D_Cuda_Test.cpp | 14 +++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/Function2D.cu b/src/Function2D.cu index 505a86e..5cf6a06 100644 --- a/src/Function2D.cu +++ b/src/Function2D.cu @@ -1,6 +1,7 @@ #include "AuroraDefs.h" #include "CudaMatrix.h" #include "Function1D.h" +#include "Function1D.cuh" #include "Matrix.h" #include #include @@ -272,8 +273,9 @@ __global__ void minColKernel(float* aInputData, float* aOutput, unsigned int aCo } // 规约最前面一段 for (int i = blockDim.x/2; i >0; i>>=1) { + if (threadIdx.x < i) { - shared_data[threadIdx.x] += fminf(shared_data[threadIdx.x], shared_data[threadIdx.x+i]); + shared_data[threadIdx.x] = fminf(shared_data[threadIdx.x], shared_data[i + threadIdx.x]); } __syncthreads(); } @@ -302,7 +304,7 @@ __global__ void minRowKernel(float* aInputData, float* aOutput,unsigned int aCol // 规约最前面一段 for (int i = blockDim.x/2; i >0; i>>=1) { if (threadIdx.x < i) { - shared_data[threadIdx.x] += fminf(shared_data[threadIdx.x], shared_data[threadIdx.x+i]); + shared_data[threadIdx.x] = fminf(shared_data[threadIdx.x], shared_data[threadIdx.x+i]); } __syncthreads(); } @@ -876,7 +878,12 @@ CudaMatrix Aurora::sort(CudaMatrix &&aMatrix,FunctionDirection direction) case Column: { int rowElementCount = aMatrix.getDimSize(1); - // softKernel<<>>(data,colElementCount); + for (size_t i = 0; i < rowElementCount; i++) + { + thrust::sort(thrust::device, data+i*colElementCount, + data+(i+1)*colElementCount); + } + return aMatrix; } default: diff --git a/test/Function2D_Cuda_Test.cpp b/test/Function2D_Cuda_Test.cpp index 110989f..27076bb 100644 --- a/test/Function2D_Cuda_Test.cpp +++ b/test/Function2D_Cuda_Test.cpp @@ -426,14 +426,14 @@ TEST_F(Function2D_Cuda_Test, sum) { // { - float *dataB = Aurora::random(4096*50000); - // float* dataB = new float[4096*50000]; - // for (size_t i = 0; i < 4096*50000; i++) - // { - // dataB[i] = (float)(i/4096); - // } + // float *dataB = Aurora::random(4096*50000); + float* dataB = new float[4096*5000]; + for (size_t i = 0; i < 4096*5000; i++) + { + dataB[i] = (i%2==0?1.0f:0.0f); + } - B = Aurora::Matrix::fromRawData(dataB, 4096, 50000); + B = Aurora::Matrix::fromRawData(dataB, 4096, 5000); // B = Aurora::Matrix::fromRawData(dataB, 200, 200); auto dD = B.toDeviceMatrix();