From 57fc2219ce0333bfdc0abc47523854eba934a0b1 Mon Sep 17 00:00:00 2001 From: sunwen Date: Fri, 8 Dec 2023 11:30:10 +0800 Subject: [PATCH] Add cuda inv and unittest. --- src/Function2D.cu | 60 +++++++++++++++++++++++++++++++++++ src/Function2D.cuh | 4 +++ test/Function2D_Cuda_Test.cpp | 12 +++++++ 3 files changed, 76 insertions(+) diff --git a/src/Function2D.cu b/src/Function2D.cu index c97c6f9..505a86e 100644 --- a/src/Function2D.cu +++ b/src/Function2D.cu @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,8 @@ #include #include #include +#include +#include #include "Function1D.cuh" #include "Matrix.h" @@ -984,3 +987,60 @@ CudaMatrix Aurora::sortrows(const CudaMatrix &aMatrix, CudaMatrix& indexMatrix) return transpose(CudaMatrix::fromRawData(data, rows, columns)); } + +__global__ void invKernel(float** aInputPointer, float* aInputData, float** aOutputPointer, float* aOutputData) +{ + unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx == 0) + { + aInputPointer[0] = aInputData; + aOutputPointer[0] = aOutputData; + } +} + +CudaMatrix Aurora::inv(const CudaMatrix &aMatrix) +{ + if (aMatrix.getDims() != 2) + { + std::cerr << "Fail! cuda inv args must be 2d matrix!"; + return aMatrix; + } + if (aMatrix.getDimSize(0) != aMatrix.getDimSize(1)) + { + std::cerr << "Fail! cuda inv args must be square matrix!"; + return aMatrix; + } + if (aMatrix.getValueType() != Normal) + { + std::cerr << "Fail! cuda inv args must be normal value type!"; + return aMatrix; + } + unsigned int n = aMatrix.getDimSize(0); + unsigned int size = aMatrix.getDataSize(); + cublasHandle_t handle; + cublasCreate(&handle); + float* data; + cudaMalloc((void**)&data, sizeof(float) * size); + + float** deviceInputPinter; + cudaMalloc((void**)&deviceInputPinter, sizeof(float *)); + + float **deviceOutputPointer; + cudaMalloc((void**)&deviceOutputPointer, sizeof(float *)); + + invKernel<<<1, 1>>>(deviceInputPinter, aMatrix.getData(), deviceOutputPointer, data); + cudaDeviceSynchronize(); + int* devicePivotArray; + cudaMalloc((void**)&devicePivotArray, n * sizeof(int)); + int* deviceInfoArray; + cudaMalloc((void**)&deviceInfoArray, sizeof(int)); + cublasSgetrfBatched(handle, n, deviceInputPinter, n, devicePivotArray, deviceInfoArray, 1); + cublasSgetriBatched(handle, n, deviceInputPinter, n, devicePivotArray, deviceOutputPointer, n, deviceInfoArray, 1); + cudaFree(devicePivotArray); + cudaFree(deviceInfoArray); + cudaFree(deviceOutputPointer); + cudaFree(deviceInputPinter); + cublasDestroy(handle); + + return CudaMatrix::fromRawData(data, n, n); +} diff --git a/src/Function2D.cuh b/src/Function2D.cuh index b855b3f..0501f03 100644 --- a/src/Function2D.cuh +++ b/src/Function2D.cuh @@ -40,6 +40,10 @@ namespace Aurora */ CudaMatrix sortrows(const CudaMatrix &aMatrix, CudaMatrix& indexMatrix); + CudaMatrix inv(const CudaMatrix &aMatrix); + + CudaMatrix inv(CudaMatrix &&aMatrix); + } #endif // __FUNCTION2D_CUDA_H__ \ No newline at end of file diff --git a/test/Function2D_Cuda_Test.cpp b/test/Function2D_Cuda_Test.cpp index 2801730..110989f 100644 --- a/test/Function2D_Cuda_Test.cpp +++ b/test/Function2D_Cuda_Test.cpp @@ -645,4 +645,16 @@ TEST_F(Function2D_Cuda_Test, sortRows) { { ASSERT_FLOAT_EQ(result3[i], result4[i]); } +} + +TEST_F(Function2D_Cuda_Test, inv) { + auto matrixHost = Aurora::Matrix::fromRawData(new float[16]{4,6,7,8,9,3,7,5,4,3,2,1,2,3,4,5}, 4,4); + auto matrixDevice = matrixHost.toDeviceMatrix(); + auto result1 = Aurora::inv(matrixHost); + auto result2 = Aurora::inv(matrixDevice).toHostMatrix(); + ASSERT_FLOAT_EQ(result1.getDataSize(), result2.getDataSize()); + for (size_t i = 0; i < result1.getDataSize(); i++) + { + EXPECT_FLOAT_AE(result1[i], result2[i]); + } } \ No newline at end of file