Add cuda inv and unittest.

This commit is contained in:
sunwen
2023-12-08 11:30:10 +08:00
parent 42ecc66106
commit 57fc2219ce
3 changed files with 76 additions and 0 deletions

View File

@@ -5,6 +5,7 @@
#include <Function2D.cuh>
#include <cfloat>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <cmath>
@@ -19,6 +20,8 @@
#include <thrust/functional.h>
#include <thrust/complex.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cusolverDn.h>
#include "Function1D.cuh"
#include "Matrix.h"
@@ -984,3 +987,60 @@ CudaMatrix Aurora::sortrows(const CudaMatrix &aMatrix, CudaMatrix& indexMatrix)
return transpose(CudaMatrix::fromRawData(data, rows, columns));
}
__global__ void invKernel(float** aInputPointer, float* aInputData, float** aOutputPointer, float* aOutputData)
{
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx == 0)
{
aInputPointer[0] = aInputData;
aOutputPointer[0] = aOutputData;
}
}
CudaMatrix Aurora::inv(const CudaMatrix &aMatrix)
{
if (aMatrix.getDims() != 2)
{
std::cerr << "Fail! cuda inv args must be 2d matrix!";
return aMatrix;
}
if (aMatrix.getDimSize(0) != aMatrix.getDimSize(1))
{
std::cerr << "Fail! cuda inv args must be square matrix!";
return aMatrix;
}
if (aMatrix.getValueType() != Normal)
{
std::cerr << "Fail! cuda inv args must be normal value type!";
return aMatrix;
}
unsigned int n = aMatrix.getDimSize(0);
unsigned int size = aMatrix.getDataSize();
cublasHandle_t handle;
cublasCreate(&handle);
float* data;
cudaMalloc((void**)&data, sizeof(float) * size);
float** deviceInputPinter;
cudaMalloc((void**)&deviceInputPinter, sizeof(float *));
float **deviceOutputPointer;
cudaMalloc((void**)&deviceOutputPointer, sizeof(float *));
invKernel<<<1, 1>>>(deviceInputPinter, aMatrix.getData(), deviceOutputPointer, data);
cudaDeviceSynchronize();
int* devicePivotArray;
cudaMalloc((void**)&devicePivotArray, n * sizeof(int));
int* deviceInfoArray;
cudaMalloc((void**)&deviceInfoArray, sizeof(int));
cublasSgetrfBatched(handle, n, deviceInputPinter, n, devicePivotArray, deviceInfoArray, 1);
cublasSgetriBatched(handle, n, deviceInputPinter, n, devicePivotArray, deviceOutputPointer, n, deviceInfoArray, 1);
cudaFree(devicePivotArray);
cudaFree(deviceInfoArray);
cudaFree(deviceOutputPointer);
cudaFree(deviceInputPinter);
cublasDestroy(handle);
return CudaMatrix::fromRawData(data, n, n);
}

View File

@@ -40,6 +40,10 @@ namespace Aurora
*/
CudaMatrix sortrows(const CudaMatrix &aMatrix, CudaMatrix& indexMatrix);
CudaMatrix inv(const CudaMatrix &aMatrix);
CudaMatrix inv(CudaMatrix &&aMatrix);
}
#endif // __FUNCTION2D_CUDA_H__

View File

@@ -646,3 +646,15 @@ TEST_F(Function2D_Cuda_Test, sortRows) {
ASSERT_FLOAT_EQ(result3[i], result4[i]);
}
}
TEST_F(Function2D_Cuda_Test, inv) {
auto matrixHost = Aurora::Matrix::fromRawData(new float[16]{4,6,7,8,9,3,7,5,4,3,2,1,2,3,4,5}, 4,4);
auto matrixDevice = matrixHost.toDeviceMatrix();
auto result1 = Aurora::inv(matrixHost);
auto result2 = Aurora::inv(matrixDevice).toHostMatrix();
ASSERT_FLOAT_EQ(result1.getDataSize(), result2.getDataSize());
for (size_t i = 0; i < result1.getDataSize(); i++)
{
EXPECT_FLOAT_AE(result1[i], result2[i]);
}
}