Add cuda immse, sortrows and unit tests.
@@ -19,8 +19,16 @@
#include <thrust/functional.h>
#include <thrust/complex.h>
#include <cuda_runtime.h>
#include "Function1D.cuh"
#include "Matrix.h"

using namespace Aurora;

namespace
{
const int THREADS_PER_BLOCK = 256;
}

__global__ void maxColKernel(float* aInputData, float* aOutput, unsigned int aColSize)
{
@@ -878,4 +886,101 @@ CudaMatrix Aurora::sort(CudaMatrix &&aMatrix,FunctionDirection direction)
}

}
}

__global__ void immseKernel(float* aInputData1, float* aInputData2, float* aOutputData, unsigned int aInputSize)
{
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < aInputSize)
    {
        aOutputData[idx] = powf(aInputData1[idx] - aInputData2[idx], 2);
    }
}

float Aurora::immse(const CudaMatrix &aImageA, const CudaMatrix &aImageB)
{
    if (aImageA.getDims() != 2 || aImageB.getDims() != 2)
    {
        std::cerr << "Fail! cuda immse args must all be 2D matrices!";
        return 0.0f;
    }

    if (!aImageB.compareShape(aImageA))
    {
        std::cerr << "Fail! cuda immse args must be the same shape!";
        return 0.0f;
    }

    if (aImageA.getValueType() != Normal || aImageB.getValueType() != Normal)
    {
        std::cerr << "Fail! cuda immse args must be of normal value type!";
        return 0.0f;
    }

    unsigned int size = aImageA.getDataSize();
    float* data = nullptr;
    cudaMalloc((void**)&data, sizeof(float) * size);
    int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
    immseKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aImageA.getData(), aImageB.getData(), data, size);
    cudaDeviceSynchronize();
    float result = thrust::reduce(thrust::device, data, data + size, 0.0f, thrust::plus<float>()) / size;
    cudaFree(data);
    return result;
}
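
// Illustrative only, not part of this commit: a minimal host-side sketch of the same
// mean-squared-error computation over raw float buffers, handy as a reference when
// sanity-checking the kernel + thrust::reduce path above. It assumes both buffers
// hold `size` valid elements; no CudaMatrix accessors are used here.
static float referenceMse(const float* a, const float* b, unsigned int size)
{
    double sum = 0.0;                                    // accumulate in double to limit rounding error
    for (unsigned int i = 0; i < size; ++i)
    {
        const float diff = a[i] - b[i];
        sum += static_cast<double>(diff) * diff;         // squared per-element difference
    }
    return size ? static_cast<float>(sum / size) : 0.0f; // mean of the squared differences
}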

struct compareMatrixByRows
{
    compareMatrixByRows(unsigned int aSize)
        : mSize(aSize)
    {
    }
    unsigned int mSize;
    __host__ __device__
    bool operator()(const float* aVector1, const float* aVector2) const
    {
        for (unsigned int i = 0; i < mSize; ++i)
        {
            if (aVector1[i] < aVector2[i])
            {
                return true;
            }
            else if (aVector1[i] > aVector2[i])
            {
                return false;
            }
        }
        return false;
    }
};
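
// Illustrative only, not part of this commit: compareMatrixByRows is a lexicographic
// "less than" over fixed-length rows, so it can be exercised on the host as a quick check.
static void comparatorSketch()
{
    const float rowA[3] = {1.0f, 2.0f, 5.0f};
    const float rowB[3] = {1.0f, 3.0f, 0.0f};
    compareMatrixByRows less(3);
    bool aBeforeB = less(rowA, rowB);  // true: rows first differ at index 1, and 2 < 3
    bool bBeforeA = less(rowB, rowA);  // false: the reverse comparison
    (void)aBeforeB;
    (void)bBeforeA;
}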

CudaMatrix Aurora::sortrows(const CudaMatrix &aMatrix, CudaMatrix& indexMatrix)
{
    CudaMatrix transposeMatrix = transpose(aMatrix);
    size_t rows = transposeMatrix.getDimSize(0);
    size_t columns = transposeMatrix.getDimSize(1);
    thrust::device_vector<float*> vector(columns);
    for (unsigned int i = 0; i < columns; ++i)
    {
        vector[i] = transposeMatrix.getData() + i * rows;
    }
    thrust::device_vector<float*> vectorBack = vector;
    thrust::sort(thrust::device, vector.begin(), vector.end(), compareMatrixByRows(rows));

    float* data = nullptr;
    float* indexResult = new float[columns];
    cudaMalloc((void**)&data, sizeof(float) * rows * columns);
    for (unsigned int i = 0; i < columns; ++i)
    {
        cudaMemcpy(data + i * rows, vector[i], sizeof(float) * rows, cudaMemcpyDeviceToDevice);
    }

    for (unsigned int i = 0; i < columns; ++i)
    {
        auto index = thrust::find(thrust::device, vectorBack.begin(), vectorBack.end(), vector[i]);
        indexResult[i] = index - vectorBack.begin();
    }

    indexMatrix = Aurora::Matrix::fromRawData(indexResult, columns).toDeviceMatrix();

    return transpose(CudaMatrix::fromRawData(data, rows, columns));
}
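
// Illustrative usage sketch, not part of this commit's API: build a small device matrix,
// sort its rows, and read back where each sorted row came from. The helpers used here
// (Aurora::random, Matrix::fromRawData, toDeviceMatrix) are assumed to behave as they do
// in the existing unit tests; the 4x5 shape is arbitrary.
static void sortrowsUsageSketch()
{
    auto host = Aurora::Matrix::fromRawData(Aurora::random(20), 4, 5);
    auto device = host.toDeviceMatrix();

    Aurora::CudaMatrix rowIndices;  // receives the original row positions
    Aurora::CudaMatrix sorted = Aurora::sortrows(device, rowIndices);

    // Row k of `sorted` is row rowIndices[k] of the input, ordered by column 1,
    // then by column 2, and so on for ties.
    (void)sorted;
}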
@@ -28,6 +28,18 @@ namespace Aurora
CudaMatrix sort(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
CudaMatrix sort(CudaMatrix &&aMatrix, FunctionDirection direction = Column);

/** Computes the mean squared error between two 2-D, real-valued images of the same shape. */
float immse(const CudaMatrix &aImageA, const CudaMatrix &aImageB);

/**
 * Sorts the rows of a matrix in ascending order based on the elements in the first column.
 * When the first column contains repeated elements, sortrows sorts according to the values
 * in the next column and repeats this behavior for subsequent equal values.
 * @attention 3-D matrices and complex values are not currently supported.
 * @param aMatrix The matrix to sort.
 * @param indexMatrix Receives the original row indices of the sorted rows; optional.
 * @return The sorted matrix.
 */
CudaMatrix sortrows(const CudaMatrix &aMatrix, CudaMatrix& indexMatrix);

}

#endif // __FUNCTION2D_CUDA_H__
@@ -613,4 +613,36 @@ TEST_F(Function2D_Cuda_Test, sort)
}

}
}

TEST_F(Function2D_Cuda_Test, immse) {
    auto matrixHost1 = Aurora::Matrix::fromRawData(Aurora::random(10000), 50, 200);
    auto matrixHost2 = Aurora::Matrix::fromRawData(Aurora::random(10000), 50, 200);
    auto matrixDevice1 = matrixHost1.toDeviceMatrix();
    auto matrixDevice2 = matrixHost2.toDeviceMatrix();
    auto result1 = Aurora::immse(matrixHost1, matrixHost2);
    auto result2 = Aurora::immse(matrixDevice1, matrixDevice2);
    EXPECT_FLOAT_EQ(result1, result2);
}

TEST_F(Function2D_Cuda_Test, sortRows) {
    auto matrixHost1 = Aurora::Matrix::fromRawData(Aurora::random(10000), 50, 200);
    Aurora::Matrix matrixHost2;
    auto matrixDevice1 = matrixHost1.toDeviceMatrix();
    Aurora::CudaMatrix matrixDevice2;
    auto result1 = Aurora::sortrows(matrixHost1, &matrixHost2);
    auto result2 = Aurora::sortrows(matrixDevice1, matrixDevice2).toHostMatrix();
    auto result3 = matrixHost2;
    auto result4 = matrixDevice2.toHostMatrix();
    ASSERT_EQ(result1.getDataSize(), result2.getDataSize());
    for (size_t i = 0; i < result1.getDataSize(); i++)
    {
        ASSERT_FLOAT_EQ(result1[i], result2[i]);
    }

    ASSERT_EQ(result3.getDataSize(), result4.getDataSize());
    for (size_t i = 0; i < result3.getDataSize(); i++)
    {
        ASSERT_FLOAT_EQ(result3[i], result4[i]);
    }
}