Add CUDA intersect and its unit test.
This commit is contained in:
@@ -1253,3 +1253,60 @@ CudaMatrix Aurora::auroraUnion(const CudaMatrix& aMatrix1, const CudaMatrix& aMa
|
|||||||
|
|
||||||
return CudaMatrix::fromRawData(data, endPointer - data);
|
return CudaMatrix::fromRawData(data, endPointer - data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Computes the intersection of the values stored in two real device matrices.
//
// Both inputs are copied into a scratch buffer and sorted ascending, then
// thrust::set_intersection writes the common values (ascending) into a
// separate output buffer.
//
// aMatrix1, aMatrix2: real (non-complex), non-null device matrices.
// Returns: a CudaMatrix built from the intersected values via
//          CudaMatrix::fromRawData; an empty CudaMatrix() if either input is
//          null or complex, or if a device allocation fails.
//
// NOTE(review): thrust::set_intersection has multiset semantics - a value that
// occurs m times in one input and n times in the other is emitted min(m, n)
// times. If unique output is required, the inputs need deduplicating first.
CudaMatrix Aurora::intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2)
{
    if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
    {
        std::cerr<<"intersect not support complex cudamatrix"<<std::endl;
        return CudaMatrix();
    }

    const size_t size1 = aMatrix1.getDataSize();
    const size_t size2 = aMatrix2.getDataSize();

    // Scratch buffer: [0, size1) holds a sorted copy of aMatrix1,
    // [size1, size1 + size2) holds a sorted copy of aMatrix2.
    float* sorted = nullptr;
    cudaError_t status = cudaMalloc((void**)&sorted, sizeof(float) * (size1 + size2));
    if(status != cudaSuccess)
    {
        std::cerr<<"intersect cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    // The intersection can never hold more elements than the smaller input.
    // It must live in its own buffer: thrust::set_intersection requires that
    // the output range does not overlap either input range.
    const size_t maxResultSize = size1 < size2 ? size1 : size2;
    float* data = nullptr;
    status = cudaMalloc((void**)&data, sizeof(float) * maxResultSize);
    if(status != cudaSuccess)
    {
        std::cerr<<"intersect cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(sorted);
        return CudaMatrix();
    }

    cudaMemcpy(sorted, aMatrix1.getData(), sizeof(float) * size1, cudaMemcpyDeviceToDevice);
    cudaMemcpy(sorted + size1, aMatrix2.getData(), sizeof(float) * size2, cudaMemcpyDeviceToDevice);

    // set_intersection requires both input ranges to be sorted.
    thrust::sort(thrust::device, sorted, sorted + size1);
    thrust::sort(thrust::device, sorted + size1, sorted + size1 + size2);
    float* end = thrust::set_intersection(thrust::device,
                                          sorted, sorted + size1,
                                          sorted + size1, sorted + size1 + size2,
                                          data);

    // The sorted copies are no longer needed once the result has been produced.
    cudaFree(sorted);

    // The raw device pointer is handed to fromRawData exactly as before
    // (presumably it takes ownership - verify against CudaMatrix).
    return CudaMatrix::fromRawData(data, end - data);
}
|
||||||
|
|
||||||
|
// For every value in aIntersectData, finds the first position at which that
// value occurs in aMatrixData and stores it as a 1-based index.
//
// Launch layout: 1D grid of 1D blocks, one thread per element of
// aIntersectData; threads with idx >= aOutputDataSize do nothing.
//
// aMatrixData:      device array that is searched (aMatrixDataSize elements).
// aIntersectData:   device array of values to look up (aOutputDataSize elements).
// aOutputData:      device array receiving the 1-based index of the first exact
//                   match, stored as float (aOutputDataSize elements).
//                   0 is written when the value does not occur in aMatrixData,
//                   so the output is never left uninitialised.
__global__ void intersectKernel(float* aMatrixData, float* aIntersectData, unsigned int aMatrixDataSize, float* aOutputData, unsigned int aOutputDataSize)
{
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= aOutputDataSize)
    {
        return;
    }

    // Read the looked-up value once instead of on every loop iteration.
    const float target = aIntersectData[idx];

    // 0 means "not found"; valid positions start at 1.
    float position = 0.0f;
    for (unsigned int i = 0; i < aMatrixDataSize; ++i)
    {
        if (aMatrixData[i] == target)
        {
            position = static_cast<float>(i + 1);
            break;
        }
    }
    aOutputData[idx] = position;
}
|
||||||
|
|
||||||
|
// Computes the intersection of two real device matrices and, for every value
// of the result, the 1-based index of its first occurrence in aMatrix1.
//
// aMatrix1, aMatrix2: real (non-complex), non-null device matrices.
// aIa: output; receives one index (stored as float) per element of the
//      returned matrix. It is set to an empty CudaMatrix() when the
//      intersection is empty or when the index computation fails, and is left
//      untouched when the inputs are rejected.
// Returns: the result of intersect(aMatrix1, aMatrix2); an empty CudaMatrix()
//          if either input is null or complex.
CudaMatrix Aurora::intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2, CudaMatrix& aIa)
{
    if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
    {
        std::cerr<<"intersect not support complex cudamatrix"<<std::endl;
        return CudaMatrix();
    }

    CudaMatrix result = intersect(aMatrix1,aMatrix2);

    // Nothing to index: a grid of 0 blocks is an invalid launch configuration,
    // so the kernel must not be launched for an empty intersection.
    if(result.isNull() || result.getDataSize() == 0)
    {
        aIa = CudaMatrix();
        return result;
    }

    const size_t size = result.getDataSize();
    float* iaResult = nullptr;
    cudaError_t status = cudaMalloc((void**)&iaResult, sizeof(float) * size);
    if(status != cudaSuccess)
    {
        std::cerr<<"intersect cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        aIa = CudaMatrix();
        return result;
    }

    // One thread per element of the intersection (ceil-div for the grid size).
    const unsigned int blocksPerGrid = static_cast<unsigned int>((size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK);
    intersectKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix1.getData(),
                                                          result.getData(),
                                                          static_cast<unsigned int>(aMatrix1.getDataSize()),
                                                          iaResult,
                                                          static_cast<unsigned int>(size));

    // Launch-configuration errors surface via cudaGetLastError, execution
    // errors via the following synchronize.
    status = cudaGetLastError();
    if(status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if(status != cudaSuccess)
    {
        std::cerr<<"intersect kernel failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(iaResult);
        aIa = CudaMatrix();
        return result;
    }

    aIa = CudaMatrix::fromRawData(iaResult,size);
    return result;
}
|
||||||
|
|||||||
@@ -75,6 +75,10 @@ namespace Aurora
|
|||||||
|
|
||||||
CudaMatrix auroraUnion(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
CudaMatrix auroraUnion(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
||||||
|
|
||||||
|
// Intersection of the values of two real device matrices, in ascending order.
// Returns an empty CudaMatrix when either input is null or complex.
CudaMatrix intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
||||||
|
|
||||||
|
// Same as intersect(aMatrix1, aMatrix2); additionally stores in aIa, for each
// value of the result, the 1-based index of its first occurrence in aMatrix1.
// Returns an empty CudaMatrix when either input is null or complex.
CudaMatrix intersect(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2, CudaMatrix& aIa);
|
||||||
|
|
||||||
// ------compareSet----------------------------------------------------
|
// ------compareSet----------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -965,3 +965,25 @@ TEST_F(Function1D_Cuda_Test, auroraUnion) {
|
|||||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks both intersect overloads on two small unsorted inputs that contain
// duplicates: the common values must come back in ascending order, and the
// index output must hold the 1-based first occurrence of each value in the
// first input.
TEST_F(Function1D_Cuda_Test, intersect) {
    float* data1 = new float[9]{3,3,2,2,2,1,4,4,7};
    auto matrix1 = Aurora::Matrix::fromRawData(data1, 9,1,1).toDeviceMatrix();
    // Seven initialisers, seven elements: the allocation matches the size
    // handed to fromRawData.
    float* data2 = new float[7]{6,6,7,7,8,1,2};
    auto matrix2 = Aurora::Matrix::fromRawData(data2, 7,1,1).toDeviceMatrix();

    // Values only.
    auto deviceResult = Aurora::intersect(matrix1, matrix2);
    // Assert the size first so the element checks below cannot read out of bounds.
    ASSERT_EQ(deviceResult.getDataSize(), static_cast<size_t>(3));
    auto result = deviceResult.toHostMatrix();
    EXPECT_FLOAT_AE(result.getData()[0],1);
    EXPECT_FLOAT_AE(result.getData()[1],2);
    EXPECT_FLOAT_AE(result.getData()[2],7);

    // Values plus indices into matrix1.
    Aurora::CudaMatrix ia;
    deviceResult = Aurora::intersect(matrix1, matrix2, ia);
    ASSERT_EQ(deviceResult.getDataSize(), static_cast<size_t>(3));
    ASSERT_EQ(ia.getDataSize(), static_cast<size_t>(3));
    result = deviceResult.toHostMatrix();
    auto iaHost = ia.toHostMatrix();
    EXPECT_FLOAT_AE(result.getData()[0],1);
    EXPECT_FLOAT_AE(result.getData()[1],2);
    EXPECT_FLOAT_AE(result.getData()[2],7);
    // 1 first appears at position 6, 2 at position 3, 7 at position 9 (1-based).
    EXPECT_FLOAT_AE(iaHost.getData()[0],6);
    EXPECT_FLOAT_AE(iaHost.getData()[1],3);
    EXPECT_FLOAT_AE(iaHost.getData()[2],9);
}
|
||||||
|
|||||||
Reference in New Issue
Block a user