Add CUDA sub2ind and unit test.
This commit is contained in:
@@ -1444,4 +1444,58 @@ void Aurora::ifftshift(CudaMatrix &aMatrix){
|
|||||||
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||||
cudaFree(data);
|
cudaFree(data);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Converts per-dimension 1-based subscripts into 1-based linear indices,
 * with the first dimension varying fastest (column-major order).
 *
 * Launch layout: 1-D grid of 1-D blocks. Thread `idx` produces output element
 * `idx`; threads with idx >= aRowSize do nothing. No shared memory is used.
 *
 * @param aVMatrixSize device array of dimension sizes; elements
 *                     [0, aColumnSize - 2] are read.
 * @param aindexMatrix device array of aColumnSize device pointers;
 *                     aindexMatrix[d] must hold at least aRowSize subscripts
 *                     for dimension d.
 * @param aOutputData  device array of at least aRowSize floats that receives
 *                     the linear indices.
 * @param aRowSize     number of subscript tuples to convert.
 * @param aColumnSize  number of dimensions.
 */
__global__ void sub2indKernel(float* aVMatrixSize, float** aindexMatrix, float* aOutputData, unsigned int aRowSize, unsigned int aColumnSize)
{
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= aRowSize)
    {
        return;
    }

    // Accumulate in a register and write the result once, instead of doing a
    // read-modify-write on global memory for every dimension.
    float linearIndex = 0.0f;

    // Running product of the sizes of all dimensions before `dim`. Keeping it
    // across iterations avoids recomputing the product from scratch for each
    // dimension (O(D) instead of O(D^2) work per thread).
    unsigned int stride = 1;
    for (unsigned int dim = 0; dim < aColumnSize; ++dim)
    {
        if (dim > 0)
        {
            // Multiply lazily so the size of the last dimension is never read.
            stride *= aVMatrixSize[dim - 1];
        }
        linearIndex += (aindexMatrix[dim][idx] - 1) * stride;
    }

    // Convert the zero-based offset back to a 1-based index.
    aOutputData[idx] = linearIndex + 1;
}
|
||||||
|
|
||||||
|
/**
 * Converts per-dimension subscripts into linear indices on the GPU.
 *
 * Both the subscripts and the resulting indices are 1-based.
 *
 * @param aVMatrixSize device matrix with one dimension size per element; its
 *                     element count must equal aSliceIdxs.size().
 * @param aSliceIdxs   one device matrix of subscripts per dimension; all of
 *                     them must have the same number of elements.
 * @return a matrix with one linear index per subscript tuple, or a
 *         default-constructed CudaMatrix when the inputs are inconsistent,
 *         empty, or a CUDA call fails (a message is written to std::cerr).
 */
CudaMatrix Aurora::sub2ind(const CudaMatrix &aVMatrixSize, std::vector<CudaMatrix> aSliceIdxs)
{
    if (aSliceIdxs.size() != aVMatrixSize.getDataSize())
    {
        std::cerr<<"cuda sub2ind size not match"<<std::endl;
        return CudaMatrix();
    }
    if (aSliceIdxs.empty())
    {
        std::cerr<<"cuda sub2ind no index need calc!"<<std::endl;
        return CudaMatrix();
    }

    unsigned int indexMatrixRows = aSliceIdxs.begin()->getDataSize();
    unsigned int indexMatrixColumns = aSliceIdxs.size();

    // Host-side table of the device pointers of every subscript matrix.
    std::vector<float*> hostPointers(indexMatrixColumns, nullptr);
    for (unsigned int i = 0; i < indexMatrixColumns; ++i)
    {
        // The kernel reads indexMatrixRows elements from every subscript
        // matrix, so a shorter one would be read out of bounds.
        if (aSliceIdxs[i].getDataSize() != indexMatrixRows)
        {
            std::cerr<<"cuda sub2ind index size not match"<<std::endl;
            return CudaMatrix();
        }
        hostPointers[i] = aSliceIdxs[i].getData();
    }

    // Nothing to convert; also avoids launching a kernel with zero blocks.
    if (indexMatrixRows == 0)
    {
        std::cerr<<"cuda sub2ind no index need calc!"<<std::endl;
        return CudaMatrix();
    }

    float** indexMatrixData = nullptr;
    cudaError_t status = cudaMalloc((void **)&indexMatrixData, sizeof(float*) * indexMatrixColumns);
    if (status != cudaSuccess)
    {
        std::cerr<<"cuda sub2ind cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    status = cudaMemcpy(indexMatrixData, hostPointers.data(), sizeof(float*) * indexMatrixColumns, cudaMemcpyHostToDevice);
    if (status != cudaSuccess)
    {
        std::cerr<<"cuda sub2ind cudaMemcpy failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(indexMatrixData);
        return CudaMatrix();
    }

    float* data = nullptr;
    status = cudaMalloc((void **)&data, sizeof(float) * indexMatrixRows);
    if (status != cudaSuccess)
    {
        std::cerr<<"cuda sub2ind cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(indexMatrixData);
        return CudaMatrix();
    }

    int blocksPerGrid = (indexMatrixRows + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
    sub2indKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aVMatrixSize.getData(), indexMatrixData, data, indexMatrixRows, indexMatrixColumns);

    // Launch-configuration errors show up in cudaGetLastError(); faults raised
    // while the kernel runs show up at the synchronizing call.
    status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    cudaFree(indexMatrixData);
    if (status != cudaSuccess)
    {
        std::cerr<<"cuda sub2ind kernel failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(data);
        return CudaMatrix();
    }

    // NOTE(review): presumably fromRawData takes ownership of `data` - confirm.
    return CudaMatrix::fromRawData(data, indexMatrixRows);
}
|
||||||
@@ -59,6 +59,16 @@ namespace Aurora
|
|||||||
|
|
||||||
void fftshift(CudaMatrix &aMatrix);
|
void fftshift(CudaMatrix &aMatrix);
|
||||||
void ifftshift(CudaMatrix &aMatrix);
|
void ifftshift(CudaMatrix &aMatrix);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert per-dimension subscripts to linear index values.
|
||||||
|
* @attention Indices start at 1 to match MATLAB; subtract 1 before using them as offsets in C++.
|
||||||
|
* @param aVMatrixSize sizes of the dimensions, one element per dimension; the element count must equal aSliceIdxs.size()
|
||||||
|
* @param aSliceIdxs one matrix of 1-based subscripts per dimension
|
||||||
|
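* @return matrix of 1-based linear indices, one per subscript tuple; a default-constructed CudaMatrix if the input sizes do not match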
|
||||||
|
*/
|
||||||
|
CudaMatrix sub2ind(const CudaMatrix &aVMatrixSize, std::vector<CudaMatrix> aSliceIdxs);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __FUNCTION2D_CUDA_H__
|
#endif // __FUNCTION2D_CUDA_H__
|
||||||
@@ -843,8 +843,8 @@ TEST_F(Function2D_Cuda_Test, prod) {
|
|||||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto matrixHostComplex = Aurora::Matrix::fromRawData(new float[40], 4,5, 1,Aurora::Complex);
|
auto matrixHostComplex = Aurora::Matrix::fromRawData(new float[20], 2,5, 1,Aurora::Complex);
|
||||||
for(unsigned int i=0; i<40;++i)
|
for(unsigned int i=0; i<20;++i)
|
||||||
{
|
{
|
||||||
matrixHost[i] = i + 1;
|
matrixHost[i] = i + 1;
|
||||||
}
|
}
|
||||||
@@ -858,4 +858,22 @@ TEST_F(Function2D_Cuda_Test, prod) {
|
|||||||
{
|
{
|
||||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that the CUDA sub2ind matches the host implementation, and that both
// agree with the known linear indices for a 2x2x2 size vector.
TEST_F(Function2D_Cuda_Test, sub2ind) {
    // One subscript vector per dimension (1-based values).
    float* dI1= new float[4]{1,2,1,2};
    Aurora::Matrix I1(std::shared_ptr<float>(dI1,std::default_delete<float[]>()),std::vector<int>{4});
    float* dI2= new float[4]{2,2,1,1};
    Aurora::Matrix I2(std::shared_ptr<float>(dI2,std::default_delete<float[]>()),std::vector<int>{4});
    float* dI3= new float[4]{1,1,2,2};
    Aurora::Matrix I3(std::shared_ptr<float>(dI3,std::default_delete<float[]>()),std::vector<int>{4});
    // Size of each dimension.
    float* dsz= new float[3]{2,2,2};
    Aurora::Matrix sz(std::shared_ptr<float>(dsz,std::default_delete<float[]>()),std::vector<int>{3});

    auto result1 = Aurora::sub2ind(sz, {I1, I2, I3});
    auto result2 = Aurora::sub2ind(sz.toDeviceMatrix(), {I1.toDeviceMatrix(), I2.toDeviceMatrix(), I3.toDeviceMatrix()}).toHostMatrix();

    // Fatal assertions: on a size mismatch the loop below would index past the
    // end of the shorter result.
    ASSERT_EQ(result1.getDataSize(), result2.getDataSize());
    ASSERT_EQ(result2.getDataSize(), I1.getDataSize());

    // (i1 - 1) + (i2 - 1) * 2 + (i3 - 1) * 4 + 1 for each subscript tuple.
    const float expected[4] = {3, 4, 5, 6};
    for(unsigned int i=0; i<result1.getDataSize(); ++i)
    {
        EXPECT_FLOAT_EQ(result1[i], result2[i]);
        EXPECT_FLOAT_EQ(result2[i], expected[i]);
    }
}
|
||||||
Reference in New Issue
Block a user