Add CUDA sub2ind and unit test.
This commit is contained in:
@@ -1444,4 +1444,58 @@ void Aurora::ifftshift(CudaMatrix &aMatrix){
|
||||
aMatrix.getDimSize(0) * aMatrix.getValueType());
|
||||
cudaFree(data);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts 1-based per-dimension subscripts into 1-based linear indices.
 *
 * For element idx the result is
 *   1 + sum_k (aindexMatrix[k][idx] - 1) * prod_{j<k} aVMatrixSize[j]
 * evaluated with Horner's scheme from the highest dimension downwards, so the
 * cost is linear in aColumnSize and no intermediate stride has to be stored.
 *
 * Launch layout: 1D grid, one thread per output element; threads with
 * idx >= aRowSize do nothing.
 *
 * @param aVMatrixSize  extent of each dimension; entries 0 .. aColumnSize-2 are read.
 *                      Values are expected to be integer-valued floats.
 * @param aindexMatrix  array of aColumnSize pointers, each to aRowSize subscripts.
 * @param aOutputData   receives aRowSize linear indices.
 * @param aRowSize      number of subscript tuples to convert.
 * @param aColumnSize   number of dimensions (subscript arrays).
 */
__global__ void sub2indKernel(float* aVMatrixSize, float** aindexMatrix, float* aOutputData, unsigned int aRowSize, unsigned int aColumnSize)
{
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= aRowSize)
    {
        return;
    }

    // Accumulate in a register and write global memory exactly once.
    float linearIndex = 0.0f;
    if (aColumnSize > 0)
    {
        // Start from the highest dimension; its zero-based subscript is the
        // innermost term of the Horner form.
        linearIndex = aindexMatrix[aColumnSize - 1][idx] - 1.0f;
        for (unsigned int dim = aColumnSize - 1; dim > 0; --dim)
        {
            // Fold in the next lower dimension: scale by its extent, then add
            // its zero-based subscript.
            linearIndex = linearIndex * aVMatrixSize[dim - 1] + (aindexMatrix[dim - 1][idx] - 1.0f);
        }
    }

    // Convert the zero-based offset back to a 1-based index.
    aOutputData[idx] = linearIndex + 1.0f;
}
|
||||
|
||||
/**
 * Computes 1-based linear indices from per-dimension 1-based subscripts on the GPU.
 *
 * @param aVMatrixSize  matrix whose elements are the extents of each dimension;
 *                      its element count must equal aSliceIdxs.size().
 * @param aSliceIdxs    one subscript matrix per dimension; all must hold the
 *                      same number of elements.
 * @return a matrix with one linear index per subscript tuple, or an empty
 *         CudaMatrix (after logging to std::cerr) when the inputs are
 *         inconsistent, there is nothing to compute, or a CUDA call fails.
 */
CudaMatrix Aurora::sub2ind(const CudaMatrix &aVMatrixSize, std::vector<CudaMatrix> aSliceIdxs)
{
    if (aSliceIdxs.size() != aVMatrixSize.getDataSize())
    {
        std::cerr<<"cuda sub2ind size not match"<<std::endl;
        return CudaMatrix();
    }
    if (aSliceIdxs.size() == 0)
    {
        std::cerr<<"cuda sub2ind no index need calc!"<<std::endl;
        return CudaMatrix();
    }

    unsigned int indexMatrixRows = aSliceIdxs.begin()->getDataSize();
    unsigned int indexMatrixColumns = aSliceIdxs.size();

    // Collect the per-dimension data pointers on the host. Every slice must be
    // as long as the first one, otherwise the kernel would read past its end.
    std::vector<float*> slicePointers(indexMatrixColumns, nullptr);
    for(unsigned int i=0; i<indexMatrixColumns; ++i)
    {
        if (aSliceIdxs[i].getDataSize() != indexMatrixRows)
        {
            std::cerr<<"cuda sub2ind size not match"<<std::endl;
            return CudaMatrix();
        }
        slicePointers[i] = aSliceIdxs[i].getData();
    }

    // A zero-row request would launch a grid with 0 blocks, which CUDA rejects.
    if (indexMatrixRows == 0)
    {
        std::cerr<<"cuda sub2ind no index need calc!"<<std::endl;
        return CudaMatrix();
    }

    float** indexMatrixData = nullptr;
    float* data = nullptr;

    // Each step only runs if every previous one succeeded; cleanup is shared below.
    cudaError_t status = cudaMalloc((void **)&indexMatrixData, sizeof(float*) * indexMatrixColumns);
    if (status == cudaSuccess)
    {
        status = cudaMemcpy(indexMatrixData, slicePointers.data(), sizeof(float*) * indexMatrixColumns, cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess)
    {
        status = cudaMalloc((void **)&data, sizeof(float) * indexMatrixRows);
    }
    if (status == cudaSuccess)
    {
        unsigned int blocksPerGrid = (indexMatrixRows + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
        sub2indKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aVMatrixSize.getData(), indexMatrixData, data, indexMatrixRows, indexMatrixColumns);
        // Launch-configuration errors are only visible through cudaGetLastError().
        status = cudaGetLastError();
        if (status == cudaSuccess)
        {
            // Execution errors surface at the next synchronizing call.
            status = cudaDeviceSynchronize();
        }
    }

    // The pointer table is only needed during the kernel; cudaFree(nullptr) is a no-op.
    cudaFree(indexMatrixData);

    if (status != cudaSuccess)
    {
        cudaFree(data);
        std::cerr<<"cuda sub2ind failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    return CudaMatrix::fromRawData(data, indexMatrixRows);
}
|
||||
Reference in New Issue
Block a user