feat: Add interp3 in Function3D.

This commit is contained in:
sunwen
2025-06-27 11:37:53 +08:00
parent 9dd7d97237
commit e36ca5c82f
2 changed files with 95 additions and 0 deletions

82
src/Function3D.cu Normal file
View File

@@ -0,0 +1,82 @@
#include "Function3D.cuh"
using namespace Aurora;
__global__ void interp3Kernel(cudaTextureObject_t aTexObj, float* aOutputData, float aStartX, float aDx, float aEndX, float aStartY, float aDy
, float aEndY, float aStartZ, float aDz, float aEndZ, float* aNewX, float* aNewY, float* aNewZ
, int aOutputRowSize, int aOutputColumnSize, int aOutputSliceSize, float aOutValue)
{
int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
int zIndex = blockIdx.z * blockDim.z + threadIdx.z;
if(xIndex > aOutputRowSize - 1 || yIndex > aOutputColumnSize - 1 || zIndex > aOutputSliceSize - 1)
{
return;
}
size_t index = zIndex * aOutputRowSize * aOutputColumnSize + yIndex * aOutputRowSize + xIndex;
float x = aNewX[index];
float y = aNewY[index];
float z = aNewZ[index];
if(x > aEndX || x < aStartX || y > aEndY || y < aStartY || z > aEndZ || z < aStartZ)
{
aOutputData[index] = aOutValue;
}
else
{
aOutputData[index] = tex3D<float>(aTexObj, (x - aStartX) / aDx + 0.5, (y - aStartY) / aDy + 0.5, (z - aStartZ) / aDz + 0.5);
}
}
CudaMatrix Aurora::interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue)
{
cudaTextureObject_t texObj;
size_t dimX = aValue.getDimSize(1);
size_t dimY = aValue.getDimSize(0);
size_t dimZ = aValue.getDimSize(2);
cudaExtent extent = make_cudaExtent(dimX, dimY, dimZ);
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaArray* cuArray;
cudaMalloc3DArray(&cuArray, &channelDesc, extent);
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr(aValue.getData(), dimX * sizeof(float), dimX, dimY);
copyParams.dstArray = cuArray;
copyParams.extent = extent;
copyParams.kind = cudaMemcpyDeviceToDevice;
cudaMemcpy3D(&copyParams);
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = cuArray;
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.filterMode = cudaFilterModeLinear;
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
texDesc.readMode = cudaReadModeElementType;
cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
int row = aNewX.getDimSize(0);
int column = aNewX.getDimSize(1);
int slice = aNewX.getDimSize(2);
dim3 blockDim(4,4,4);
dim3 gridDim(row / 4 + 1, column / 4 + 1, slice / 4 + 1);
float *data = nullptr;
cudaMalloc((void **)&data, sizeof(float) * row * column * slice);
CudaMatrix result = Aurora::CudaMatrix::fromRawData(data, row, column, slice);
interp3Kernel<<<gridDim, blockDim>>>(texObj, data, aStartX, aDx, aEndX, aStartY, aDy, aEndY, aStartZ, aDz, aEndZ,
aNewX.getData(), aNewY.getData(), aNewZ.getData(), row, column, slice, aOutValue);
cudaDeviceSynchronize();
return result;
}

13
src/Function3D.cuh Normal file
View File

@@ -0,0 +1,13 @@
#ifndef __FUNCTION3D_CUDA__
#define __FUNCTION3D_CUDA__
#include "CudaMatrix.h"
#include "AuroraDefs.h"
namespace Aurora
{
CudaMatrix interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue);
}
#endif // __FUNCTION3D_CUDA_H__