From e36ca5c82f9ad591c09fb2cbd547aad93c82af4a Mon Sep 17 00:00:00 2001
From: sunwen
Date: Fri, 27 Jun 2025 11:37:53 +0800
Subject: [PATCH] feat: Add interp3 in Function3D.

---
Review note: the blob ids on the original "index" lines described the
pre-review file contents and are omitted here; regenerate the patch with
git format-patch to restore them.

 src/Function3D.cu  | 187 +++++++++++++++++++++++++++++++++++++++++++++
 src/Function3D.cuh |  35 ++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 src/Function3D.cu
 create mode 100644 src/Function3D.cuh

diff --git a/src/Function3D.cu b/src/Function3D.cu
new file mode 100644
--- /dev/null
+++ b/src/Function3D.cu
@@ -0,0 +1,187 @@
+#include "Function3D.cuh"
+
+#include <cstring>
+#include <stdexcept>
+#include <string>
+
+using namespace Aurora;
+
+namespace
+{
+    // Thread-block shape used to launch interp3Kernel. x is the fastest-varying output
+    // index, so a wider x extent lets neighbouring threads touch neighbouring elements.
+    constexpr unsigned int kBlockX = 16;
+    constexpr unsigned int kBlockY = 4;
+    constexpr unsigned int kBlockZ = 4;
+
+    // Throws std::runtime_error naming the failed CUDA runtime call; no-op on cudaSuccess.
+    void checkCuda(cudaError_t aStatus, const char* aWhat)
+    {
+        if (aStatus != cudaSuccess)
+        {
+            throw std::runtime_error(std::string("interp3: ") + aWhat + " failed: "
+                                     + cudaGetErrorString(aStatus));
+        }
+    }
+
+    // Number of blocks of aBlock threads needed to cover aCount elements (ceiling division).
+    unsigned int blockCount(int aCount, unsigned int aBlock)
+    {
+        return (static_cast<unsigned int>(aCount) + aBlock - 1) / aBlock;
+    }
+
+    // Product of the first three dimension sizes of aMatrix.
+    size_t elementCount(const CudaMatrix& aMatrix)
+    {
+        return static_cast<size_t>(aMatrix.getDimSize(0)) * static_cast<size_t>(aMatrix.getDimSize(1))
+               * static_cast<size_t>(aMatrix.getDimSize(2));
+    }
+
+    // Frees the temporary CUDA array and texture object when interp3 returns or throws.
+    struct TextureResources
+    {
+        cudaArray* array = nullptr;
+        cudaTextureObject_t texture = 0;
+        bool textureCreated = false;
+
+        TextureResources() = default;
+        TextureResources(const TextureResources&) = delete;
+        TextureResources& operator=(const TextureResources&) = delete;
+
+        ~TextureResources()
+        {
+            // Destroy the texture before the array it samples from.
+            if (textureCreated)
+            {
+                cudaDestroyTextureObject(texture);
+            }
+            if (array != nullptr)
+            {
+                cudaFreeArray(array);
+            }
+        }
+    };
+}
+
+/**
+ * Writes one interpolated sample per output element.
+ *
+ * Launch layout: one thread per output element on a 3D grid (x over aOutputRowSize,
+ * y over aOutputColumnSize, z over aOutputSliceSize). Threads outside the output
+ * return immediately, so any block shape whose grid covers the output works. The
+ * kernel uses no shared memory.
+ *
+ * Element i reads the query point (aNewX[i], aNewY[i], aNewZ[i]). A point outside
+ * [aStartX, aEndX] x [aStartY, aEndY] x [aStartZ, aEndZ] produces aOutValue. Any other
+ * point is mapped to the texture coordinate (v - start) / step + 0.5 on each axis and
+ * fetched from aTexObj with tex3D.
+ *
+ * aDx, aDy and aDz are divisors and are not validated here.
+ */
+__global__ void interp3Kernel(cudaTextureObject_t aTexObj, float* __restrict__ aOutputData, float aStartX, float aDx, float aEndX, float aStartY, float aDy
+    , float aEndY, float aStartZ, float aDz, float aEndZ, const float* __restrict__ aNewX, const float* __restrict__ aNewY, const float* __restrict__ aNewZ
+    , int aOutputRowSize, int aOutputColumnSize, int aOutputSliceSize, float aOutValue)
+{
+    const int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
+    const int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
+    const int zIndex = blockIdx.z * blockDim.z + threadIdx.z;
+
+    if (xIndex >= aOutputRowSize || yIndex >= aOutputColumnSize || zIndex >= aOutputSliceSize)
+    {
+        return;
+    }
+
+    // Widen before multiplying so large outputs do not overflow 32-bit arithmetic.
+    const size_t index = (static_cast<size_t>(zIndex) * aOutputColumnSize + yIndex) * aOutputRowSize + xIndex;
+    const float x = aNewX[index];
+    const float y = aNewY[index];
+    const float z = aNewZ[index];
+    if (x > aEndX || x < aStartX || y > aEndY || y < aStartY || z > aEndZ || z < aStartZ)
+    {
+        aOutputData[index] = aOutValue;
+    }
+    else
+    {
+        // Float literals keep the coordinate arithmetic in single precision.
+        aOutputData[index] = tex3D<float>(aTexObj, (x - aStartX) / aDx + 0.5f, (y - aStartY) / aDy + 0.5f, (z - aStartZ) / aDz + 0.5f);
+    }
+}
+
+/**
+ * Interpolates aValue at the query points (aNewX, aNewY, aNewZ); see Function3D.cuh
+ * for the full contract.
+ */
+CudaMatrix Aurora::interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
+                            float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
+                            const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue)
+{
+    const int row = aNewX.getDimSize(0);
+    const int column = aNewX.getDimSize(1);
+    const int slice = aNewX.getDimSize(2);
+    const size_t outputCount = static_cast<size_t>(row) * column * slice;
+
+    // The kernel reads all three coordinate arrays with the same flat index.
+    if (elementCount(aNewY) != outputCount || elementCount(aNewZ) != outputCount)
+    {
+        throw std::invalid_argument("interp3: aNewX, aNewY and aNewZ must have the same number of elements");
+    }
+
+    float* data = nullptr;
+    if (outputCount > 0)
+    {
+        checkCuda(cudaMalloc((void**)&data, sizeof(float) * outputCount), "cudaMalloc");
+    }
+    // NOTE(review): result is presumed to take ownership of data - confirm against
+    // CudaMatrix::fromRawData.
+    CudaMatrix result = Aurora::CudaMatrix::fromRawData(data, row, column, slice);
+    if (outputCount == 0)
+    {
+        // Nothing to sample, and a grid with a zero dimension cannot be launched.
+        return result;
+    }
+
+    // Texture x spans dimension 1 of aValue and texture y spans dimension 0.
+    // NOTE(review): the copy below reads aValue as dimX-contiguous rows; confirm this
+    // matches the storage order of CudaMatrix.
+    const size_t dimX = aValue.getDimSize(1);
+    const size_t dimY = aValue.getDimSize(0);
+    const size_t dimZ = aValue.getDimSize(2);
+    const cudaExtent extent = make_cudaExtent(dimX, dimY, dimZ);
+    const cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
+
+    TextureResources resources;
+    checkCuda(cudaMalloc3DArray(&resources.array, &channelDesc, extent), "cudaMalloc3DArray");
+
+    cudaMemcpy3DParms copyParams = {0};
+    copyParams.srcPtr = make_cudaPitchedPtr(aValue.getData(), dimX * sizeof(float), dimX, dimY);
+    copyParams.dstArray = resources.array;
+    copyParams.extent = extent;
+    copyParams.kind = cudaMemcpyDeviceToDevice;
+    checkCuda(cudaMemcpy3D(&copyParams), "cudaMemcpy3D");
+
+    cudaResourceDesc resDesc;
+    std::memset(&resDesc, 0, sizeof(resDesc));
+    resDesc.resType = cudaResourceTypeArray;
+    resDesc.res.array.array = resources.array;
+
+    // Zeroed descriptor: coordinates stay unnormalized, matching the kernel's texel units.
+    cudaTextureDesc texDesc;
+    std::memset(&texDesc, 0, sizeof(texDesc));
+    texDesc.filterMode = cudaFilterModeLinear;
+    texDesc.addressMode[0] = cudaAddressModeClamp;
+    texDesc.addressMode[1] = cudaAddressModeClamp;
+    texDesc.addressMode[2] = cudaAddressModeClamp;
+    texDesc.readMode = cudaReadModeElementType;
+    checkCuda(cudaCreateTextureObject(&resources.texture, &resDesc, &texDesc, NULL), "cudaCreateTextureObject");
+    resources.textureCreated = true;
+
+    const dim3 block(kBlockX, kBlockY, kBlockZ);
+    const dim3 grid(blockCount(row, kBlockX), blockCount(column, kBlockY), blockCount(slice, kBlockZ));
+    interp3Kernel<<<grid, block>>>(resources.texture, data, aStartX, aDx, aEndX, aStartY, aDy, aEndY, aStartZ, aDz, aEndZ,
+        aNewX.getData(), aNewY.getData(), aNewZ.getData(), row, column, slice, aOutValue);
+    checkCuda(cudaGetLastError(), "interp3Kernel launch");
+
+    // Wait for the kernel so the texture can be released and errors surface here.
+    checkCuda(cudaDeviceSynchronize(), "interp3Kernel execution");
+    return result;
+}
diff --git a/src/Function3D.cuh b/src/Function3D.cuh
new file mode 100644
--- /dev/null
+++ b/src/Function3D.cuh
@@ -0,0 +1,35 @@
+#ifndef __FUNCTION3D_CUDA__
+#define __FUNCTION3D_CUDA__
+#include "CudaMatrix.h"
+#include "AuroraDefs.h"
+
+namespace Aurora
+{
+    /**
+     * Interpolates the 3D volume aValue at arbitrary query points on the GPU.
+     *
+     * aValue is sampled through a CUDA texture with linear filtering and clamp
+     * addressing. Along each axis the samples are taken to lie at start, start + step, ...
+     * so a query coordinate v maps to the sample index (v - start) / step.
+     *
+     * @param aStartX,aDx,aEndX  First sample position, spacing and upper bound along x
+     *                           (x indexes dimension 1 of aValue).
+     * @param aStartY,aDy,aEndY  Same for y (dimension 0 of aValue).
+     * @param aStartZ,aDz,aEndZ  Same for z (dimension 2 of aValue).
+     * @param aValue             Device-resident volume to sample.
+     * @param aNewX,aNewY,aNewZ  Query coordinates; all three must hold the same number
+     *                           of elements. The result takes the dimensions of aNewX.
+     * @param aOutValue          Value stored for query points outside the
+     *                           [start, end] range on any axis.
+     * @return Matrix of interpolated values with the dimensions of aNewX.
+     * @throws std::invalid_argument if the query matrices differ in element count.
+     * @throws std::runtime_error if a CUDA runtime call fails.
+     *
+     * The spacings are used as divisors and are not validated.
+     */
+    CudaMatrix interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
+                       float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
+                       const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue);
+}
+
+#endif // __FUNCTION3D_CUDA__