diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/Bresenham.cpp b/src/transmissionReconstruction/reconstruction/buildMatrix/Bresenham.cpp index 1308ec4..420cf3b 100644 --- a/src/transmissionReconstruction/reconstruction/buildMatrix/Bresenham.cpp +++ b/src/transmissionReconstruction/reconstruction/buildMatrix/Bresenham.cpp @@ -115,7 +115,6 @@ float * b3dMexDouble2(float *startPoint,float *endPoint, size_t& outputSize) { std::vector points; float i, l, m, n, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2,d, x,y,z; - x = startPoint[0]; y = startPoint[1]; z = startPoint[2]; @@ -134,24 +133,28 @@ float * b3dMexDouble2(float *startPoint,float *endPoint, size_t& outputSize) { dy2 = m*2;// << 1; dz2 = n*2;// << 1; float maxV; + int f = -1; if ((l >= m) && (l >= n)){ maxV = l; d = dx2; dx2 = 0; + f =0; } else if((m >= l) && (m >= n)){ maxV = m; d = dy2; dy2 = 0; + f = 1; } else{ maxV = n; d = dz2; dz2 = 0; + f =2; } - err_0 = dx2==0?0:(dx2 - maxV); - err_1 = dy2==0?0:(dy2 - maxV); - err_2 = dz2==0?0:(dz2 - maxV); + err_0 = f==0?0:(dx2 - maxV); + err_1 = f==1?0:(dy2 - maxV); + err_2 = f==2?0:(dz2 - maxV); for (i = 0; i < maxV; i++) { @@ -173,9 +176,9 @@ float * b3dMexDouble2(float *startPoint,float *endPoint, size_t& outputSize) { err_0 += dx2; err_1 += dy2; err_2 += dz2; - if (err_0 == 0)x += x_inc; - if (err_1 == 0)y += y_inc; - if (err_2 == 0)z += z_inc; + if (f == 0)x += x_inc; + if (f == 1)y += y_inc; + if (f == 2)z += z_inc; } float* output = new float[points.size()]; @@ -187,11 +190,9 @@ float * b3dMexDouble2(float *startPoint,float *endPoint, size_t& outputSize) { Aurora::Matrix b3dMexDouble(const Aurora::Matrix& StartPt, const Aurora::Matrix& endPts) { size_t size = 0; - size_t size1 = 0; - auto temp = Recon::b3dMexDouble(StartPt.getData(), endPts.getData(), size); - return Aurora::Matrix::fromRawData(temp, 3, size / 3); + return Aurora::Matrix::fromRawData(temp, 3, size / 3); } float * b3dMexDouble(float *startPoint,float *endPoint, size_t& outputSize) { diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu index b2ac2d7..3c944de 100644 --- a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu +++ b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu @@ -1,9 +1,17 @@ -#include "CudaMatrix.h" +#include "Function2D.cuh" + + +#include #include #include #include - #include + +#include "CudaMatrix.h" +#include "buildMatrix.cuh" +#include "Sparse.h" + + namespace { const int THREADS_PER_BLOCK = 256; } @@ -12,59 +20,69 @@ namespace { __device__ float getPixelLengthKernel(float* aStartPt, float*aEndPt, float* aRes, int aPathLenDisc) { - float3 temp; - temp.x = aEndPt[0]- aStartPt[0]; - temp.y = aEndPt[1]- aStartPt[1]; - temp.z = aEndPt[2]- aStartPt[2]; - temp.x *= aRes[0]; - temp.y *= aRes[1]; - temp.z *= aRes[2]; - temp.x *= temp.x; - temp.y *= temp.y; - temp.z *= temp.x; - return sqrtf(temp.x+temp.y+temp.z)/aPathLenDisc; + float temp1; + float temp2; + float temp3; + + temp1 = aEndPt[0]- aStartPt[0]; + temp2 = aEndPt[1]- aStartPt[1]; + temp3 = aEndPt[2]- aStartPt[2]; + temp1 *= aRes[0]; + temp2 *= aRes[1]; + temp3 *= aRes[2]; + temp1 = temp1*temp1; + temp2 = temp2*temp2; + temp3 = temp3*temp3; + return (sqrtf(temp3+temp2+temp1))/(float)aPathLenDisc; } __global__ struct b3dStruct{ - float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x, y, z, weight; + int max, f, pathlen; + float x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x, y, z, weight; }; -__device__ float * getRayBLen(float *startPoint, float *endPoint, struct b3dStruct* aOut) { - - aOut->x = startPoint[0]; - aOut->y = startPoint[1]; - aOut->z = startPoint[2]; +__device__ void getRayBLen(float *startPoint, float *endPoint, b3dStruct& aOut) { + aOut.x = startPoint[0]; + aOut.y = startPoint[1]; + aOut.z = startPoint[2]; - float dx = endPoint[0] - aOut->x; - float dy = endPoint[1] - aOut->y; - float dz = endPoint[2] - aOut->z; + float dx = endPoint[0] - aOut.x; + float dy = endPoint[1] - aOut.y; + float dz = endPoint[2] - aOut.z; - aOut->x_inc = (dx < 0) ? -1 : 1; + aOut.x_inc = (dx < 0) ? -1 : 1; float l = abs(dx); - aOut->y_inc = (dy < 0) ? -1 : 1; + aOut.y_inc = (dy < 0) ? -1 : 1; float m = abs(dy); - aOut->z_inc = (dz < 0) ? -1 : 1; + aOut.z_inc = (dz < 0) ? -1 : 1; float n = abs(dz); - aOut->dx2 = l*2;// << 1; - aOut->dy2 = m*2;// << 1; - aOut->dz2 = n*2;// << 1; + aOut.dx2 = l*2;// << 1; + aOut.dy2 = m*2;// << 1; + aOut.dz2 = n*2;// << 1; + float vmax ; if ((l >= m) && (l >= n)){ - aOut->max = l; - aOut->d = aOut->dx2; - aOut->dx2 = 0; + aOut.max =ceilf(l); + vmax = l; + aOut.d = aOut.dx2; + aOut.dx2 = 0; + aOut.f = 0; } else if((m >= l) && (m >= n)){ - aOut->max = m; - aOut->d = aOut->dy2; - aOut->dy2 = 0; + aOut.max = ceilf(m); + vmax = m; + aOut.d = aOut.dy2; + aOut.dy2 = 0; + aOut.f = 1; } else{ - aOut->max = n; - aOut->d = aOut->dz2; - aOut->dz2 = 0; + aOut.max = ceilf(n); + vmax = n; + aOut.d = aOut.dz2; + aOut.dz2 = 0; + aOut.f = 2; } - aOut->err_0 = aOut->dx2==0?0:(aOut->dx2 - aOut->max); - aOut->err_1 = aOut->dy2==0?0:(aOut->dy2 - aOut->max); - aOut->err_2 = aOut->dz2==0?0:(aOut->dz2 - aOut->max); + aOut.err_0 = aOut.f==0?0:(aOut.dx2 - vmax); + aOut.err_1 = aOut.f==1?0:(aOut.dy2 - vmax); + aOut.err_2 = aOut.f==2?0:(aOut.dz2 - vmax); } /** @@ -77,51 +95,60 @@ __device__ float * getRayBLen(float *startPoint, float *endPoint, struct b3dStru * @param aOuts * @return _ */ -__global__ void calcLenWeightKernel(float* aStratPt, float* aEndPt, int aSize,float* aRes, b3dStruct* aOuts ){ +__global__ void calcLenWeightKernel(float* aStratPt, float* aEndPt, float* aRes,int aSize, b3dStruct* aOuts ){ unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; if (idx > aSize)return; - getRayBLen(aStratPt + idx*3, aEndPt + idx*3, aOuts+idx); - aOuts[idx].weight = getPixelLengthKernel(aStratPt + idx*3, aEndPt + idx*3, aRes + idx*3,aOuts[idx].max); + getRayBLen(aStratPt + idx*3, aEndPt + idx*3, aOuts[idx]); + + aOuts[idx].weight = getPixelLengthKernel(aStratPt + idx*3, aEndPt + idx*3, aRes,aOuts[idx].max); }; -__global__ void calcPathOffsetKernel(b3dStruct* aStructs ,float* aPath, int aSize){ +__global__ void calcPathOffsetKernel(b3dStruct* aStructs ,unsigned int* aPath, int aSize){ aPath[0] = aStructs[0].max; for (int i=1; i aSize)return; - b3dStruct* in = aInstruct+idx; -float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2,d, x,y,z; + b3dStruct* in = aInstruct+idx; + int f; + float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x,y,z; x = in->x; y = in->y; z = in->z; x_inc = in->x_inc; y_inc = in->y_inc; z_inc = in->z_inc; + + dx2 = in->dx2; + dy2 = in->dy2; + dz2 = in->dz2; + d = in->d; + err_0 = in->err_0; err_1 = in->err_1; err_2 = in->err_2; max = in->max; + f = in->f; float3 dims3{dims[0],dims[1],dims[2]}; - float* output_ptr = aOutput + aOffset[idx]; + float* output_ptr = aJOut + (idx ==0?0:aOffset[idx-1]); for (float i = 0; i < max; i++) { - convertToLinearIndices(dims3, {roundf(x),roundf(y),roundf(z)}, output_ptr); + output_ptr[0]=convertToLinearIndices(dims3, {roundf(x),roundf(y),roundf(z)}); output_ptr ++; if (err_0 > 0) { x += x_inc; @@ -138,32 +165,54 @@ float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2,d, x,y,z; err_0 += dx2; err_1 += dy2; err_2 += dz2; - if (err_0 == 0)x += x_inc; - if (err_1 == 0)y += y_inc; - if (err_2 == 0)z += z_inc; + if (f == 0) x += x_inc; + if (f == 1) y += y_inc; + if (f == 2) z += z_inc; } } -void buildMatrix(const Aurora::CudaMatrix &senderList, +__global__ void fillIAndSKernel(struct b3dStruct* aInstructs,unsigned int * aOffset,float* aIOut,float* aSOut){ + b3dStruct& s = aInstructs[blockIdx.x]; + __shared__ unsigned int beginIdx; + __shared__ unsigned int length; + __shared__ float weight; + + if (threadIdx.x == 0){ + beginIdx = (blockIdx.x == 0) ? 0 : aOffset[blockIdx.x - 1]; + length = s.max; + weight = s.weight; + } + __syncthreads(); + + for (int i = 0; threadIdx.x + i < length; i+=blockDim.x) { + aIOut[beginIdx + threadIdx.x + i] = blockIdx.x; + aSOut[beginIdx + threadIdx.x + i] = weight; + } +} + +Recon::BuildMatrixResult Recon::buildMatrix(const Aurora::CudaMatrix &senderList, const Aurora::CudaMatrix &receiverList, Aurora::CudaMatrix& res, const Aurora::CudaMatrix &dims, bool BENT, const Aurora::CudaMatrix &potentialMap){ + Recon::BuildMatrixResult result; unsigned int numDims = senderList.getDimSize(0); unsigned int nTotalRays = senderList.getDimSize(1); - unsigned int cnt = 0; - unsigned int coeffIndex = 0; - unsigned int rayCount = 0; b3dStruct* structList= nullptr; cudaMalloc((void**)&structList, sizeof(b3dStruct)*nTotalRays); int blocksPerGrid = (nTotalRays + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; calcLenWeightKernel<<>>( - senderList.getData(), receiverList.getData(), res.getData(),structList); + senderList.getData(), receiverList.getData(), res.getData(),nTotalRays,structList); + cudaDeviceSynchronize(); + unsigned int* offset= nullptr; cudaMalloc((void**)&offset, sizeof(unsigned int)*nTotalRays); calcPathOffsetKernel<<<1,1>>>(structList,offset,nTotalRays); + cudaDeviceSynchronize(); + unsigned int size; - cudaMemcpy(&size, offset+(nTotalRays-1), 1, cudaMemcpyDeviceToHost); + cudaMemcpy(&size, offset+(nTotalRays-1), sizeof(unsigned int), cudaMemcpyDeviceToHost); + float* iData = nullptr; float* jData = nullptr; float* sData = nullptr; @@ -172,9 +221,17 @@ void buildMatrix(const Aurora::CudaMatrix &senderList, cudaMalloc((void**)&jData, sizeof(float)*size); cudaMalloc((void**)&sData, sizeof(float)*size); - calcRayBLPath(structList, dims.getData(), offset, jData); + calcRayBLPathKernel<<>>(structList, dims.getData(), offset, jData, nTotalRays); + fillIAndSKernel<<>>(structList, offset, iData, sData); + cudaDeviceSynchronize(); cudaFree(structList); + auto mI = Aurora::CudaMatrix::fromRawData(iData, 1, size ,1); + auto mJ = Aurora::CudaMatrix::fromRawData(jData, 1, size,1); + auto mS = Aurora::CudaMatrix::fromRawData(sData, 1, size,1); + result.M = Aurora::Sparse(mI.toHostMatrix(), mJ.toHostMatrix(),mS.toHostMatrix(),nTotalRays, Aurora::prod(dims).getValue(0)); + return result; + } diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh new file mode 100644 index 0000000..61a2c06 --- /dev/null +++ b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh @@ -0,0 +1,12 @@ +#ifndef __BUILDMATRIX_CUH__ +#define __BUILDMATRIX_CUH__ +#include "buildMatrix.h" +namespace Recon { + +BuildMatrixResult buildMatrix(const Aurora::CudaMatrix &senderList, + const Aurora::CudaMatrix &receiverList, + Aurora::CudaMatrix& res, + const Aurora::CudaMatrix &dims, bool BENT, + const Aurora::CudaMatrix &potentialMap); +} +#endif // __BUILDMATRIX_H__ \ No newline at end of file