diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu new file mode 100644 index 0000000..4807c0f --- /dev/null +++ b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu @@ -0,0 +1,246 @@ +#include "Function2D.cuh" + + +#include +#include +#include +#include +#include + +#include "CudaMatrix.h" +#include "buildMatrix.cuh" +#include "Sparse.h" + + +namespace { + const int THREADS_PER_BLOCK = 256; +} + + + +__device__ float getPixelLengthKernel(float* aStartPt, float*aEndPt, float* aRes, int aPathLenDisc) +{ + float temp1; + float temp2; + float temp3; + + temp1 = aEndPt[0]- aStartPt[0]; + temp2 = aEndPt[1]- aStartPt[1]; + temp3 = aEndPt[2]- aStartPt[2]; + temp1 *= aRes[0]; + temp2 *= aRes[1]; + temp3 *= aRes[2]; + temp1 = temp1*temp1; + temp2 = temp2*temp2; + temp3 = temp3*temp3; + return (sqrtf(temp3+temp2+temp1))/(float)aPathLenDisc; +} + +__global__ struct b3dStruct{ + int max, f, pathlen; + float x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x, y, z, weight; +}; +__device__ void getRayBLen(float *startPoint, float *endPoint, b3dStruct& aOut) { + aOut.x = startPoint[0]; + aOut.y = startPoint[1]; + aOut.z = startPoint[2]; + + float dx = endPoint[0] - aOut.x; + float dy = endPoint[1] - aOut.y; + float dz = endPoint[2] - aOut.z; + + aOut.x_inc = (dx < 0) ? -1 : 1; + float l = abs(dx); + aOut.y_inc = (dy < 0) ? -1 : 1; + float m = abs(dy); + aOut.z_inc = (dz < 0) ? -1 : 1; + float n = abs(dz); + aOut.dx2 = l*2;// << 1; + aOut.dy2 = m*2;// << 1; + aOut.dz2 = n*2;// << 1; + float vmax ; + if ((l >= m) && (l >= n)){ + aOut.max =ceilf(l); + vmax = l; + aOut.d = aOut.dx2; + aOut.dx2 = 0; + aOut.f = 0; + } + else if((m >= l) && (m >= n)){ + aOut.max = ceilf(m); + vmax = m; + aOut.d = aOut.dy2; + aOut.dy2 = 0; + aOut.f = 1; + } + else{ + aOut.max = ceilf(n); + vmax = n; + aOut.d = aOut.dz2; + aOut.dz2 = 0; + aOut.f = 2; + } + aOut.err_0 = aOut.f==0?0:(aOut.dx2 - vmax); + aOut.err_1 = aOut.f==1?0:(aOut.dy2 - vmax); + aOut.err_2 = aOut.f==2?0:(aOut.dz2 - vmax); +} + +/** + * @brief 计算traceStraightRayBresenham的pathlen和weight的核函数 + * + * @param aStratPt + * @param aEndPt + * @param aSize + * @param aRes + * @param aOuts + * @return _ + */ +__global__ void calcLenWeightKernel(float* aStratPt, float* aEndPt, float* aRes,int aSize, b3dStruct* aOuts ){ + unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx > aSize)return; + getRayBLen(aStratPt + idx*3, aEndPt + idx*3, aOuts[idx]); + + aOuts[idx].weight = getPixelLengthKernel(aStratPt + idx*3, aEndPt + idx*3, aRes,aOuts[idx].max); + if (idx == 196){ + int v=0; + for (int i =0;i<196; i++) { + v+=aOuts[i].max; + } + } +}; + +__global__ void calcPathOffsetKernel(b3dStruct* aStructs ,unsigned int* aPath, int aSize){ + aPath[0] = aStructs[0].max; + for (int i=1; i aSize)return; + b3dStruct* in = aInstruct+idx; + int f; + float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x,y,z; + x = in->x; + y = in->y; + z = in->z; + x_inc = in->x_inc; + y_inc = in->y_inc; + z_inc = in->z_inc; + + dx2 = in->dx2; + dy2 = in->dy2; + dz2 = in->dz2; + d = in->d; + + err_0 = in->err_0; + err_1 = in->err_1; + err_2 = in->err_2; + max = in->max; + f = in->f; + + float3 dims3{dims[0],dims[1],dims[2]}; + float* output_ptr = aJOut + (idx ==0?0:aOffset[idx-1]); + for (float i = 0; i < max; i++) { + output_ptr[0]=convertToLinearIndices(dims3, {roundf(x),roundf(y),roundf(z)}); + // output_ptr[0]=i; + + output_ptr ++; + if (err_0 > 0) { + x += x_inc; + err_0 -= d; + } + if (err_1 > 0) { + y += y_inc; + err_1 -= d; + } + if (err_2 > 0) { + z += z_inc; + err_2 -= d; + } + err_0 += dx2; + err_1 += dy2; + err_2 += dz2; + if (f == 0) x += x_inc; + if (f == 1) y += y_inc; + if (f == 2) z += z_inc; + } +} + +__global__ void fillIAndSKernel(struct b3dStruct* aInstructs,unsigned int * aOffset,float* aIOut,float* aSOut){ + b3dStruct& s = aInstructs[blockIdx.x]; + __shared__ unsigned int beginIdx; + __shared__ unsigned int length; + __shared__ float weight; + + if (threadIdx.x == 0){ + beginIdx = (blockIdx.x == 0) ? 0 : aOffset[blockIdx.x - 1]; + length = s.max; + weight = s.weight; + } + __syncthreads(); + + for (int i = 0; threadIdx.x + i < length; i+=blockDim.x) { + aIOut[beginIdx + threadIdx.x + i] = blockIdx.x; + aSOut[beginIdx + threadIdx.x + i] = weight; + } +} + +Recon::BuildMatrixResult Recon::buildMatrix(const Aurora::CudaMatrix &senderList, + const Aurora::CudaMatrix &receiverList, + Aurora::CudaMatrix& res, + const Aurora::CudaMatrix &dims, bool BENT, + const Aurora::CudaMatrix &potentialMap){ + Recon::BuildMatrixResult result; + unsigned int numDims = senderList.getDimSize(0); + unsigned int nTotalRays = senderList.getDimSize(1); + b3dStruct* structList= nullptr; + cudaMalloc((void**)&structList, sizeof(b3dStruct)*nTotalRays); + int blocksPerGrid = (nTotalRays + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + calcLenWeightKernel<<>>( + senderList.getData(), receiverList.getData(), res.getData(),nTotalRays,structList); + cudaDeviceSynchronize(); + + unsigned int* offset= nullptr; + cudaMalloc((void**)&offset, sizeof(unsigned int)*nTotalRays); + calcPathOffsetKernel<<<1,1>>>(structList,offset,nTotalRays); + cudaDeviceSynchronize(); + + unsigned int size; + cudaMemcpy(&size, offset+(nTotalRays-1), sizeof(unsigned int), cudaMemcpyDeviceToHost); + + float* iData = nullptr; + float* jData = nullptr; + float* sData = nullptr; + + cudaMalloc((void**)&iData, sizeof(float)*size); + cudaMalloc((void**)&jData, sizeof(float)*size); + cudaMalloc((void**)&sData, sizeof(float)*size); + + calcRayBLPathKernel<<>>(structList, dims.getData(), offset, jData, nTotalRays); + fillIAndSKernel<<>>(structList, offset, iData, sData); + cudaDeviceSynchronize(); + cudaFree(structList); + + auto mI = Aurora::CudaMatrix::fromRawData(iData, 1, size ,1); + auto mJ = Aurora::CudaMatrix::fromRawData(jData, 1, size,1); + auto mS = Aurora::CudaMatrix::fromRawData(sData, 1, size,1); + result.M = Aurora::Sparse(mI.toHostMatrix(), mJ.toHostMatrix(),mS.toHostMatrix(),nTotalRays, Aurora::prod(dims).getValue(0)); + return result; + +} + + + diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh new file mode 100644 index 0000000..61a2c06 --- /dev/null +++ b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh @@ -0,0 +1,12 @@ +#ifndef __BUILDMATRIX_CUH__ +#define __BUILDMATRIX_CUH__ +#include "buildMatrix.h" +namespace Recon { + +BuildMatrixResult buildMatrix(const Aurora::CudaMatrix &senderList, + const Aurora::CudaMatrix &receiverList, + Aurora::CudaMatrix& res, + const Aurora::CudaMatrix &dims, bool BENT, + const Aurora::CudaMatrix &potentialMap); +} +#endif // __BUILDMATRIX_H__ \ No newline at end of file diff --git a/src/transmissionReconstruction/reconstruction/reconstruction.cpp b/src/transmissionReconstruction/reconstruction/reconstruction.cpp index 1fee8d8..908bb13 100644 --- a/src/transmissionReconstruction/reconstruction/reconstruction.cpp +++ b/src/transmissionReconstruction/reconstruction/reconstruction.cpp @@ -9,6 +9,7 @@ #include "CudaEnvInit.h" #include "log/notify.h" #include "transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.h" +#include "transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh" #include "src/transmissionReconstruction/reconstruction/solvingEquationSystem/solve.h" #include @@ -194,8 +195,9 @@ namespace Recon { BuildMatrixResult buildMatrixR; for(int iter=1; iter<=numIter; ++iter) { + auto resDevice = res.toDeviceMatrix(); //1200ms - buildMatrixR = buildMatrix(senderList, receiverList, res, dims, bentRecon && (iter!=1), potentialMap); + buildMatrixR = buildMatrix(senderList.toDeviceMatrix(), receiverList.toDeviceMatrix(), resDevice, dims.toDeviceMatrix(), bentRecon && (iter!=1), potentialMap.toDeviceMatrix()); if(!data.isNull() && bentRecon && iter != numIter) { //与默认配置bentRecon不符,暂不实现todo @@ -229,51 +231,21 @@ namespace Recon { allHitMaps.push_back(buildMatrixR.hitmap); } - std::promise sosPromise; - std::future sosFuture = sosPromise.get_future(); - - std::promise attPromise; - std::future attFuture = attPromise.get_future(); - solveParameterIteratorFunctionType solveParameterIteratorFunctionPointer = &solveParameterIterator; - slownessToSOSFunctionType slownessToSOSFunctionPointer = &slownessToSOS; - std::thread sosThread; - std::thread attThread; - if(!data.isNull()) { //1500ms - sosThread = std::thread([solveParameterIteratorFunctionPointer, slownessToSOSFunctionPointer, &buildMatrixR, &b, &dims, &SOS_IN_WATER, &sosPromise]() - { - //auto bCopy = b.deepCopy(); - //auto dimCopy = dims.deepCopy(); - Matrix result = solveParameterIteratorFunctionPointer(buildMatrixR.M, b, dims, false, transParams::nonNeg, 0)[0][0]; - result = slownessToSOSFunctionPointer(result, SOS_IN_WATER); - sosPromise.set_value(result); - }); - //Matrix sosValue = solveParameterIterator(buildMatrixR.M, b, dims, false, transParams::nonNeg)[0][0]; + Matrix sosValue = solveParameterIterator(buildMatrixR.M, b, dims, false, transParams::nonNeg)[0][0]; //1ms - //result.outSOS = slownessToSOS(sosValue, SOS_IN_WATER) ; + result.outSOS = slownessToSOS(sosValue, SOS_IN_WATER) ; } if(!dataAtt.isNull()) { - attThread = std::thread([solveParameterIteratorFunctionPointer, slownessToSOSFunctionPointer, &buildMatrixR, &bAtt, &dims, &attPromise]() - { - //auto bAttCopy = bAtt.deepCopy(); - //auto dimCopy = dims.deepCopy(); - Matrix result = solveParameterIteratorFunctionPointer(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg, 1)[0][0]; - result = result / 100; - attPromise.set_value(result); - }); //1500ms - //Matrix attValue = solveParameterIterator(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg)[0][0]; + Matrix attValue = solveParameterIterator(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg)[0][0]; //1ms - //result.outATT = attValue/100 ; + result.outATT = attValue/100 ; } - sosThread.join(); - attThread.join(); - result.outSOS = sosFuture.get(); - result.outATT = attFuture.get(); Recon::notifyProgress(10 + 10 * (iter/numIter)); } diff --git a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp index c2c03bf..9fd82a2 100644 --- a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp +++ b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp @@ -16,28 +16,33 @@ #include #include #include +#include + +#include namespace Recon { + bool isEigsFinished = false; + bool isEigsStatus = false; - double eigs(Aurora::Sparse& aM) + float eigs(Aurora::Sparse& aM) { - double result = NAN; size_t size = aM.getM(); if(size < aM.getN()) { size = aM.getN(); } - Eigen::SparseMatrix M(size, size); - std::vector> triplets; + Eigen::SparseMatrix M(size,size); Aurora::Matrix rows = aM.getRowVector(); Aurora::Matrix columns = aM.getColVector(); Aurora::Matrix values = aM.getValVector(); + std::vector> triplets(rows.getDataSize()); + #pragma omp parallel for for (int i = 0; i < rows.getDataSize(); ++i) { - triplets.push_back(Eigen::Triplet(rows[i], columns[i], values[i])); + triplets[i] = Eigen::Triplet(rows[i], columns[i], values[i]); } - M.setFromTriplets(triplets.begin(), triplets.end()); + float result; Spectra::SparseGenMatProd op(M); Spectra::GenEigsSolver> eigs(op, 1, 6); eigs.init(); @@ -49,14 +54,24 @@ namespace Recon std::complex complex = evalues[0]; result = complex.real(); } - + isEigsFinished = true; + if (result> 1 + 1e-10) + { + isEigsStatus = true; + } return result; } - void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n){ - bool isreal = M.getValVector().getValueType() == Aurora::Normal; - auto s2 = eigs(M); - if (s2> 1 + 1e-10) + void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n) + { + float s2 = 0.0; + if(!isEigsFinished) + { + s2 = eigs(M); + return;; + } + + if (isEigsStatus) { b = b / sqrt(s2); M.getValVector() = M.getValVector() / sqrt( s2); @@ -65,23 +80,23 @@ namespace Recon Aurora::Matrix callTval3(Aurora::Sparse& M, Aurora::Matrix& b,const Aurora::Matrix& dims,int device, TVALOptions& opt) { - checkAndScale(M,b,(size_t)Aurora::prod(dims).getScalar()); + checkAndScale(M,b,(size_t)Aurora::prod(dims).getScalar()); int * yIdxs = new int[M.getColVector().getDataSize()]; std::copy(M.getColVector().getData(),M.getColVector().getData()+M.getColVector().getDataSize(),yIdxs); int * xIdxs = new int[M.getRowVector().getDataSize()]; std::copy(M.getRowVector().getData(),M.getRowVector().getData()+M.getRowVector().getDataSize(),xIdxs); Aurora::Matrix values = M.getValVector(); - size_t cols = M.getM(), rows = M.getN(); - int nz = std::max(M.getColVector().getDataSize(),std::max(M.getRowVector().getDataSize(),M.getValVector().getDataSize())); + size_t cols = M.getM(), rows = M.getN(); + int nz = std::max(M.getColVector().getDataSize(),std::max(M.getRowVector().getDataSize(),M.getValVector().getDataSize())); sparse_matrix_t A; sparse_matrix_t csrA; - mkl_sparse_s_create_coo(&A, sparse_index_base_t::SPARSE_INDEX_BASE_ZERO, rows, cols, nz, yIdxs, xIdxs,values.getData()); - mkl_sparse_convert_csr(A, sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, &csrA); + mkl_sparse_s_create_coo(&A, sparse_index_base_t::SPARSE_INDEX_BASE_ZERO, rows, cols, nz, yIdxs, xIdxs,values.getData()); + mkl_sparse_convert_csr(A, sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, &csrA); int n_rows,n_cols; int *rows_start,*rows_end,*col_indx; float * csrValues; sparse_index_base_t index; - mkl_sparse_s_export_csr(csrA, &index, &n_rows, &n_cols, &rows_start, &rows_end, &col_indx, &csrValues); + mkl_sparse_s_export_csr(csrA, &index, &n_rows, &n_cols, &rows_start, &rows_end, &col_indx, &csrValues); mkl_sparse_destroy(A); delete [] xIdxs; delete [] yIdxs; @@ -101,4 +116,4 @@ namespace Recon return Aurora::Matrix::fromRawData(result.data, result.dims[0],result.dims[1],result.dims[2]); // return Aurora::Matrix(); } -} \ No newline at end of file +} diff --git a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h index 35a8630..64b634a 100644 --- a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h +++ b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h @@ -7,7 +7,7 @@ namespace Recon { void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n); -double eigs(Aurora::Sparse& aM); +float eigs(Aurora::Sparse& aM); Aurora::Matrix callTval3(Aurora::Sparse &M, Aurora::Matrix &b, const Aurora::Matrix &dims, int device,