Update SegImg function.

This commit is contained in:
sunwen
2025-10-22 10:31:36 +08:00
parent 1b1450e85c
commit 9af07def99
14 changed files with 908 additions and 0 deletions

View File

@@ -0,0 +1,78 @@
#include "Data3DCluster.h"
#include "SegImgFunction.h"
#include "Function2D.h"
#include "Function3D.h"
#include "maxConnectedComponent3D.cuh"
#include <algorithm>
#include <float.h>
#include <MatlabWriter.h>
using namespace Aurora;
using namespace Recon;
Data3DClusterResult Recon::Data3DCluster(const Aurora::Matrix& aMatrix, float aTopZ)
{
int clusNum = 2;
Aurora::Matrix imgSamp = downSample(aMatrix);
int ite = 1;
int LStartNum = 101;
float nipThre = 0;
Aurora::Matrix clusImg;
Aurora::Matrix nipRe;
while(LStartNum > 100)
{
if(ite < 5)
{
size_t size = imgSamp.getDataSize();
imgSamp.forceReshape(size,1,1);
KmeansResult kResult = kmeans(imgSamp, 2);
clusImg = kResult.Label;
nipThre = max(kResult.CenterPoint, FunctionDirection::All)[0];
ite++;
}
else
{
nipThre+=0.02;
Aurora::Matrix oneMatrix = zeros(clusImg.getDataSize(),1) + 1;
for(int i=0; i<clusImg.getDataSize(); ++i)
{
if(imgSamp[i] > nipThre)
{
clusImg[i] += 1;
}
}
}
nipRe = zeros(aMatrix.getDimSize(0), aMatrix.getDimSize(1),aMatrix.getDimSize(2));
size_t effectiveDataSize = aMatrix.getDataSize() - ((aTopZ + 1) * aMatrix.getDimSize(0) * aMatrix.getDimSize(1));
for(size_t i=0; i<effectiveDataSize; ++i)
{
if(aMatrix[i] > nipThre)
{
nipRe[i] = 1;
}
}
size_t nipRe20Size = nipRe.getDimSize(0) * nipRe.getDimSize(1) * 20;
float* nipRe20Data = new float[nipRe20Size];
std::copy(nipRe.getData(), nipRe.getData() + nipRe20Size, nipRe20Data);
Aurora::Matrix nipRe20 = Aurora::Matrix::fromRawData(nipRe20Data, nipRe.getDimSize(0), nipRe.getDimSize(1), 20);
LStartNum = bwconncomp3D(nipRe20);
nipRe = maxConnectedComponent3D(nipRe);
}
std::vector<float> C3D(clusNum, FLT_MAX);
for(size_t i=0; i<imgSamp.getDataSize(); ++i)
{
if( imgSamp[i] < C3D[clusImg[i] - 1])
{
C3D[clusImg[i] - 1] = imgSamp[i];
}
}
auto it = std::max_element(C3D.begin(), C3D.end());
Data3DClusterResult result;
result.nipRe = nipRe;
result.thre3D = *it;
return result;
}

View File

@@ -0,0 +1,14 @@
#ifndef DATA3DCLUSTER_H
#define DATA3DCLUSTER_H
#include <Matrix.h>
namespace Recon
{
struct Data3DClusterResult
{
Aurora::Matrix nipRe;
float thre3D;
};
Data3DClusterResult Data3DCluster(const Aurora::Matrix& aMatrix, float aTopZ);
}
#endif //DATA3DCLUSTER_H

View File

@@ -0,0 +1,17 @@
#include "DealBW.h"
#include "SegImgFunction.h"
using namespace Recon;
using namespace Aurora;
Aurora::Matrix Recon::DealBW(const Aurora::Matrix& aMatrix)
{
Aurora::Matrix tmp3 = maxConnRegoin(aMatrix);
tmp3 = imdilate(tmp3);
tmp3 = imerode(tmp3);
tmp3 = imfill(tmp3);
tmp3 = maxConnRegoin(tmp3);
tmp3 = bwconvhull(tmp3);
return tmp3;
}

View File

@@ -0,0 +1,8 @@
#ifndef DEALBW_H
#define DEALBW_H
#include <Matrix.h>
namespace Recon
{
Aurora::Matrix DealBW(const Aurora::Matrix& aMatrix);
}
#endif //DEALBW_H

View File

@@ -0,0 +1,38 @@
#include "FindStartZSliceBaseCluster.h"
#include "Function2D.h"
#include "SegImgFunction.h"
#include <math.h>
using namespace Recon;
using namespace Aurora;
unsigned int Recon::FindStartZSliceBaseCluster(const Aurora::Matrix& aMatrix, float aReso, int aTopZ)
{
unsigned int startZ;
float nipThre = M_PI * pow(roundf32(5e-3 / aReso), 2);
int i=1;
while(i < aMatrix.getDimSize(2) - aTopZ)
{
Aurora::Matrix label = aMatrix.block(2, i-1, i-1);
if(sum(label, FunctionDirection::All)[0] > 0)
{
label = maxConnRegoin(label);
if(sum(label, FunctionDirection::All)[0] < nipThre)
{
++i;
}
else
{
startZ = i;
break;
}
}
else
{
++i;
}
}
return startZ;
}

View File

@@ -0,0 +1,8 @@
#ifndef FINDSTARTZSLICEBASECLUSTER_H
#define FINDSTARTZSLICEBASECLUSTER_H
#include <Matrix.h>
namespace Recon
{
unsigned int FindStartZSliceBaseCluster(const Aurora::Matrix& aMatrix, float aReso, int aTopZ);
}
#endif //FINDSTARTZSLICEBASECLUSTER_H

View File

@@ -0,0 +1,45 @@
#include "PreSegImg.h"
#include "SegImgFunction.h"
#include "Function3D.h"
#include "Function2D.h"
#include "DealBW.h"
using namespace Recon;
using namespace Aurora;
PreSegImgResult Recon::PreSegImg(const Aurora::Matrix& aNipRe, unsigned int aTopZ, unsigned int aStartZ)
{
int x = aNipRe.getDimSize(0);
int y = aNipRe.getDimSize(1);
int z = aNipRe.getDimSize(2);
Aurora::Matrix SegRes = zeros(aNipRe.getDimSize(0), aNipRe.getDimSize(1), aNipRe.getDimSize(2));
unsigned int endZ = aStartZ;
for(int i= z - aTopZ; i>=aStartZ; --i)
{
Aurora::Matrix label = aNipRe.block(2, i-1, i-1);
Aurora::Matrix labelH = imfill(label);
Aurora::Matrix hole = labelH - label;
if(sum(hole, FunctionDirection::All)[0] > 2*sum(label, FunctionDirection::All)[0])
{
endZ = i;
break;
}
}
for(int i=aStartZ; i<=endZ; ++i)
{
Aurora::Matrix label = aNipRe.block(2, i-1, i-1);
Aurora::Matrix maskIn = SegRes.block(2, i-2, i-2);
Aurora::Matrix maskOut = imerode(maskIn);
label = label * (1-maskOut);
Aurora::Matrix SegI = (label + maskIn) > 0;
SegI = maxConnRegoin(SegI);
SegI = DealBW(SegI);
SegRes.setBlock(2, i-1, i-1, SegI);
}
PreSegImgResult result;
result.endZ = endZ;
result.SegRes = SegRes;
return result;
}

View File

@@ -0,0 +1,14 @@
#ifndef PRESEGIMG_H
#define PRESEGIMG_H
#include <Matrix.h>
namespace Recon
{
struct PreSegImgResult
{
Aurora::Matrix SegRes;
unsigned int endZ;
};
PreSegImgResult PreSegImg(const Aurora::Matrix& aNipRe, unsigned int aTopZ, unsigned int aStartZ);
}
#endif //PRESEGIMG_H

View File

@@ -0,0 +1,170 @@
#include "SegImg.h"
#include "config/config.h"
#include "Function2D.h"
#include "Function3D.h"
#include "SegImgFunction.h"
#include "DealBW.h"
#include "Data3DCluster.h"
#include "FindStartZSliceBaseCluster.h"
#include "PreSegImg.h"
#include <float.h>
using namespace Aurora;
namespace
{
Aurora::Matrix create1ToXMatrix(int aEndValue)
{
float* data = new float[aEndValue];
for(int i=1; i<=aEndValue; ++i)
{
data[i-1] = i;
}
return Aurora::Matrix::fromRawData(data, 1, aEndValue);
}
}
Aurora::Matrix Recon::SegImgMain(const Aurora::Matrix& aMatrix, float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint)
{
int topZ = roundf32((aImageEndPoint.getData()[2]-0) / aImageResolution);
Aurora::Matrix temp = aMatrix.deepCopy();
temp.forceReshape(temp.getDataSize(), 1, 1);
Aurora::Matrix img = (aMatrix - min(temp,FunctionDirection::All)) / (max(temp,FunctionDirection::All) - min(temp,FunctionDirection::All));
Data3DClusterResult data3DClusterResult = Data3DCluster(img, topZ);
unsigned int startZ = FindStartZSliceBaseCluster(data3DClusterResult.nipRe, aImageResolution, topZ);
PreSegImgResult preSegImgResult = PreSegImg(data3DClusterResult.nipRe, topZ, startZ);
Aurora::Matrix segRes = SegImg(preSegImgResult.SegRes, img, preSegImgResult.endZ, data3DClusterResult.thre3D, aImageResolution, aImageStartPoint, aImageEndPoint);
return aMatrix * segRes;
}
Aurora::Matrix Recon::SegImg(const Aurora::Matrix& aSegRes, const Aurora::Matrix& aImg, int aEndZ, float aThre3D,
float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint)
{
Aurora::Matrix result = aSegRes.deepCopy();
int clustNumZ = 2;
int x = result.getDimSize(0);
int y = result.getDimSize(1);
int z = result.getDimSize(2);
Aurora::Matrix xIdx = transpose(repmat(create1ToXMatrix(x),y,1));
Aurora::Matrix yIdx = repmat(create1ToXMatrix(y),x,1);
float CenterP1 = roundf32(-aImageStartPoint[0] / aImageResolution);
float CenterP2 = roundf32(-aImageStartPoint[1] / aImageResolution);
Aurora::Matrix xDistance = (xIdx-CenterP1)^2;
Aurora::Matrix yDistance = (yIdx-CenterP2)^2;
Aurora::Matrix centerDis = Aurora::sqrt(xDistance + yDistance);
for(int i=aEndZ+1; i<=z; ++i)
{
int updateFlag = 1;
Aurora::Matrix tmp = result.block(2, i-2, i-2);
float zDis = aImageEndPoint[2] - (z-i)*aImageResolution;
Aurora::Matrix centerDisZ = centerDis*tmp;
float centerDis_Z = Aurora::max(centerDisZ)[0]*aImageResolution;
float ThSlice;
if(centerDis_Z > 0.08 || zDis > -0.03)
{
tmp = aImg.block(2, i-1, i-1);
Aurora::Matrix imgData = downSample(tmp);
Aurora::Matrix imgDataCopy = imgData.deepCopy();
imgDataCopy.forceReshape(imgData.getDataSize(),1,1);
KmeansResult kResult = kmeans(imgDataCopy, clustNumZ);
Aurora::Matrix clusImg = kResult.Label;
Aurora::Matrix clus = zeros(clustNumZ,2);
for(int k=1; k<=clustNumZ;++k)
{
float cIDx = 0;
float minValue = MAXFLOAT;
for(unsigned int j=0; j<imgData.getDataSize(); ++j)
{
if(clusImg[j] == k)
{
cIDx++;
if(minValue > imgData[j])
{
minValue = imgData[j];
}
}
}
clus[k-1] = minValue;
clus[k+1] = cIDx;
}
Aurora::Matrix idx;
clus = sortrows(clus, &idx);
if(clus[0] < clus[1] && clus[2] > clus[3])
{
ThSlice = clus[1];
}
else
{
tmp = aImg.block(2, i-1, i-1);
Aurora::Matrix mask = result.block(2, i-2, i-2);
mask = imerode(mask);
tmp = tmp*(1-mask) + mask * mean(tmp,FunctionDirection::All)[0];
imgData = downSample(tmp);
Aurora::Matrix imgDataCopy = imgData.deepCopy();
imgDataCopy.forceReshape(imgData.getDataSize(),1,1);
kResult = kmeans(imgDataCopy, 2);
clusImg = kResult.Label;
clus = zeros(clustNumZ,2);
for(int k=1; k<=clustNumZ;++k)
{
float cIDx = 0;
float minValue = MAXFLOAT;
for(unsigned int j=0; j<imgData.getDataSize(); ++j)
{
if(clusImg[j] == k)
{
cIDx++;
if(minValue > imgData[j])
{
minValue = imgData[j];
}
}
}
clus[k-1] = minValue;
clus[k+1] = cIDx;
}
Aurora::Matrix idx;
clus = sortrows(clus, &idx);
if(clus[0] < clus[1] && clus[2] > clus[3])
{
ThSlice = clus[1];
}
else
{
updateFlag = 0;
}
}
}
else
{
ThSlice = aThre3D;
}
if(zDis > -0.03)
{
Aurora::Matrix tmp1 = result.block(2, i-2, i-2);
Aurora::Matrix tmp2 = result.block(2, i-1, i-1);
float idx = ((sum(tmp2,FunctionDirection::All) - sum(tmp1,FunctionDirection::All)) / sum(tmp1,FunctionDirection::All)).getData()[0];
if(idx>0.15)
{
updateFlag = 0;
}
}
Aurora::Matrix segI;
if(updateFlag == 1)
{
Aurora::Matrix ref = aImg > ThSlice;
Aurora::Matrix refI = ref.block(2, i-1, i-1);
segI = (refI + result.block(2, i-2, i-2)) > 0;
segI = DealBW(segI);
}
else
{
segI = result.block(2, i-2, i-2);
}
result.setBlock(2, i-1, i-1, segI);
}
return result;
}

View File

@@ -0,0 +1,10 @@
#ifndef SEGIMG_H
#define SEGIMG_H
#include <Matrix.h>
namespace Recon
{
Aurora::Matrix SegImgMain(const Aurora::Matrix& aMatrix, float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint);
Aurora::Matrix SegImg(const Aurora::Matrix& aSegRes, const Aurora::Matrix& aImg, int aEndZ, float aThre3D,
float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint);
}
#endif //SEGIMG_H

View File

@@ -0,0 +1,218 @@
#include "SegImgFunction.h"
#include <opencv2/opencv.hpp>
#include "Function3D.h"
using namespace Recon;
Aurora::Matrix Recon::maxConnRegoin(const Aurora::Matrix& aMatrix)
{
int column = aMatrix.getDimSize(0);
int row = aMatrix.getDimSize(1);
size_t size = column * row;
uint8_t* data = new uint8_t[size];
std::copy(aMatrix.getData(), aMatrix.getData() + size, data);
cv::Mat src = cv::Mat(row, column, CV_8U, data);
cv::Mat labels, stats, centroids;;
int nLabels = connectedComponentsWithStats(src, labels,stats,centroids, 4, CV_32S);
if(nLabels<=1)
{
return Aurora::Matrix();
}
int maxArea = 0;
int maxAreaIndex = 1;
for (int i = 1; i < nLabels; ++i)
{
int area = stats.at<int>(i, cv::CC_STAT_AREA);
if (area > maxArea) {
maxArea = area;
maxAreaIndex = i;
}
}
Aurora::Matrix result = Aurora::zeros(column, row);
for(int i=0; i<row; ++i)
{
for(int j=0; j<column; ++j)
{
if (labels.at<int>(i, j) == maxAreaIndex)
{
result[i*column + j] = 1;
}
}
}
return result;
}
Aurora::Matrix Recon::imfill(const Aurora::Matrix& aMatrix)
{
int width = aMatrix.getDimSize(0);
int height = aMatrix.getDimSize(1);
size_t dataSize = aMatrix.getDataSize();
uint8_t* data = new uint8_t[aMatrix.getDataSize()];
std::copy(aMatrix.getData(), aMatrix.getData() + dataSize, data);
cv::Mat src = cv::Mat(height, width, CV_8U, data);
cv::Mat inv = 1 - src;
cv::Mat labels;
int nLabels = connectedComponents(inv, labels, 8, CV_32S);
cv::Mat filled = src.clone();
std::vector<bool> isHole(nLabels, true);
int rows = src.rows;
int cols = src.cols;
for (int x = 0; x < cols; x++) {
isHole[ labels.at<int>(0, x) ] = false;
isHole[ labels.at<int>(rows-1, x) ] = false;
}
for (int y = 0; y < rows; y++) {
isHole[ labels.at<int>(y, 0) ] = false;
isHole[ labels.at<int>(y, cols-1) ] = false;
}
for (int y = 0; y < rows; y++) {
for (int x = 0; x < cols; x++) {
int label = labels.at<int>(y, x);
if (isHole[label]) {
filled.at<uchar>(y, x) = 1;
}
}
}
float* resultData = new float[aMatrix.getDataSize()];
std::copy(filled.data, filled.data + dataSize, resultData);
Aurora::Matrix result = Aurora::Matrix::fromRawData(resultData, width, height);
delete[] data;
return result;
}
Aurora::Matrix Recon::bwconvhull(const Aurora::Matrix& aMatrix)
{
int width = aMatrix.getDimSize(0);
int height = aMatrix.getDimSize(1);
size_t dataSize = aMatrix.getDataSize();
uint8_t* data = new uint8_t[aMatrix.getDataSize()];
std::copy(aMatrix.getData(), aMatrix.getData() + dataSize, data);
cv::Mat input = cv::Mat(height, width, CV_8U, data);
std::vector<cv::Point> points;
cv::findNonZero(input, points);
if (points.empty())
{
delete[] data;
return Aurora::zeros(width, height);;
}
std::vector<cv::Point> hull;
cv::convexHull(points, hull);
cv::Mat hullMask = cv::Mat::zeros(input.size(), CV_8UC1);
cv::fillConvexPoly(hullMask, hull, cv::Scalar(1));
float* resultData = new float[aMatrix.getDataSize()];
std::copy(hullMask.data, hullMask.data + dataSize, resultData);
Aurora::Matrix result = Aurora::Matrix::fromRawData(resultData, width, height);
delete[] data;
return result;
}
KmeansResult Recon::kmeans(const Aurora::Matrix& aMatrix, int aClusNum)
{
KmeansResult result;
auto input = aMatrix.deepCopy();
size_t dataSize = input.getDataSize();
cv::Mat data(dataSize, 1, CV_32F, input.getData());
int k = aClusNum;
cv::Mat labels;
cv::Mat centers;
cv::TermCriteria criteria(cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 100, 1e-6);
int attempts = 5;
int flags = cv::KMEANS_PP_CENTERS;
double compactness = cv::kmeans(data, k, labels, criteria, attempts, flags, centers);
// std::cout << "聚类中心: " << std::endl << centers << std::endl;
// std::cout << "紧密度: " << compactness << std::endl;
float* labelDataResult = new float[dataSize];
float* centerPointDataResult = new float[k];
int* labelData = reinterpret_cast<int*>(labels.data);
float* centerPointData = reinterpret_cast<float*>(centers.data);
std::copy(labelData, labelData + dataSize, labelDataResult);
std::copy(centerPointData, centerPointData + k, centerPointDataResult);
Aurora::Matrix label = Aurora::Matrix::fromRawData(labelDataResult, dataSize, 1) + 1;
Aurora::Matrix center = Aurora::Matrix::fromRawData(centerPointDataResult, k, 1);
result.Label = label;
result.CenterPoint = center;
return result;
}
Aurora::Matrix Recon::imerode(const Aurora::Matrix& aMatrix)
{
float* data = aMatrix.getData();
unsigned int width = aMatrix.getDimSize(0);
unsigned int heigh = aMatrix.getDimSize(1);
unsigned int dataSize = width * heigh;
cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3,3));
cv::Mat mat(heigh, width, CV_32F, data );
cv::Mat out;
cv::erode(mat, out, kernel);
float* outData = reinterpret_cast<float*>(out.data);
float* resultData = new float[dataSize];
std::copy(outData, outData + dataSize, resultData);
return Aurora::Matrix::fromRawData(resultData, width, heigh);
}
Aurora::Matrix Recon::imdilate(const Aurora::Matrix& aMatrix)
{
float* data = aMatrix.getData();
unsigned int width = aMatrix.getDimSize(0);
unsigned int heigh = aMatrix.getDimSize(1);
unsigned int dataSize = width * heigh;
cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3,3));
cv::Mat mat(heigh, width, CV_32F, data );
cv::Mat out;
cv::dilate(mat, out, kernel);
float* outData = reinterpret_cast<float*>(out.data);
float* resultData = new float[dataSize];
std::copy(outData, outData + dataSize, resultData);
return Aurora::Matrix::fromRawData(resultData, width, heigh);
}
Aurora::Matrix Recon::downSample(const Aurora::Matrix& aMatrix)
{
int row = aMatrix.getDimSize(0);
int column = aMatrix.getDimSize(1);
int slice = aMatrix.getDimSize(2);
int newRow = (row + 3) / 4;
int newColumn = (column +3) / 4;
int newSlice = (slice + 3) / 4;
float* resultData = new float[newColumn * newRow * newSlice];
for (int s = 0; s < newSlice; ++s)
{
int oldSlice = s * 4;
if (oldSlice >= slice) oldSlice = slice - 1;
for (int c = 0; c < newColumn; ++c)
{
int oldColumn = c * 4;
if (oldColumn >= column) oldColumn = column - 1;
for (int r = 0; r < newRow; ++r)
{
int oldRow = r * 4;
if (oldRow >= row) oldRow = row - 1;
resultData[s * newRow * newColumn + c * newRow + r] = aMatrix.getData()[oldSlice * row * column + oldColumn * row + oldRow];
}
}
}
return Aurora::Matrix::fromRawData(resultData, newRow, newColumn, newSlice);
}

View File

@@ -0,0 +1,20 @@
#ifndef SEGIMGFUNCTION_H
#define SEGIMGFUNCTION_H
#include <Matrix.h>
namespace Recon
{
struct KmeansResult
{
Aurora::Matrix Label;
Aurora::Matrix CenterPoint;
};
Aurora::Matrix maxConnRegoin(const Aurora::Matrix& aMatrix);
Aurora::Matrix imfill(const Aurora::Matrix& aMatrix);
Aurora::Matrix bwconvhull(const Aurora::Matrix& aMatrix);
Aurora::Matrix imdilate(const Aurora::Matrix& aMatrix);
KmeansResult kmeans(const Aurora::Matrix& aMatrix, int aClusNum);
Aurora::Matrix imerode(const Aurora::Matrix& aMatrix);
Aurora::Matrix downSample(const Aurora::Matrix& aMatrix);
}
#endif //SEGIMGFUNCTION_H

View File

@@ -0,0 +1,263 @@
#include "maxConnectedComponent3D.cuh"
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdint>
#include <unordered_set>
#include <unordered_map>
#include <vector>
#include <algorithm>
#define INDEX(x,y,z,W,C,S) ((x)*(C) + (y) + (z)*(W)*(C))
__constant__ int NEIGH_OFFS[26*3];
static int HOST_OFFSET[26*3];
__global__ void floatToMask(const float* __restrict__ in,
uint8_t* __restrict__ mask,
int W, int C, int S)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = blockIdx.z * blockDim.z + threadIdx.z;
if (x>=W || y>=C || z>=S) return;
size_t idx = INDEX(x,y,z,W,C,S);
float v = in[idx];
mask[idx] = (v > 0.5f) ? 1 : 0;
}
__global__ void initLabels(const uint8_t* __restrict__ mask,
uint32_t* __restrict__ labels,
int W, int C, int S)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = blockIdx.z * blockDim.z + threadIdx.z;
if (x>=W || y>=C || z>=S) return;
size_t idx = INDEX(x,y,z,W,C,S);
labels[idx] = mask[idx] ? (uint32_t)(idx + 1) : 0u;
}
__global__ void propagateLabels(const uint8_t* __restrict__ mask,
uint32_t* __restrict__ labels,
int W, int C, int S,
int* __restrict__ d_changed)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = blockIdx.z * blockDim.z + threadIdx.z;
if (x>=W || y>=C || z>=S) return;
size_t idx = INDEX(x,y,z,W,C,S);
if (!mask[idx]) return;
uint32_t cur = labels[idx];
uint32_t minLabel = cur;
// 遍历 26 邻域
#pragma unroll
for (int k = 0; k < 26; ++k)
{
int nx = x + NEIGH_OFFS[3*k + 0];
int ny = y + NEIGH_OFFS[3*k + 1];
int nz = z + NEIGH_OFFS[3*k + 2];
if ((unsigned)nx < (unsigned)W &&
(unsigned)ny < (unsigned)C &&
(unsigned)nz < (unsigned)S)
{
size_t nidx = INDEX(nx,ny,nz,W,C,S);
if (mask[nidx]) {
uint32_t nl = labels[nidx];
if (nl != 0u && nl < minLabel) minLabel = nl;
}
}
}
if (minLabel < cur)
{
labels[idx] = minLabel;
*d_changed = 1;
}
}
__global__ void writeLargestComponent(const uint32_t* __restrict__ labels,
float* __restrict__ out,
uint32_t keep_label,
int W, int C, int S)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = blockIdx.z * blockDim.z + threadIdx.z;
if (x>=W || y>=C || z>=S) return;
size_t idx = INDEX(x,y,z,W,C,S);
out[idx] = (labels[idx] == keep_label) ? 1.0f : 0.0f;
}
static uint32_t findLargestLabel(const std::vector<uint32_t>& h_labels)
{
std::unordered_map<uint32_t, uint32_t> cnt;
cnt.reserve(h_labels.size()/16 + 1);
for (auto L : h_labels) {
if (L) ++cnt[L];
}
uint32_t best_label = 0;
uint32_t best_size = 0;
for (auto& kv : cnt) {
if (kv.second > best_size) {
best_size = kv.second;
best_label = kv.first;
}
}
return best_label;
}
unsigned int bwconncomp3D(const Aurora::Matrix aInput, int max_iters)
{
int W = aInput.getDimSize(0);
int C = aInput.getDimSize(1);
int S = aInput.getDimSize(2);
float* inVol = aInput.getData();
size_t N = W * C * S;
if (S <= 1) return 0;
{
int t = 0;
for (int dz = -1; dz <= 1; ++dz)
for (int dx = -1; dx <= 1; ++dx)
for (int dy = -1; dy <= 1; ++dy) {
if (dx==0 && dy==0 && dz==0) continue;
HOST_OFFSET[3*t+0] = dx;
HOST_OFFSET[3*t+1] = dy;
HOST_OFFSET[3*t+2] = dz;
++t;
}
cudaMemcpyToSymbol(NEIGH_OFFS, HOST_OFFSET, sizeof(HOST_OFFSET));
}
float *d_in = nullptr, *d_out = nullptr;
uint8_t *d_mask = nullptr;
uint32_t *d_labels = nullptr;
int *d_changed = nullptr;
cudaMalloc(&d_in, N * sizeof(float));
cudaMalloc(&d_out, N * sizeof(float));
cudaMalloc(&d_mask, N * sizeof(uint8_t));
cudaMalloc(&d_labels, N * sizeof(uint32_t));
cudaMalloc(&d_changed,sizeof(int));
cudaMemcpy(d_in, inVol, N*sizeof(float), cudaMemcpyHostToDevice);
dim3 block(8,8,8);
dim3 grid( (W+block.x-1)/block.x,
(C+block.y-1)/block.y,
(S+block.z-1)/block.z );
floatToMask<<<grid, block>>>(d_in, d_mask, W, C, S);
initLabels<<<grid, block>>>(d_mask, d_labels, W, C, S);
int h_changed = 0;
int iter = 0;
do
{
h_changed = 0;
cudaMemcpy(d_changed, &h_changed, sizeof(int), cudaMemcpyHostToDevice);
propagateLabels<<<grid, block>>>(d_mask, d_labels, W, C, S, d_changed);
cudaMemcpy(&h_changed, d_changed, sizeof(int), cudaMemcpyDeviceToHost);
++iter;
if (iter >= max_iters)
{
fprintf(stderr, "[Warn] Reached max_iters=%d before convergence.\n", max_iters);
break;
}
} while (h_changed != 0);
std::vector<uint32_t> h_labels(N);
cudaMemcpy(h_labels.data(), d_labels, N*sizeof(uint32_t), cudaMemcpyDeviceToHost);
std::unordered_set<uint32_t> uniqueSet(h_labels.begin(), h_labels.end());
return uniqueSet.size() - 1;
}
Aurora::Matrix maxConnectedComponent3D(const Aurora::Matrix aInput, int max_iters)
{
int W = aInput.getDimSize(0);
int C = aInput.getDimSize(1);
int S = aInput.getDimSize(2);
float* inVol = aInput.getData();
size_t N = W * C * S;
if (S <= 1) return Aurora::Matrix();
{
int t = 0;
for (int dz = -1; dz <= 1; ++dz)
for (int dx = -1; dx <= 1; ++dx)
for (int dy = -1; dy <= 1; ++dy) {
if (dx==0 && dy==0 && dz==0) continue;
HOST_OFFSET[3*t+0] = dx;
HOST_OFFSET[3*t+1] = dy;
HOST_OFFSET[3*t+2] = dz;
++t;
}
cudaMemcpyToSymbol(NEIGH_OFFS, HOST_OFFSET, sizeof(HOST_OFFSET));
}
float *d_in = nullptr, *d_out = nullptr;
uint8_t *d_mask = nullptr;
uint32_t *d_labels = nullptr;
int *d_changed = nullptr;
cudaMalloc(&d_in, N * sizeof(float));
cudaMalloc(&d_out, N * sizeof(float));
cudaMalloc(&d_mask, N * sizeof(uint8_t));
cudaMalloc(&d_labels, N * sizeof(uint32_t));
cudaMalloc(&d_changed,sizeof(int));
cudaMemcpy(d_in, inVol, N*sizeof(float), cudaMemcpyHostToDevice);
dim3 block(8,8,8);
dim3 grid( (W+block.x-1)/block.x,
(C+block.y-1)/block.y,
(S+block.z-1)/block.z );
floatToMask<<<grid, block>>>(d_in, d_mask, W, C, S);
initLabels<<<grid, block>>>(d_mask, d_labels, W, C, S);
int h_changed = 0;
int iter = 0;
do
{
h_changed = 0;
cudaMemcpy(d_changed, &h_changed, sizeof(int), cudaMemcpyHostToDevice);
propagateLabels<<<grid, block>>>(d_mask, d_labels, W, C, S, d_changed);
cudaMemcpy(&h_changed, d_changed, sizeof(int), cudaMemcpyDeviceToHost);
++iter;
if (iter >= max_iters)
{
fprintf(stderr, "[Warn] Reached max_iters=%d before convergence.\n", max_iters);
break;
}
} while (h_changed != 0);
std::vector<uint32_t> h_labels(N);
cudaMemcpy(h_labels.data(), d_labels, N*sizeof(uint32_t), cudaMemcpyDeviceToHost);
uint32_t keep_label = findLargestLabel(h_labels);
writeLargestComponent<<<grid, block>>>(d_labels, d_out, keep_label, W, C, S);
float* outputData = new float[N];
cudaMemcpy(outputData, d_out, N*sizeof(float), cudaMemcpyDeviceToHost);
cudaFree(d_in);
cudaFree(d_out);
cudaFree(d_mask);
cudaFree(d_labels);
cudaFree(d_changed);
return Aurora::Matrix::fromRawData(outputData, W, C, S);
}

View File

@@ -0,0 +1,5 @@
#include <Matrix.h>
Aurora::Matrix maxConnectedComponent3D(const Aurora::Matrix aInput, int max_iters = 5000);
unsigned int bwconncomp3D(const Aurora::Matrix aInput, int max_iters = 5000);