diff --git a/src/common/SegImg/Data3DCluster.cpp b/src/common/SegImg/Data3DCluster.cpp
new file mode 100644
index 0000000..dc3035b
--- /dev/null
+++ b/src/common/SegImg/Data3DCluster.cpp
@@ -0,0 +1,78 @@
+#include "Data3DCluster.h"
+#include "SegImgFunction.h"
+#include "Function2D.h"
+#include "Function3D.h"
+#include "maxConnectedComponent3D.cuh"
+
+#include <algorithm>
+#include <float.h>
+
+#include <MatlabWriter.h>
+
+using namespace Aurora;
+using namespace Recon;
+
+Data3DClusterResult Recon::Data3DCluster(const Aurora::Matrix& aMatrix, float aTopZ)
+ {
+    int clusNum = 2;
+    Aurora::Matrix imgSamp = downSample(aMatrix);
+    int ite = 1;
+    int LStartNum = 101;
+    float nipThre = 0;
+    Aurora::Matrix clusImg;
+    Aurora::Matrix nipRe;
+    while(LStartNum > 100)
+    {
+        if(ite < 5)
+        {
+            size_t size = imgSamp.getDataSize();
+            imgSamp.forceReshape(size,1,1);
+            KmeansResult kResult = kmeans(imgSamp, 2);
+            clusImg = kResult.Label;
+            nipThre = max(kResult.CenterPoint, FunctionDirection::All)[0];
+            ite++;
+        }
+        else
+        {
+            nipThre+=0.02;
+            Aurora::Matrix oneMatrix = zeros(clusImg.getDataSize(),1) + 1;
+            for(int i=0; i<clusImg.getDataSize(); ++i)
+            {
+                if(imgSamp[i] > nipThre)
+                {
+                    clusImg[i] += 1;
+                }
+            }
+        }
+        nipRe = zeros(aMatrix.getDimSize(0), aMatrix.getDimSize(1),aMatrix.getDimSize(2));
+        size_t effectiveDataSize = aMatrix.getDataSize() - ((aTopZ + 1) * aMatrix.getDimSize(0) * aMatrix.getDimSize(1));
+        for(size_t i=0; i<effectiveDataSize; ++i)
+        {
+            if(aMatrix[i] > nipThre)
+            {
+                nipRe[i] = 1;
+            }
+        }
+        size_t nipRe20Size = nipRe.getDimSize(0) * nipRe.getDimSize(1) * 20;
+        float* nipRe20Data = new float[nipRe20Size];
+        std::copy(nipRe.getData(), nipRe.getData() + nipRe20Size, nipRe20Data);
+        Aurora::Matrix nipRe20 = Aurora::Matrix::fromRawData(nipRe20Data, nipRe.getDimSize(0), nipRe.getDimSize(1), 20);
+        LStartNum = bwconncomp3D(nipRe20);
+        nipRe = maxConnectedComponent3D(nipRe);
+    }
+    std::vector<float> C3D(clusNum, FLT_MAX);
+    for(size_t i=0; i<imgSamp.getDataSize(); ++i)
+    {
+        if( imgSamp[i] < C3D[clusImg[i] - 1])
+        {
+            C3D[clusImg[i] - 1] = imgSamp[i];
+        }
+    }
+    auto it = std::max_element(C3D.begin(), C3D.end());
+
+    Data3DClusterResult result;
+    result.nipRe = nipRe;
+    result.thre3D = *it;
+    return  result;
+ }
+ 
\ No newline at end of file
diff --git a/src/common/SegImg/Data3DCluster.h b/src/common/SegImg/Data3DCluster.h
new file mode 100644
index 0000000..661027a
--- /dev/null
+++ b/src/common/SegImg/Data3DCluster.h
@@ -0,0 +1,14 @@
+#ifndef DATA3DCLUSTER_H
+#define DATA3DCLUSTER_H
+#include <Matrix.h>
+namespace Recon
+{
+    struct Data3DClusterResult
+    {
+        Aurora::Matrix nipRe;
+        float thre3D;
+    };
+    Data3DClusterResult Data3DCluster(const Aurora::Matrix& aMatrix, float aTopZ);
+
+}
+#endif //DATA3DCLUSTER_H
\ No newline at end of file
diff --git a/src/common/SegImg/DealBW.cpp b/src/common/SegImg/DealBW.cpp
new file mode 100644
index 0000000..3329de5
--- /dev/null
+++ b/src/common/SegImg/DealBW.cpp
@@ -0,0 +1,17 @@
+#include "DealBW.h"
+
+#include "SegImgFunction.h"
+
+using namespace Recon;
+using namespace Aurora;
+
+Aurora::Matrix Recon::DealBW(const Aurora::Matrix& aMatrix)
+{
+    Aurora::Matrix tmp3 = maxConnRegoin(aMatrix);
+    tmp3 = imdilate(tmp3);
+    tmp3 = imerode(tmp3);
+    tmp3 = imfill(tmp3);
+    tmp3 = maxConnRegoin(tmp3);
+    tmp3 = bwconvhull(tmp3);
+    return tmp3;
+}
\ No newline at end of file
diff --git a/src/common/SegImg/DealBW.h b/src/common/SegImg/DealBW.h
new file mode 100644
index 0000000..860b60a
--- /dev/null
+++ b/src/common/SegImg/DealBW.h
@@ -0,0 +1,8 @@
+#ifndef DEALBW_H
+#define DEALBW_H
+#include <Matrix.h>
+namespace Recon
+{
+    Aurora::Matrix DealBW(const Aurora::Matrix& aMatrix);
+}
+#endif //DEALBW_H
\ No newline at end of file
diff --git a/src/common/SegImg/FindStartZSliceBaseCluster.cpp b/src/common/SegImg/FindStartZSliceBaseCluster.cpp
new file mode 100644
index 0000000..a615450
--- /dev/null
+++ b/src/common/SegImg/FindStartZSliceBaseCluster.cpp
@@ -0,0 +1,38 @@
+#include "FindStartZSliceBaseCluster.h"
+
+#include "Function2D.h"
+#include "SegImgFunction.h"
+
+#include <math.h>
+
+using namespace Recon;
+using namespace Aurora;
+
+unsigned int Recon::FindStartZSliceBaseCluster(const Aurora::Matrix& aMatrix, float aReso, int aTopZ)
+{
+    unsigned int startZ;
+    float nipThre = M_PI * pow(roundf32(5e-3 / aReso), 2);
+    int i=1;
+    while(i < aMatrix.getDimSize(2) - aTopZ)
+    {
+        Aurora::Matrix label = aMatrix.block(2, i-1, i-1);
+        if(sum(label, FunctionDirection::All)[0] > 0)
+        {
+            label = maxConnRegoin(label);
+            if(sum(label, FunctionDirection::All)[0] < nipThre)
+            {
+                ++i;
+            }
+            else
+            {
+                startZ = i;
+                break;
+            }
+        }
+        else
+        {
+            ++i;
+        }
+    }
+    return startZ;
+}
\ No newline at end of file
diff --git a/src/common/SegImg/FindStartZSliceBaseCluster.h b/src/common/SegImg/FindStartZSliceBaseCluster.h
new file mode 100644
index 0000000..a7dc340
--- /dev/null
+++ b/src/common/SegImg/FindStartZSliceBaseCluster.h
@@ -0,0 +1,8 @@
+#ifndef FINDSTARTZSLICEBASECLUSTER_H
+#define FINDSTARTZSLICEBASECLUSTER_H
+#include <Matrix.h>
+namespace Recon
+{
+    unsigned int FindStartZSliceBaseCluster(const Aurora::Matrix& aMatrix, float aReso, int aTopZ);
+}
+#endif //FINDSTARTZSLICEBASECLUSTER_H
\ No newline at end of file
diff --git a/src/common/SegImg/PreSegImg.cpp b/src/common/SegImg/PreSegImg.cpp
new file mode 100644
index 0000000..2d76b7c
--- /dev/null
+++ b/src/common/SegImg/PreSegImg.cpp
@@ -0,0 +1,45 @@
+#include "PreSegImg.h"
+#include "SegImgFunction.h"
+#include "Function3D.h"
+#include "Function2D.h"
+#include "DealBW.h"
+
+using namespace Recon;
+using namespace Aurora;
+
+PreSegImgResult Recon::PreSegImg(const Aurora::Matrix& aNipRe, unsigned int aTopZ, unsigned int aStartZ)
+{
+    int x = aNipRe.getDimSize(0);
+    int y = aNipRe.getDimSize(1);
+    int z = aNipRe.getDimSize(2);
+    Aurora::Matrix SegRes = zeros(aNipRe.getDimSize(0), aNipRe.getDimSize(1), aNipRe.getDimSize(2));
+    unsigned int endZ = aStartZ;
+    for(int i= z - aTopZ; i>=aStartZ; --i)
+    {
+        Aurora::Matrix label = aNipRe.block(2, i-1, i-1);
+        Aurora::Matrix labelH = imfill(label);
+        Aurora::Matrix hole = labelH - label;
+        if(sum(hole, FunctionDirection::All)[0] > 2*sum(label, FunctionDirection::All)[0])
+        {
+            endZ = i;
+            break;
+        }
+    }
+
+    for(int i=aStartZ; i<=endZ; ++i)
+    {
+        Aurora::Matrix label = aNipRe.block(2, i-1, i-1);
+        Aurora::Matrix maskIn = SegRes.block(2, i-2, i-2);
+        Aurora::Matrix maskOut = imerode(maskIn);
+        label = label * (1-maskOut);
+        Aurora::Matrix SegI = (label + maskIn) > 0;
+        SegI = maxConnRegoin(SegI);
+        SegI = DealBW(SegI);
+        SegRes.setBlock(2, i-1, i-1, SegI);
+    }
+
+    PreSegImgResult result;
+    result.endZ = endZ;
+    result.SegRes = SegRes;
+    return result;
+}
\ No newline at end of file
diff --git a/src/common/SegImg/PreSegImg.h b/src/common/SegImg/PreSegImg.h
new file mode 100644
index 0000000..1337f50
--- /dev/null
+++ b/src/common/SegImg/PreSegImg.h
@@ -0,0 +1,14 @@
+#ifndef PRESEGIMG_H
+#define PRESEGIMG_H
+#include <Matrix.h>
+namespace Recon
+{
+    struct PreSegImgResult
+    {
+        Aurora::Matrix SegRes;
+        unsigned int endZ;
+    };
+    
+    PreSegImgResult PreSegImg(const Aurora::Matrix& aNipRe, unsigned int aTopZ, unsigned int aStartZ);
+}
+#endif //PRESEGIMG_H
\ No newline at end of file
diff --git a/src/common/SegImg/SegImg.cpp b/src/common/SegImg/SegImg.cpp
new file mode 100644
index 0000000..594bdea
--- /dev/null
+++ b/src/common/SegImg/SegImg.cpp
@@ -0,0 +1,170 @@
+#include "SegImg.h"
+#include "config/config.h"
+#include "Function2D.h"
+#include "Function3D.h"
+#include "SegImgFunction.h"
+#include "DealBW.h"
+#include "Data3DCluster.h"
+#include "FindStartZSliceBaseCluster.h"
+#include "PreSegImg.h"
+
+#include <float.h>
+
+using namespace Aurora;
+
+namespace
+{
+    Aurora::Matrix create1ToXMatrix(int aEndValue)
+    {
+        float* data = new float[aEndValue];
+        for(int i=1; i<=aEndValue; ++i)
+        {
+            data[i-1] = i;
+        }
+        return Aurora::Matrix::fromRawData(data, 1, aEndValue);
+    }
+}
+
+Aurora::Matrix Recon::SegImgMain(const Aurora::Matrix& aMatrix, float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint)
+{
+    int topZ = roundf32((aImageEndPoint.getData()[2]-0) / aImageResolution);
+    Aurora::Matrix temp = aMatrix.deepCopy();
+    temp.forceReshape(temp.getDataSize(), 1, 1);
+    Aurora::Matrix img = (aMatrix - min(temp,FunctionDirection::All)) / (max(temp,FunctionDirection::All) - min(temp,FunctionDirection::All));
+    Data3DClusterResult data3DClusterResult = Data3DCluster(img, topZ);
+    unsigned int startZ = FindStartZSliceBaseCluster(data3DClusterResult.nipRe, aImageResolution, topZ);
+    PreSegImgResult preSegImgResult = PreSegImg(data3DClusterResult.nipRe, topZ, startZ);
+    Aurora::Matrix segRes = SegImg(preSegImgResult.SegRes, img, preSegImgResult.endZ, data3DClusterResult.thre3D, aImageResolution, aImageStartPoint, aImageEndPoint);
+    return aMatrix * segRes;
+}
+
+Aurora::Matrix Recon::SegImg(const Aurora::Matrix& aSegRes, const Aurora::Matrix& aImg, int aEndZ, float aThre3D,
+                             float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint)
+{
+    Aurora::Matrix result = aSegRes.deepCopy();
+    int clustNumZ = 2;
+    int x = result.getDimSize(0);
+    int y = result.getDimSize(1);
+    int z = result.getDimSize(2);
+    Aurora::Matrix xIdx = transpose(repmat(create1ToXMatrix(x),y,1));
+    Aurora::Matrix yIdx = repmat(create1ToXMatrix(y),x,1);
+    float CenterP1 = roundf32(-aImageStartPoint[0] / aImageResolution);
+    float CenterP2 = roundf32(-aImageStartPoint[1] / aImageResolution);
+    Aurora::Matrix xDistance = (xIdx-CenterP1)^2;
+    Aurora::Matrix yDistance = (yIdx-CenterP2)^2;
+    Aurora::Matrix centerDis = Aurora::sqrt(xDistance + yDistance);
+    for(int i=aEndZ+1; i<=z; ++i)
+    {
+        int updateFlag = 1;
+        Aurora::Matrix tmp = result.block(2, i-2, i-2);
+        float zDis = aImageEndPoint[2] - (z-i)*aImageResolution;
+        Aurora::Matrix centerDisZ = centerDis*tmp;
+        float centerDis_Z = Aurora::max(centerDisZ)[0]*aImageResolution;
+        float ThSlice;
+        if(centerDis_Z > 0.08 || zDis > -0.03)
+        {
+            tmp = aImg.block(2, i-1, i-1);
+            Aurora::Matrix imgData = downSample(tmp);
+            Aurora::Matrix imgDataCopy = imgData.deepCopy();
+            imgDataCopy.forceReshape(imgData.getDataSize(),1,1);
+            KmeansResult kResult = kmeans(imgDataCopy, clustNumZ);
+            Aurora::Matrix clusImg = kResult.Label;
+            Aurora::Matrix clus = zeros(clustNumZ,2);
+            for(int k=1; k<=clustNumZ;++k)
+            {
+                float cIDx = 0;
+                float minValue = MAXFLOAT;
+                for(unsigned int j=0; j<imgData.getDataSize(); ++j)
+                {
+                    if(clusImg[j] == k)
+                    {
+                        cIDx++;
+                        if(minValue > imgData[j])
+                        {
+                            minValue = imgData[j];
+                        }
+                    }
+                }
+                clus[k-1] = minValue;
+                clus[k+1] = cIDx;
+            }
+            Aurora::Matrix idx;
+            clus = sortrows(clus, &idx);
+            if(clus[0] < clus[1] && clus[2] > clus[3])
+            {
+                ThSlice = clus[1];
+            }
+            else
+            {
+                tmp = aImg.block(2, i-1, i-1);
+                Aurora::Matrix mask = result.block(2, i-2, i-2);
+                mask = imerode(mask);
+                tmp = tmp*(1-mask) + mask * mean(tmp,FunctionDirection::All)[0];
+                imgData = downSample(tmp);
+                Aurora::Matrix imgDataCopy = imgData.deepCopy();
+                imgDataCopy.forceReshape(imgData.getDataSize(),1,1);
+                kResult = kmeans(imgDataCopy, 2);
+                clusImg = kResult.Label;
+                clus = zeros(clustNumZ,2);
+                for(int k=1; k<=clustNumZ;++k)
+                {
+                    float cIDx = 0;
+                    float minValue = MAXFLOAT;
+                    for(unsigned int j=0; j<imgData.getDataSize(); ++j)
+                    {
+                        if(clusImg[j] == k)
+                        {
+                            cIDx++;
+                            if(minValue > imgData[j])
+                            {
+                                minValue = imgData[j];
+                            }
+                        }
+                    }
+                    clus[k-1] = minValue;
+                    clus[k+1] = cIDx;
+                }
+                Aurora::Matrix idx;
+                clus = sortrows(clus, &idx);
+                if(clus[0] < clus[1] && clus[2] > clus[3])
+                {
+                    ThSlice = clus[1];
+                }
+                else
+                {
+                    updateFlag = 0;
+                }
+            }
+        }
+        else
+        {
+            ThSlice = aThre3D;
+        }
+
+        if(zDis > -0.03)
+        {
+            Aurora::Matrix tmp1 = result.block(2, i-2, i-2);
+            Aurora::Matrix tmp2 = result.block(2, i-1, i-1);
+            float idx = ((sum(tmp2,FunctionDirection::All) - sum(tmp1,FunctionDirection::All)) /  sum(tmp1,FunctionDirection::All)).getData()[0];
+            if(idx>0.15)
+            {
+                updateFlag = 0;
+            }
+        }
+
+        Aurora::Matrix segI;
+        if(updateFlag == 1)
+        {
+            Aurora::Matrix ref = aImg > ThSlice;
+            Aurora::Matrix refI = ref.block(2, i-1, i-1);
+            segI = (refI + result.block(2, i-2, i-2)) > 0;
+            segI = DealBW(segI);
+        }
+        else
+        {
+            segI = result.block(2, i-2, i-2);
+        }
+        result.setBlock(2, i-1, i-1, segI);
+    }
+    return result;
+}
\ No newline at end of file
diff --git a/src/common/SegImg/SegImg.h b/src/common/SegImg/SegImg.h
new file mode 100644
index 0000000..4188e63
--- /dev/null
+++ b/src/common/SegImg/SegImg.h
@@ -0,0 +1,10 @@
+#ifndef SEGIMG_H
+#define SEGIMG_H
+#include <Matrix.h>
+namespace Recon
+{
+    Aurora::Matrix SegImgMain(const Aurora::Matrix& aMatrix, float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint);
+    Aurora::Matrix SegImg(const Aurora::Matrix& aSegRes, const Aurora::Matrix& aImg, int aEndZ, float aThre3D,
+                             float aImageResolution, const Aurora::Matrix& aImageStartPoint, const Aurora::Matrix& aImageEndPoint);
+}
+#endif //SEGIMG_H
diff --git a/src/common/SegImg/SegImgFunction.cpp b/src/common/SegImg/SegImgFunction.cpp
new file mode 100644
index 0000000..d6ceff7
--- /dev/null
+++ b/src/common/SegImg/SegImgFunction.cpp
@@ -0,0 +1,218 @@
+#include "SegImgFunction.h"
+
+#include <opencv2/opencv.hpp>
+
+#include "Function3D.h"
+
+
+using namespace Recon;
+
+Aurora::Matrix Recon::maxConnRegoin(const Aurora::Matrix& aMatrix)
+{
+    int column = aMatrix.getDimSize(0);
+    int row = aMatrix.getDimSize(1);
+    size_t size = column * row;
+    uint8_t* data = new uint8_t[size];
+
+    std::copy(aMatrix.getData(), aMatrix.getData() + size, data);
+    cv::Mat src = cv::Mat(row, column, CV_8U, data);
+    cv::Mat labels, stats, centroids;;
+    int nLabels = connectedComponentsWithStats(src, labels,stats,centroids, 4, CV_32S);
+    if(nLabels<=1)
+    {
+        return Aurora::Matrix();
+    }
+    int maxArea = 0;
+    int maxAreaIndex = 1;
+    for (int i = 1; i < nLabels; ++i)
+    {
+        int area = stats.at<int>(i, cv::CC_STAT_AREA);
+        if (area > maxArea) {
+            maxArea = area;
+            maxAreaIndex = i;
+        }
+    }
+
+    Aurora::Matrix result = Aurora::zeros(column, row);
+    for(int i=0; i<row; ++i)
+    {
+        for(int j=0; j<column; ++j)
+        {
+            if (labels.at<int>(i, j) == maxAreaIndex)
+            {
+                result[i*column + j] = 1;
+            }
+        }
+    }
+
+    return result;
+}
+
+Aurora::Matrix Recon::imfill(const Aurora::Matrix& aMatrix)
+{
+
+    int width = aMatrix.getDimSize(0);
+    int height = aMatrix.getDimSize(1);
+    size_t dataSize = aMatrix.getDataSize();
+    uint8_t* data = new uint8_t[aMatrix.getDataSize()];
+    std::copy(aMatrix.getData(), aMatrix.getData() + dataSize, data);
+    cv::Mat src = cv::Mat(height, width, CV_8U, data);   
+    cv::Mat inv = 1 - src;
+    cv::Mat labels;
+    int nLabels = connectedComponents(inv, labels, 8, CV_32S);
+    cv::Mat filled = src.clone();
+    std::vector<bool> isHole(nLabels, true);
+
+    int rows = src.rows;
+    int cols = src.cols;
+
+    for (int x = 0; x < cols; x++) {
+        isHole[ labels.at<int>(0, x) ] = false;             
+        isHole[ labels.at<int>(rows-1, x) ] = false;        
+    }
+    for (int y = 0; y < rows; y++) {
+        isHole[ labels.at<int>(y, 0) ] = false;           
+        isHole[ labels.at<int>(y, cols-1) ] = false;       
+    }
+
+    for (int y = 0; y < rows; y++) {
+        for (int x = 0; x < cols; x++) {
+            int label = labels.at<int>(y, x);
+            if (isHole[label]) {
+                filled.at<uchar>(y, x) = 1;
+            }
+        }
+    }
+
+    float* resultData = new float[aMatrix.getDataSize()];
+
+    std::copy(filled.data, filled.data + dataSize, resultData);
+    Aurora::Matrix result = Aurora::Matrix::fromRawData(resultData, width, height);
+    delete[] data;
+    return result;
+}
+
+Aurora::Matrix Recon::bwconvhull(const Aurora::Matrix& aMatrix)
+{
+    int width = aMatrix.getDimSize(0);
+    int height = aMatrix.getDimSize(1);
+    size_t dataSize = aMatrix.getDataSize();
+    uint8_t* data = new uint8_t[aMatrix.getDataSize()];
+    std::copy(aMatrix.getData(), aMatrix.getData() + dataSize, data);
+    cv::Mat input = cv::Mat(height, width, CV_8U, data);
+    std::vector<cv::Point> points;
+    cv::findNonZero(input, points);
+
+    if (points.empty())
+    {
+        delete[] data;
+        return Aurora::zeros(width, height);;
+    }
+
+    std::vector<cv::Point> hull;
+    cv::convexHull(points, hull);
+
+
+    cv::Mat hullMask = cv::Mat::zeros(input.size(), CV_8UC1);
+    cv::fillConvexPoly(hullMask, hull, cv::Scalar(1));
+    float* resultData = new float[aMatrix.getDataSize()];
+
+    std::copy(hullMask.data, hullMask.data + dataSize, resultData);
+    Aurora::Matrix result = Aurora::Matrix::fromRawData(resultData, width, height);
+
+    delete[] data;
+    return result;
+}
+
+KmeansResult Recon::kmeans(const Aurora::Matrix& aMatrix, int aClusNum)
+{
+    KmeansResult result;
+    auto input = aMatrix.deepCopy();
+    size_t dataSize = input.getDataSize();
+    cv::Mat data(dataSize, 1, CV_32F, input.getData());
+
+    int k = aClusNum;
+    cv::Mat labels;
+    cv::Mat centers;
+    cv::TermCriteria criteria(cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 100, 1e-6);
+    int attempts = 5;
+    int flags = cv::KMEANS_PP_CENTERS;
+    double compactness = cv::kmeans(data, k, labels, criteria, attempts, flags, centers);
+    
+    //  std::cout << "聚类中心: " << std::endl << centers << std::endl;
+    // std::cout << "紧密度: " << compactness << std::endl;
+
+    float* labelDataResult = new float[dataSize];
+    float* centerPointDataResult = new float[k];
+    int* labelData = reinterpret_cast<int*>(labels.data);
+    float* centerPointData = reinterpret_cast<float*>(centers.data);
+
+    std::copy(labelData, labelData + dataSize, labelDataResult);
+    std::copy(centerPointData, centerPointData + k, centerPointDataResult);
+    Aurora::Matrix label = Aurora::Matrix::fromRawData(labelDataResult, dataSize, 1) + 1;
+    Aurora::Matrix center = Aurora::Matrix::fromRawData(centerPointDataResult, k, 1);
+    result.Label = label;
+    result.CenterPoint = center;
+    return result;
+}
+
+Aurora::Matrix Recon::imerode(const Aurora::Matrix& aMatrix)
+{
+    float* data = aMatrix.getData();
+    unsigned int width = aMatrix.getDimSize(0);
+    unsigned int heigh = aMatrix.getDimSize(1);
+    unsigned int dataSize = width * heigh;
+    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3,3));
+    cv::Mat mat(heigh, width, CV_32F, data );
+    cv::Mat out;
+    cv::erode(mat, out, kernel);
+    float* outData = reinterpret_cast<float*>(out.data);
+    float* resultData = new float[dataSize];
+    std::copy(outData, outData + dataSize, resultData);
+    return Aurora::Matrix::fromRawData(resultData, width, heigh);
+}
+
+Aurora::Matrix Recon::imdilate(const Aurora::Matrix& aMatrix)
+{
+    float* data = aMatrix.getData();
+    unsigned int width = aMatrix.getDimSize(0);
+    unsigned int heigh = aMatrix.getDimSize(1);
+    unsigned int dataSize = width * heigh;
+    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3,3));
+    cv::Mat mat(heigh, width, CV_32F, data );
+    cv::Mat out;
+    cv::dilate(mat, out, kernel);
+    float* outData = reinterpret_cast<float*>(out.data);
+    float* resultData = new float[dataSize];
+    std::copy(outData, outData + dataSize, resultData);
+    return Aurora::Matrix::fromRawData(resultData, width, heigh);
+}
+
+
+ Aurora::Matrix Recon::downSample(const Aurora::Matrix& aMatrix)
+ {
+    int row = aMatrix.getDimSize(0);
+    int column = aMatrix.getDimSize(1);
+    int slice = aMatrix.getDimSize(2);
+    int newRow = (row + 3) / 4;
+    int newColumn = (column +3) / 4;
+    int newSlice = (slice + 3) / 4;
+    float* resultData = new float[newColumn * newRow * newSlice];
+    for (int s = 0; s < newSlice; ++s)
+    {
+        int oldSlice = s * 4;
+        if (oldSlice >= slice) oldSlice = slice - 1;      
+        for (int c = 0; c < newColumn; ++c)
+        {
+            int oldColumn = c * 4;
+            if (oldColumn >= column) oldColumn = column - 1;
+            for (int r = 0; r < newRow; ++r)
+            {
+                int oldRow = r * 4;
+                if (oldRow >= row) oldRow = row - 1;                    
+                resultData[s * newRow * newColumn +  c * newRow + r] = aMatrix.getData()[oldSlice * row * column + oldColumn * row + oldRow];
+            }
+        }
+    }
+    return Aurora::Matrix::fromRawData(resultData, newRow, newColumn, newSlice);
+ }
diff --git a/src/common/SegImg/SegImgFunction.h b/src/common/SegImg/SegImgFunction.h
new file mode 100644
index 0000000..30c33a3
--- /dev/null
+++ b/src/common/SegImg/SegImgFunction.h
@@ -0,0 +1,20 @@
+#ifndef SEGIMGFUNCTION_H
+#define SEGIMGFUNCTION_H
+#include <Matrix.h>
+namespace Recon
+{
+    struct KmeansResult
+    {
+        Aurora::Matrix Label;
+        Aurora::Matrix CenterPoint;
+    };
+    
+    Aurora::Matrix maxConnRegoin(const Aurora::Matrix& aMatrix);
+    Aurora::Matrix imfill(const Aurora::Matrix& aMatrix);
+    Aurora::Matrix bwconvhull(const Aurora::Matrix& aMatrix);
+    Aurora::Matrix imdilate(const Aurora::Matrix& aMatrix);
+    KmeansResult kmeans(const Aurora::Matrix& aMatrix, int aClusNum);
+    Aurora::Matrix imerode(const Aurora::Matrix& aMatrix);
+    Aurora::Matrix downSample(const Aurora::Matrix& aMatrix);
+}
+#endif //SEGIMGFUNCTION_H
\ No newline at end of file
diff --git a/src/common/SegImg/maxConnectedComponent3D.cu b/src/common/SegImg/maxConnectedComponent3D.cu
new file mode 100644
index 0000000..93d434a
--- /dev/null
+++ b/src/common/SegImg/maxConnectedComponent3D.cu
@@ -0,0 +1,263 @@
+#include "maxConnectedComponent3D.cuh"
+#include <cuda_runtime.h>
+#include <cstdio>
+#include <cstdint>
+#include <unordered_set>
+#include <unordered_map>
+#include <vector>
+#include <algorithm>
+
+#define INDEX(x,y,z,W,C,S) ((x)*(C) + (y) + (z)*(W)*(C))
+
+__constant__ int NEIGH_OFFS[26*3];
+
+static int HOST_OFFSET[26*3];
+
+__global__ void floatToMask(const float* __restrict__ in,
+                              uint8_t* __restrict__ mask,
+                              int W, int C, int S)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    int z = blockIdx.z * blockDim.z + threadIdx.z;
+    if (x>=W || y>=C || z>=S) return;
+
+    size_t idx = INDEX(x,y,z,W,C,S);
+    float v = in[idx];
+    mask[idx] = (v > 0.5f) ? 1 : 0;
+}
+
+__global__ void initLabels(const uint8_t* __restrict__ mask,
+                            uint32_t* __restrict__ labels,
+                            int W, int C, int S)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    int z = blockIdx.z * blockDim.z + threadIdx.z;
+    if (x>=W || y>=C || z>=S) return;
+
+    size_t idx = INDEX(x,y,z,W,C,S);
+    labels[idx] = mask[idx] ? (uint32_t)(idx + 1) : 0u;
+}
+
+__global__ void propagateLabels(const uint8_t* __restrict__ mask,
+                                 uint32_t* __restrict__ labels,
+                                 int W, int C, int S,
+                                 int* __restrict__ d_changed)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    int z = blockIdx.z * blockDim.z + threadIdx.z;
+    if (x>=W || y>=C || z>=S) return;
+
+    size_t idx = INDEX(x,y,z,W,C,S);
+    if (!mask[idx]) return;
+
+    uint32_t cur = labels[idx];
+    uint32_t minLabel = cur;
+
+    // 遍历 26 邻域
+    #pragma unroll
+    for (int k = 0; k < 26; ++k)
+    {
+        int nx = x + NEIGH_OFFS[3*k + 0];
+        int ny = y + NEIGH_OFFS[3*k + 1];
+        int nz = z + NEIGH_OFFS[3*k + 2];
+        if ((unsigned)nx < (unsigned)W &&
+            (unsigned)ny < (unsigned)C &&
+            (unsigned)nz < (unsigned)S)
+        {
+            size_t nidx = INDEX(nx,ny,nz,W,C,S);
+            if (mask[nidx]) {
+                uint32_t nl = labels[nidx];
+                if (nl != 0u && nl < minLabel) minLabel = nl;
+            }
+        }
+    }
+
+    if (minLabel < cur)
+    {
+        labels[idx] = minLabel;
+        *d_changed = 1;
+    }
+}
+
+__global__ void writeLargestComponent(const uint32_t* __restrict__ labels,
+                                        float* __restrict__ out,
+                                        uint32_t keep_label,
+                                        int W, int C, int S)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    int z = blockIdx.z * blockDim.z + threadIdx.z;
+    if (x>=W || y>=C || z>=S) return;
+
+    size_t idx = INDEX(x,y,z,W,C,S);
+    out[idx] = (labels[idx] == keep_label) ? 1.0f : 0.0f;
+}
+
+static uint32_t findLargestLabel(const std::vector<uint32_t>& h_labels)
+{
+    std::unordered_map<uint32_t, uint32_t> cnt;
+    cnt.reserve(h_labels.size()/16 + 1);
+    for (auto L : h_labels) {
+        if (L) ++cnt[L];
+    }
+    uint32_t best_label = 0;
+    uint32_t best_size  = 0;
+    for (auto& kv : cnt) {
+        if (kv.second > best_size) {
+            best_size  = kv.second;
+            best_label = kv.first;
+        }
+    }
+    return best_label;
+}
+
+unsigned int bwconncomp3D(const Aurora::Matrix aInput, int max_iters)
+{
+    int W = aInput.getDimSize(0);
+    int C = aInput.getDimSize(1);
+    int S = aInput.getDimSize(2);
+    float* inVol = aInput.getData();
+    size_t N = W * C * S;
+    if (S <= 1) return 0;
+
+
+    {
+        int t = 0;
+        for (int dz = -1; dz <= 1; ++dz)
+        for (int dx = -1; dx <= 1; ++dx)
+        for (int dy = -1; dy <= 1; ++dy) {
+            if (dx==0 && dy==0 && dz==0) continue;
+            HOST_OFFSET[3*t+0] = dx;
+            HOST_OFFSET[3*t+1] = dy;
+            HOST_OFFSET[3*t+2] = dz;
+            ++t;
+        }
+        cudaMemcpyToSymbol(NEIGH_OFFS, HOST_OFFSET, sizeof(HOST_OFFSET));
+    }
+
+    float    *d_in = nullptr, *d_out = nullptr;
+    uint8_t  *d_mask = nullptr;
+    uint32_t *d_labels = nullptr;
+    int      *d_changed = nullptr;
+
+    cudaMalloc(&d_in,     N * sizeof(float));
+    cudaMalloc(&d_out,    N * sizeof(float));
+    cudaMalloc(&d_mask,   N * sizeof(uint8_t));
+    cudaMalloc(&d_labels, N * sizeof(uint32_t));
+    cudaMalloc(&d_changed,sizeof(int));
+
+    cudaMemcpy(d_in, inVol, N*sizeof(float), cudaMemcpyHostToDevice);
+
+    dim3 block(8,8,8);
+    dim3 grid( (W+block.x-1)/block.x,
+               (C+block.y-1)/block.y,
+               (S+block.z-1)/block.z );
+
+    floatToMask<<<grid, block>>>(d_in, d_mask, W, C, S);
+
+    initLabels<<<grid, block>>>(d_mask, d_labels, W, C, S);
+
+    int h_changed = 0;
+    int iter = 0;
+    do
+    {
+        h_changed = 0;
+        cudaMemcpy(d_changed, &h_changed, sizeof(int), cudaMemcpyHostToDevice);
+        propagateLabels<<<grid, block>>>(d_mask, d_labels, W, C, S, d_changed);
+        cudaMemcpy(&h_changed, d_changed, sizeof(int), cudaMemcpyDeviceToHost);
+        ++iter;
+        if (iter >= max_iters)
+        {
+            fprintf(stderr, "[Warn] Reached max_iters=%d before convergence.\n", max_iters);
+            break;
+        }
+    } while (h_changed != 0);
+    std::vector<uint32_t> h_labels(N);
+    cudaMemcpy(h_labels.data(), d_labels, N*sizeof(uint32_t), cudaMemcpyDeviceToHost);
+    std::unordered_set<uint32_t> uniqueSet(h_labels.begin(), h_labels.end());
+    return uniqueSet.size() - 1;
+}
+
+Aurora::Matrix maxConnectedComponent3D(const Aurora::Matrix aInput, int max_iters)
+{
+    int W = aInput.getDimSize(0);
+    int C = aInput.getDimSize(1);
+    int S = aInput.getDimSize(2);
+    float* inVol = aInput.getData();
+    size_t N = W * C * S;
+    if (S <= 1) return Aurora::Matrix();
+
+
+    {
+        int t = 0;
+        for (int dz = -1; dz <= 1; ++dz)
+        for (int dx = -1; dx <= 1; ++dx)
+        for (int dy = -1; dy <= 1; ++dy) {
+            if (dx==0 && dy==0 && dz==0) continue;
+            HOST_OFFSET[3*t+0] = dx;
+            HOST_OFFSET[3*t+1] = dy;
+            HOST_OFFSET[3*t+2] = dz;
+            ++t;
+        }
+        cudaMemcpyToSymbol(NEIGH_OFFS, HOST_OFFSET, sizeof(HOST_OFFSET));
+    }
+
+    float    *d_in = nullptr, *d_out = nullptr;
+    uint8_t  *d_mask = nullptr;
+    uint32_t *d_labels = nullptr;
+    int      *d_changed = nullptr;
+
+    cudaMalloc(&d_in,     N * sizeof(float));
+    cudaMalloc(&d_out,    N * sizeof(float));
+    cudaMalloc(&d_mask,   N * sizeof(uint8_t));
+    cudaMalloc(&d_labels, N * sizeof(uint32_t));
+    cudaMalloc(&d_changed,sizeof(int));
+
+    cudaMemcpy(d_in, inVol, N*sizeof(float), cudaMemcpyHostToDevice);
+
+    dim3 block(8,8,8);
+    dim3 grid( (W+block.x-1)/block.x,
+               (C+block.y-1)/block.y,
+               (S+block.z-1)/block.z );
+
+    floatToMask<<<grid, block>>>(d_in, d_mask, W, C, S);
+
+    initLabels<<<grid, block>>>(d_mask, d_labels, W, C, S);
+
+    int h_changed = 0;
+    int iter = 0;
+    do
+    {
+        h_changed = 0;
+        cudaMemcpy(d_changed, &h_changed, sizeof(int), cudaMemcpyHostToDevice);
+        propagateLabels<<<grid, block>>>(d_mask, d_labels, W, C, S, d_changed);
+        cudaMemcpy(&h_changed, d_changed, sizeof(int), cudaMemcpyDeviceToHost);
+        ++iter;
+        if (iter >= max_iters)
+        {
+            fprintf(stderr, "[Warn] Reached max_iters=%d before convergence.\n", max_iters);
+            break;
+        }
+    } while (h_changed != 0);
+    std::vector<uint32_t> h_labels(N);
+    cudaMemcpy(h_labels.data(), d_labels, N*sizeof(uint32_t), cudaMemcpyDeviceToHost);
+
+    uint32_t keep_label = findLargestLabel(h_labels);
+
+    writeLargestComponent<<<grid, block>>>(d_labels, d_out, keep_label, W, C, S);
+
+    float* outputData = new float[N];
+    cudaMemcpy(outputData, d_out, N*sizeof(float), cudaMemcpyDeviceToHost);
+    
+
+    cudaFree(d_in);
+    cudaFree(d_out);
+    cudaFree(d_mask);
+    cudaFree(d_labels);
+    cudaFree(d_changed);
+
+    return Aurora::Matrix::fromRawData(outputData, W, C, S);
+}
\ No newline at end of file
diff --git a/src/common/SegImg/maxConnectedComponent3D.cuh b/src/common/SegImg/maxConnectedComponent3D.cuh
new file mode 100644
index 0000000..422fd4a
--- /dev/null
+++ b/src/common/SegImg/maxConnectedComponent3D.cuh
@@ -0,0 +1,5 @@
+#include <Matrix.h>
+
+Aurora::Matrix maxConnectedComponent3D(const Aurora::Matrix aInput, int max_iters = 5000);
+
+unsigned int bwconncomp3D(const Aurora::Matrix aInput, int max_iters = 5000);
\ No newline at end of file