diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu
new file mode 100644
index 0000000..4807c0f
--- /dev/null
+++ b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cu
@@ -0,0 +1,246 @@
+#include "Function2D.cuh"
+
+
+#include <cstdio>
+#include <thrust/reduce.h>
+#include <thrust/execution_policy.h>
+#include <cuda_runtime.h>
+#include <math.h>
+
+#include "CudaMatrix.h"
+#include "buildMatrix.cuh"
+#include "Sparse.h"
+
+
+namespace  {
+    const int THREADS_PER_BLOCK = 256;
+}
+
+
+
+__device__ float getPixelLengthKernel(float* aStartPt, float*aEndPt, float* aRes, int aPathLenDisc)
+{
+    float temp1;
+    float temp2;
+    float temp3;
+
+    temp1 = aEndPt[0]- aStartPt[0];
+    temp2 = aEndPt[1]- aStartPt[1];
+    temp3 = aEndPt[2]- aStartPt[2];
+    temp1 *= aRes[0];
+    temp2 *= aRes[1];
+    temp3 *= aRes[2];
+    temp1 = temp1*temp1;
+    temp2 = temp2*temp2;
+    temp3 = temp3*temp3;
+    return (sqrtf(temp3+temp2+temp1))/(float)aPathLenDisc;
+}
+
+__global__ struct b3dStruct{
+    int max, f, pathlen;
+    float  x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x, y, z, weight;
+};
+__device__ void getRayBLen(float *startPoint, float *endPoint, b3dStruct& aOut) {  
+    aOut.x = startPoint[0];
+    aOut.y = startPoint[1];
+    aOut.z = startPoint[2];
+    
+    float dx = endPoint[0] - aOut.x;
+    float dy = endPoint[1] - aOut.y;
+    float dz = endPoint[2] - aOut.z;
+    
+    aOut.x_inc = (dx < 0) ? -1 : 1;
+    float l = abs(dx);
+    aOut.y_inc = (dy < 0) ? -1 : 1;
+    float m = abs(dy);
+    aOut.z_inc = (dz < 0) ? -1 : 1;
+    float n = abs(dz);
+    aOut.dx2 = l*2;// << 1;
+    aOut.dy2 = m*2;// << 1;
+    aOut.dz2 = n*2;// << 1;
+    float vmax ;
+    if ((l >= m) && (l >= n)){
+        aOut.max =ceilf(l);
+        vmax = l;
+        aOut.d = aOut.dx2;
+        aOut.dx2 = 0;
+        aOut.f = 0;
+    }
+    else if((m >= l) && (m >= n)){
+        aOut.max = ceilf(m);
+        vmax = m;
+        aOut.d = aOut.dy2;
+        aOut.dy2 = 0;
+        aOut.f = 1;
+    }
+    else{
+        aOut.max = ceilf(n);
+        vmax = n;
+        aOut.d = aOut.dz2;
+        aOut.dz2 = 0;
+        aOut.f = 2;
+    }
+    aOut.err_0 = aOut.f==0?0:(aOut.dx2 - vmax);
+    aOut.err_1 = aOut.f==1?0:(aOut.dy2 - vmax);
+    aOut.err_2 = aOut.f==2?0:(aOut.dz2 - vmax);
+}
+
+/**
+ * @brief 计算traceStraightRayBresenham的pathlen和weight的核函数
+ * 
+ * @param aStratPt 
+ * @param aEndPt 
+ * @param aSize 
+ * @param aRes 
+ * @param aOuts 
+ * @return _
+ */
+__global__ void calcLenWeightKernel(float* aStratPt, float* aEndPt, float* aRes,int aSize, b3dStruct* aOuts ){
+    unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    if (idx > aSize)return;
+    getRayBLen(aStratPt + idx*3, aEndPt + idx*3, aOuts[idx]);
+
+    aOuts[idx].weight = getPixelLengthKernel(aStratPt + idx*3, aEndPt + idx*3, aRes,aOuts[idx].max);
+    if (idx == 196){
+        int v=0;
+        for (int i =0;i<196; i++) {
+            v+=aOuts[i].max;
+        }
+    }
+};
+
+__global__ void calcPathOffsetKernel(b3dStruct* aStructs ,unsigned int* aPath, int aSize){
+    aPath[0] = aStructs[0].max;
+    for (int i=1; i<aSize; i++) {
+        aPath[i] =aStructs[i].max+aPath[i-1]; 
+    }
+}
+
+__device__ float convertToLinearIndices(const float3& aDims, const float3& aPath){
+    float matrixSize_1 = aDims.x;
+    float matrixSize_2 = aDims.y;
+    float coord_col1 = aPath.x;
+    float coord_col2Sub1 = aPath.y - 1;
+    float coord_col3Sub1 = aPath.z - 1;
+    return coord_col1 + coord_col2Sub1 * matrixSize_1 + coord_col3Sub1 * matrixSize_1 * matrixSize_2 - 1 ;
+}
+
+__global__ void calcRayBLPathKernel(struct b3dStruct* aInstruct,
+            float* dims, unsigned int * aOffset,
+            float* aJOut, unsigned int aSize) { 
+    unsigned int idx = blockIdx.x;
+    if (idx > aSize)return;
+    b3dStruct* in = aInstruct+idx;
+    int f;
+    float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x,y,z;
+    x = in->x;
+    y = in->y;
+    z = in->z;
+    x_inc = in->x_inc;
+    y_inc = in->y_inc;
+    z_inc = in->z_inc;
+
+    dx2 = in->dx2;
+    dy2 = in->dy2;
+    dz2 = in->dz2;
+    d = in->d;
+
+    err_0 = in->err_0;
+    err_1 = in->err_1;
+    err_2 = in->err_2;
+    max = in->max;
+    f = in->f;
+
+    float3 dims3{dims[0],dims[1],dims[2]};
+    float* output_ptr = aJOut + (idx ==0?0:aOffset[idx-1]);
+    for (float i = 0; i < max; i++) {
+        output_ptr[0]=convertToLinearIndices(dims3, {roundf(x),roundf(y),roundf(z)});
+        // output_ptr[0]=i;
+
+        output_ptr ++;
+        if (err_0 > 0) {
+          x += x_inc;
+          err_0 -= d;
+        }
+        if (err_1 > 0) {
+          y += y_inc;
+          err_1 -= d;
+        }
+        if (err_2 > 0) {
+          z += z_inc;
+          err_2 -= d;
+        }
+        err_0 += dx2;
+        err_1 += dy2;
+        err_2 += dz2;
+        if (f == 0) x += x_inc;
+        if (f == 1) y += y_inc;
+        if (f == 2) z += z_inc;
+    }
+}
+
+__global__ void fillIAndSKernel(struct b3dStruct* aInstructs,unsigned int * aOffset,float* aIOut,float* aSOut){
+    b3dStruct& s = aInstructs[blockIdx.x];
+    __shared__ unsigned int beginIdx; 
+    __shared__ unsigned int length;
+    __shared__ float weight; 
+
+    if (threadIdx.x == 0){
+        beginIdx = (blockIdx.x == 0) ? 0 : aOffset[blockIdx.x - 1];
+        length = s.max;
+        weight = s.weight;
+    }
+    __syncthreads();
+
+    for (int i = 0; threadIdx.x + i < length; i+=blockDim.x) {
+        aIOut[beginIdx + threadIdx.x + i] = blockIdx.x; 
+        aSOut[beginIdx + threadIdx.x + i] = weight; 
+    }
+}
+
+Recon::BuildMatrixResult Recon::buildMatrix(const Aurora::CudaMatrix &senderList,
+                                  const Aurora::CudaMatrix &receiverList,
+                                  Aurora::CudaMatrix& res,
+                                  const Aurora::CudaMatrix &dims, bool BENT,
+                                  const Aurora::CudaMatrix &potentialMap){
+    Recon::BuildMatrixResult result;
+    unsigned int numDims = senderList.getDimSize(0);
+    unsigned int nTotalRays = senderList.getDimSize(1);
+    b3dStruct* structList= nullptr;
+    cudaMalloc((void**)&structList, sizeof(b3dStruct)*nTotalRays);
+    int blocksPerGrid = (nTotalRays + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+    calcLenWeightKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(
+        senderList.getData(), receiverList.getData(), res.getData(),nTotalRays,structList);
+    cudaDeviceSynchronize();
+
+    unsigned int* offset= nullptr;
+    cudaMalloc((void**)&offset, sizeof(unsigned int)*nTotalRays);
+    calcPathOffsetKernel<<<1,1>>>(structList,offset,nTotalRays);
+    cudaDeviceSynchronize();
+
+    unsigned int size;
+    cudaMemcpy(&size, offset+(nTotalRays-1), sizeof(unsigned int), cudaMemcpyDeviceToHost);
+
+    float* iData = nullptr;
+    float* jData = nullptr;
+    float* sData = nullptr;
+
+    cudaMalloc((void**)&iData, sizeof(float)*size);
+    cudaMalloc((void**)&jData, sizeof(float)*size);
+    cudaMalloc((void**)&sData, sizeof(float)*size);
+
+    calcRayBLPathKernel<<<nTotalRays,1>>>(structList, dims.getData(), offset, jData, nTotalRays);
+    fillIAndSKernel<<<nTotalRays,THREADS_PER_BLOCK>>>(structList, offset, iData, sData);
+    cudaDeviceSynchronize();
+    cudaFree(structList);
+
+    auto mI = Aurora::CudaMatrix::fromRawData(iData, 1, size ,1);
+    auto mJ = Aurora::CudaMatrix::fromRawData(jData, 1, size,1);
+    auto mS = Aurora::CudaMatrix::fromRawData(sData, 1, size,1);
+    result.M = Aurora::Sparse(mI.toHostMatrix(), mJ.toHostMatrix(),mS.toHostMatrix(),nTotalRays, Aurora::prod(dims).getValue(0));
+    return result;   
+
+}
+
+
+
diff --git a/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh
new file mode 100644
index 0000000..61a2c06
--- /dev/null
+++ b/src/transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh
@@ -0,0 +1,12 @@
+#ifndef __BUILDMATRIX_CUH__
+#define __BUILDMATRIX_CUH__
+#include "buildMatrix.h"
+namespace Recon {
+    
+BuildMatrixResult buildMatrix(const Aurora::CudaMatrix &senderList,
+                                  const Aurora::CudaMatrix &receiverList,
+                                  Aurora::CudaMatrix& res,
+                                  const Aurora::CudaMatrix &dims, bool BENT,
+                                  const Aurora::CudaMatrix &potentialMap);
+}
+#endif // __BUILDMATRIX_H__
\ No newline at end of file
diff --git a/src/transmissionReconstruction/reconstruction/reconstruction.cpp b/src/transmissionReconstruction/reconstruction/reconstruction.cpp
index 1fee8d8..908bb13 100644
--- a/src/transmissionReconstruction/reconstruction/reconstruction.cpp
+++ b/src/transmissionReconstruction/reconstruction/reconstruction.cpp
@@ -9,6 +9,7 @@
 #include "CudaEnvInit.h"
 #include "log/notify.h"
 #include "transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.h"
+#include "transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh"
 #include "src/transmissionReconstruction/reconstruction/solvingEquationSystem/solve.h"
 
 #include <algorithm>
@@ -194,8 +195,9 @@ namespace Recon {
         BuildMatrixResult buildMatrixR;
         for(int iter=1; iter<=numIter; ++iter)
         {
+            auto resDevice = res.toDeviceMatrix();
             //1200ms
-            buildMatrixR = buildMatrix(senderList, receiverList, res, dims, bentRecon && (iter!=1), potentialMap);
+            buildMatrixR = buildMatrix(senderList.toDeviceMatrix(), receiverList.toDeviceMatrix(), resDevice, dims.toDeviceMatrix(), bentRecon && (iter!=1), potentialMap.toDeviceMatrix());
             if(!data.isNull() && bentRecon && iter != numIter)
             {
                 //与默认配置bentRecon不符，暂不实现todo
@@ -229,51 +231,21 @@ namespace Recon {
                 allHitMaps.push_back(buildMatrixR.hitmap);
             }
 
-            std::promise<Matrix> sosPromise;
-            std::future<Matrix> sosFuture = sosPromise.get_future();
-
-            std::promise<Matrix> attPromise;
-            std::future<Matrix> attFuture = attPromise.get_future();
-            solveParameterIteratorFunctionType solveParameterIteratorFunctionPointer = &solveParameterIterator;
-            slownessToSOSFunctionType slownessToSOSFunctionPointer = &slownessToSOS;
-            std::thread sosThread;
-            std::thread attThread;
-
             if(!data.isNull())
             {
                 //1500ms
-                sosThread = std::thread([solveParameterIteratorFunctionPointer, slownessToSOSFunctionPointer, &buildMatrixR, &b, &dims, &SOS_IN_WATER, &sosPromise]()
-                {
-                    //auto bCopy = b.deepCopy();
-                    //auto dimCopy = dims.deepCopy();
-                    Matrix result = solveParameterIteratorFunctionPointer(buildMatrixR.M, b, dims, false, transParams::nonNeg, 0)[0][0];
-                    result = slownessToSOSFunctionPointer(result, SOS_IN_WATER);
-                    sosPromise.set_value(result);
-                });
-                //Matrix sosValue = solveParameterIterator(buildMatrixR.M, b, dims, false, transParams::nonNeg)[0][0];
+                Matrix sosValue = solveParameterIterator(buildMatrixR.M, b, dims, false, transParams::nonNeg)[0][0];
                 //1ms
-                //result.outSOS = slownessToSOS(sosValue, SOS_IN_WATER) ;
+                result.outSOS = slownessToSOS(sosValue, SOS_IN_WATER) ;
             }
 
             if(!dataAtt.isNull())
             {
-                attThread = std::thread([solveParameterIteratorFunctionPointer, slownessToSOSFunctionPointer, &buildMatrixR, &bAtt, &dims, &attPromise]()
-                {
-                    //auto bAttCopy = bAtt.deepCopy();
-                    //auto dimCopy = dims.deepCopy();
-                    Matrix result = solveParameterIteratorFunctionPointer(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg, 1)[0][0];
-                    result = result / 100;
-                    attPromise.set_value(result);
-                });
                 //1500ms
-                //Matrix attValue = solveParameterIterator(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg)[0][0];
+                Matrix attValue = solveParameterIterator(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg)[0][0];
                 //1ms
-                //result.outATT = attValue/100 ;
+                result.outATT = attValue/100 ;
             }
-            sosThread.join();
-            attThread.join();
-            result.outSOS = sosFuture.get();
-            result.outATT = attFuture.get();
             Recon::notifyProgress(10 + 10 * (iter/numIter));
         }
 
diff --git a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp
index c2c03bf..9fd82a2 100644
--- a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp
+++ b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.cpp
@@ -16,28 +16,33 @@
 #include <Spectra/MatOp/SparseGenMatProd.h>
 #include <Eigen/Core>
 #include <Eigen/SparseCore>
+#include <Eigen/Eigenvalues>
+
+#include <sys/time.h>
 namespace Recon
 {
+    bool isEigsFinished = false;
+    bool isEigsStatus = false;
 
-    double eigs(Aurora::Sparse& aM)
+    float eigs(Aurora::Sparse& aM)
     {
-        double result = NAN;
         size_t size = aM.getM();
         if(size < aM.getN())
         {
             size = aM.getN();
         }
-        Eigen::SparseMatrix<double> M(size, size);
-        std::vector<Eigen::Triplet<double>> triplets;
+        Eigen::SparseMatrix<double> M(size,size);
         Aurora::Matrix rows = aM.getRowVector();
         Aurora::Matrix columns = aM.getColVector();
         Aurora::Matrix values = aM.getValVector();
+        std::vector<Eigen::Triplet<double>> triplets(rows.getDataSize());
+        #pragma omp parallel for
         for (int i = 0; i < rows.getDataSize(); ++i)
         {
-            triplets.push_back(Eigen::Triplet<double>(rows[i], columns[i], values[i]));
+            triplets[i] = Eigen::Triplet<double>(rows[i], columns[i], values[i]);
         }
-
         M.setFromTriplets(triplets.begin(), triplets.end());
+        float result;
         Spectra::SparseGenMatProd<double> op(M);
         Spectra::GenEigsSolver<Spectra::SparseGenMatProd<double>> eigs(op, 1, 6);
         eigs.init();
@@ -49,14 +54,24 @@ namespace Recon
             std::complex<double> complex = evalues[0];
             result = complex.real();
         }
-
+        isEigsFinished = true;
+        if (result> 1 + 1e-10)
+        {
+            isEigsStatus = true;
+        }
         return result;
     }
 
-    void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n){
-        bool isreal = M.getValVector().getValueType() == Aurora::Normal;
-        auto s2 = eigs(M);
-        if (s2> 1 + 1e-10)
+    void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n)
+    {
+        float s2 = 0.0;
+        if(!isEigsFinished)
+        {
+            s2 = eigs(M);
+            return;;
+        }
+
+        if (isEigsStatus)
         {
             b = b / sqrt(s2);
             M.getValVector() = M.getValVector() / sqrt( s2);
@@ -65,23 +80,23 @@ namespace Recon
 
     Aurora::Matrix callTval3(Aurora::Sparse& M, Aurora::Matrix& b,const Aurora::Matrix& dims,int device, TVALOptions& opt)
     {
-        checkAndScale(M,b,(size_t)Aurora::prod(dims).getScalar());
+        checkAndScale(M,b,(size_t)Aurora::prod(dims).getScalar());     
         int * yIdxs = new int[M.getColVector().getDataSize()];
         std::copy(M.getColVector().getData(),M.getColVector().getData()+M.getColVector().getDataSize(),yIdxs);
         int * xIdxs = new int[M.getRowVector().getDataSize()];
         std::copy(M.getRowVector().getData(),M.getRowVector().getData()+M.getRowVector().getDataSize(),xIdxs);
         Aurora::Matrix values = M.getValVector();
-        size_t cols = M.getM(), rows = M.getN();
-        int nz = std::max(M.getColVector().getDataSize(),std::max(M.getRowVector().getDataSize(),M.getValVector().getDataSize()));
+        size_t cols = M.getM(), rows = M.getN();        
+        int nz = std::max(M.getColVector().getDataSize(),std::max(M.getRowVector().getDataSize(),M.getValVector().getDataSize()));        
         sparse_matrix_t A;
         sparse_matrix_t csrA;
-        mkl_sparse_s_create_coo(&A, sparse_index_base_t::SPARSE_INDEX_BASE_ZERO,  rows, cols,  nz,  yIdxs, xIdxs,values.getData());
-        mkl_sparse_convert_csr(A, sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, &csrA);
+        mkl_sparse_s_create_coo(&A, sparse_index_base_t::SPARSE_INDEX_BASE_ZERO,  rows, cols,  nz,  yIdxs, xIdxs,values.getData());        
+        mkl_sparse_convert_csr(A, sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, &csrA);      
         int n_rows,n_cols;
         int *rows_start,*rows_end,*col_indx;
         float * csrValues;
         sparse_index_base_t index;
-        mkl_sparse_s_export_csr(csrA, &index, &n_rows, &n_cols, &rows_start, &rows_end, &col_indx, &csrValues);
+        mkl_sparse_s_export_csr(csrA, &index, &n_rows, &n_cols, &rows_start, &rows_end, &col_indx, &csrValues);      
         mkl_sparse_destroy(A);
         delete [] xIdxs;
         delete [] yIdxs;
@@ -101,4 +116,4 @@ namespace Recon
         return Aurora::Matrix::fromRawData(result.data, result.dims[0],result.dims[1],result.dims[2]);
         // return Aurora::Matrix();
     }
-}
\ No newline at end of file
+}
diff --git a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h
index 35a8630..64b634a 100644
--- a/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h
+++ b/src/transmissionReconstruction/reconstruction/solvingEquationSystem/TVAL/TVAL.h
@@ -7,7 +7,7 @@ namespace Recon {
 
 void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n);
 
-double eigs(Aurora::Sparse& aM);
+float eigs(Aurora::Sparse& aM);
 
 Aurora::Matrix callTval3(Aurora::Sparse &M, Aurora::Matrix &b,
                          const Aurora::Matrix &dims, int device,