cuda2023

2023-12-26 16:29:40 +08:00
parent 34d7086b47 c40dc8e938
commit 4dce7c90ce
30 changed files with 1564 additions and 311 deletions
--- a/src/common/convertfp16tofloat.cpp
+++ b/src/common/convertfp16tofloat.cpp
@@ -5,98 +5,103 @@
 #include <immintrin.h>
 #include <sys/types.h>
 namespace  {
-    const ushort CONVERT_AND_VALUE = 15;
-    // andblack
-    const __m128i andBlock = _mm_set_epi16(15, 15, 15, 15, 15, 15, 15, 15);
-    const __m128i andBlock2 =
-        _mm_set_epi16(2047, 2047, 2047, 2047, 2047, 2047, 2047, 2047);
-    const __m128i zeroBlock = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
-    const  __m128i oneBlock = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
-    const __m128i twokBlock =
-        _mm_set_epi16(2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048);
-    const uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
-    void convert(short * ptr, float* des,bool single = false){
-        // 初始化值
-        auto value = _mm_set_epi16(ptr[0], ptr[1], ptr[2], ptr[3], single?ptr[0]:ptr[4], single?ptr[0]:ptr[5],
-                                    single?ptr[0]:ptr[6], single?ptr[0]:ptr[7]);
-        auto uvalue = _mm_set_epi16(
-            (ushort)ptr[0], (ushort)ptr[1], (ushort)ptr[2], (ushort)ptr[3],
-            (ushort)(single?ptr[0]:ptr[4]), (ushort)(single?ptr[0]:ptr[5]),
-             (ushort)(single?ptr[0]:ptr[6]), (ushort)(single?ptr[0]:ptr[7]));
-        // 位移
-        auto sign_bit = _mm_srli_epi16(value, 15); // 右移16位取符号位
-        auto exponent = _mm_srli_epi16(uvalue, 11);
-        // and
-        exponent = _mm_and_si128(exponent, andBlock);
-        // and ，then convert to int 32 bits
-        auto fraction3 = _mm256_cvtepi16_epi32(_mm_and_si128(uvalue, andBlock2));
-        auto hidden_bit_mask =
-            (_mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ) &
-            _mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_EQ)) |
-            (_mm_cmp_epi16_mask(sign_bit, zeroBlock, _MM_CMPINT_EQ) &
-            _mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_NE));
-        auto hidden_bit16 = _mm_maskz_set1_epi16(hidden_bit_mask, 2048);
-        auto hidden_bit32 = _mm256_cvtepi16_epi32(hidden_bit16);
-        auto outputBlock = _mm256_add_epi32(fraction3, hidden_bit32);
-        auto sign_bit_add_value = _mm256_maskz_set1_epi32(
-            _mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ),
-            CONVERT_ADD_VALUE);
-        outputBlock = _mm256_add_epi32(outputBlock, sign_bit_add_value);
-        auto exponent_mask =
-            _mm_cmp_epi16_mask(oneBlock, exponent, _MM_CMPINT_LT);
-        exponent = _mm_sub_epi16(exponent, oneBlock);
-        auto exponent32 = _mm256_cvtepi16_epi32(exponent);
-        auto zeroBlock32 = _mm256_cvtepi16_epi32(zeroBlock);
-        auto offsetCount =
-            _mm256_mask_blend_epi32(exponent_mask, zeroBlock32, exponent32);

-        outputBlock = _mm256_sllv_epi32(outputBlock, offsetCount);
+    // const ushort CONVERT_AND_VALUE = 15;
+    // // andBlock masks
+    // const __m128i andBlock = _mm_set_epi16(15, 15, 15, 15, 15, 15, 15, 15);
+    // const __m128i andBlock2 =
+    //     _mm_set_epi16(2047, 2047, 2047, 2047, 2047, 2047, 2047, 2047);
+    // const __m128i zeroBlock = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
+    // const  __m128i oneBlock = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
+    // const __m128i twokBlock =
+    //     _mm_set_epi16(2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048);
+    // const uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
+    // void convert(short * ptr, double* des,bool single = false){
+    //     // Initialize the values
+    //     auto value = _mm_set_epi16(ptr[0], ptr[1], ptr[2], ptr[3], single?ptr[0]:ptr[4], single?ptr[0]:ptr[5],
+    //                                 single?ptr[0]:ptr[6], single?ptr[0]:ptr[7]);
+    //     auto uvalue = _mm_set_epi16(
+    //         (ushort)ptr[0], (ushort)ptr[1], (ushort)ptr[2], (ushort)ptr[3],
+    //         (ushort)(single?ptr[0]:ptr[4]), (ushort)(single?ptr[0]:ptr[5]),
+    //          (ushort)(single?ptr[0]:ptr[6]), (ushort)(single?ptr[0]:ptr[7]));
+    //     // Shifts
+    //     auto sign_bit = _mm_srli_epi16(value, 15); // shift right by 15 bits to isolate the sign bit
+    //     auto exponent = _mm_srli_epi16(uvalue, 11);
+    //     // and
+    //     exponent = _mm_and_si128(exponent, andBlock);
+    //     // AND, then widen to 32-bit integers
+    //     auto fraction3 = _mm256_cvtepi16_epi32(_mm_and_si128(uvalue, andBlock2));
+    //     auto hidden_bit_mask =
+    //         (_mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ) &
+    //         _mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_EQ)) |
+    //         (_mm_cmp_epi16_mask(sign_bit, zeroBlock, _MM_CMPINT_EQ) &
+    //         _mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_NE));
+    //     auto hidden_bit16 = _mm_maskz_set1_epi16(hidden_bit_mask, 2048);
+    //     auto hidden_bit32 = _mm256_cvtepi16_epi32(hidden_bit16);
+    //     auto outputBlock = _mm256_add_epi32(fraction3, hidden_bit32);
+    //     auto sign_bit_add_value = _mm256_maskz_set1_epi32(
+    //         _mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ),
+    //         CONVERT_ADD_VALUE);
+    //     outputBlock = _mm256_add_epi32(outputBlock, sign_bit_add_value);
+    //     auto exponent_mask =
+    //         _mm_cmp_epi16_mask(oneBlock, exponent, _MM_CMPINT_LT);
+    //     exponent = _mm_sub_epi16(exponent, oneBlock);
+    //     auto exponent32 = _mm256_cvtepi16_epi32(exponent);
+    //     auto zeroBlock32 = _mm256_cvtepi16_epi32(zeroBlock);
+    //     auto offsetCount =
+    //         _mm256_mask_blend_epi32(exponent_mask, zeroBlock32, exponent32);
+
+
+    //     outputBlock = _mm256_sllv_epi32(outputBlock, offsetCount);
        
-        des[3] = _mm256_extract_epi32(outputBlock, 4);
-        des[2] = _mm256_extract_epi32(outputBlock, 5);
-        des[1] = _mm256_extract_epi32(outputBlock, 6);
-        des[0] = _mm256_extract_epi32(outputBlock, 7);
-        if(single) return;
-        des[7] = _mm256_extract_epi32(outputBlock, 0);
-        des[6] = _mm256_extract_epi32(outputBlock, 1);
-        des[5] = _mm256_extract_epi32(outputBlock, 2);
-        des[4] = _mm256_extract_epi32(outputBlock, 3);
+    //     des[3] = _mm256_extract_epi32(outputBlock, 4);
+    //     des[2] = _mm256_extract_epi32(outputBlock, 5);
+    //     des[1] = _mm256_extract_epi32(outputBlock, 6);
+    //     des[0] = _mm256_extract_epi32(outputBlock, 7);
+    //     if(single) return;
+    //     des[7] = _mm256_extract_epi32(outputBlock, 0);
+    //     des[6] = _mm256_extract_epi32(outputBlock, 1);
+    //     des[5] = _mm256_extract_epi32(outputBlock, 2);
+    //     des[4] = _mm256_extract_epi32(outputBlock, 3);

-    }
+    // }
 }

 Aurora::Matrix Recon::convertfp16tofloat(Aurora::Matrix aMatrix) {
-    auto input = aMatrix.getData();
-    // uint16变换为float(32位)输出大小翻倍
-    auto output = Aurora::malloc(aMatrix.getDataSize() * 4);
-    size_t rows = aMatrix.getDataSize() * sizeof(float) / sizeof(short);
-    size_t total_count = aMatrix.getDataSize();


-    #pragma omp parallel for
-    for (size_t i = 0; i < total_count; i += 8) {
-        // 循环展开以避免过度的线程调用
-        if (i < total_count) {
-        auto ptr = (short *)(input + i);
-        float *des = output + i * 4;
-            ::convert(ptr, des,i+1>total_count);
-        }
-        if (i+2 < total_count) {
-        auto ptr = (short *)(input + i + 2);
-        float *des = output + (i+2) * 4;
-            ::convert(ptr, des,i+3>total_count);
-        }
-        if (i+4 < total_count) {
-        auto ptr = (short *)(input + i + 4);
-        float *des = output + (i+4) * 4;
-            ::convert(ptr, des,i+5>total_count);
-        }
-        if (i+6 < total_count) {
-        auto ptr = (short *)(input + i + 6);
-        float *des = output + (i+6) * 4;
-            ::convert(ptr, des,i+7>total_count);
-        }
-    }
-    return Aurora::Matrix::New(output, aMatrix.getDimSize(0),
-                                aMatrix.getDimSize(1), aMatrix.getDimSize(2));
+    // NOTE: the SIMD-based CPU conversion is disabled; its former body is kept below, commented out.
+    // An empty matrix is returned so control never flows off the end of this non-void function (UB).
+    // auto input = aMatrix.getData();
+    // // convert uint16 to float (32-bit); the output size doubles
+    // auto output = Aurora::malloc(aMatrix.getDataSize() * 4);
+    // size_t rows = aMatrix.getDataSize() * sizeof(double) / sizeof(short);
+    // size_t total_count = aMatrix.getDataSize();
+    // #pragma omp parallel for
+    // for (size_t i = 0; i < total_count; i += 8) {
+    //     // unroll the loop to avoid excessive thread invocations
+    //     if (i < total_count) {
+    //     auto ptr = (short *)(input + i);
+    //     double *des = output + i * 4;
+    //         ::convert(ptr, des,i+1>total_count);
+    //     }
+    //     if (i+2 < total_count) {
+    //     auto ptr = (short *)(input + i + 2);
+    //     double *des = output + (i+2) * 4;
+    //         ::convert(ptr, des,i+3>total_count);
+    //     }
+    //     if (i+4 < total_count) {
+    //     auto ptr = (short *)(input + i + 4);
+    //     double *des = output + (i+4) * 4;
+    //         ::convert(ptr, des,i+5>total_count);
+    //     }
+    //     if (i+6 < total_count) {
+    //     auto ptr = (short *)(input + i + 6);
+    //     double *des = output + (i+6) * 4;
+    //         ::convert(ptr, des,i+7>total_count);
+    //     }
+    // }
+    // return Aurora::Matrix::New(output, aMatrix.getDimSize(0),
+    //                             aMatrix.getDimSize(1), aMatrix.getDimSize(2));
+    return Aurora::Matrix();
 }
--- a/src/common/dataBlockCreation/getAScanBlockPreprocessed.cpp
+++ b/src/common/dataBlockCreation/getAScanBlockPreprocessed.cpp
@@ -1,5 +1,6 @@
 #include "getAScanBlockPreprocessed.h"

+#include "CudaMatrix.h"
 #include "Matrix.h"
 #include "blockingGeometryInfo.h"
 #include "removeDataFromArrays.h"
@@ -10,15 +11,36 @@
 #include "src/transmissionReconstruction/dataFilter/dataFilter.h"
 #include "src/reflectionReconstruction/dataFilter.h"

+#include "Aurora.h"
+
 using namespace Aurora;
 using namespace Recon;

+#include <sys/time.h>
+#include <iostream>
+// Debug helper: prints the current time of day as H:MM:SS.mmm to stdout (hard-coded +8 h offset).
+void printTime()
+{
+    struct timeval now;
+    gettimeofday(&now, nullptr);
+    const int secOfDay = (now.tv_sec + 3600 * 8) % 86400; // add the fixed 8 h offset, wrap to one day
+    const int hours = secOfDay / 3600, minutes = (secOfDay % 3600) / 60, seconds = secOfDay % 60;
+    const int ms = now.tv_usec / 1000; // zero-padded below so 7 ms prints ".007", not ".7"
+    std::cout << hours << ":" << (minutes < 10 ? "0" : "") << minutes << ":" << (seconds < 10 ? "0" : "") << seconds
+              << "." << (ms < 100 ? "0" : "") << (ms < 10 ? "0" : "") << ms << std::endl;
+}
+
 AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn,
                                                        const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
                                                        bool aApplyFilter, bool aTransReco)
 {
+    //std::cout<<"strart"<<std::endl;
+//printTime();
+//550ms
    AscanBlockPreprocessed result;
    AscanBlock ascanBlock = getAscanBlock(aParser, aMp, aSl, aSn, aRl, aRn);
+//printTime();
+//10ms
    result.gainBlock = ascanBlock.gainBlock;
    result.mpBlock = ascanBlock.mpBlock;
    result.rlBlock = ascanBlock.rlBlock;
@@ -26,6 +48,8 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
    result.slBlock = ascanBlock.slBlock;
    result.snBlock = ascanBlock.snBlock;
    GeometryBlock geometryBlock = blockingGeometryInfos(aGeom, ascanBlock.rnBlock, ascanBlock.rlBlock, ascanBlock.snBlock, ascanBlock.slBlock, ascanBlock.mpBlock);
+//printTime();
+//3ms    
    result.receiverPositionBlock = geometryBlock.receiverPositionBlock;
    result.senderPositionBlock = geometryBlock.senderPositionBlock;
    if(aApplyFilter)
@@ -40,7 +64,8 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
        {
            usedData = filterReflectionData(geometryBlock.receiverPositionBlock, geometryBlock.senderPositionBlock, geometryBlock.senderNormalBlock, reflectParams::constrictReflectionAngles);
        }
-
+//printTime();
+//150ms
        ascanBlock.ascanBlock = removeDataFromArrays(ascanBlock.ascanBlock, usedData);
        result.mpBlock = removeDataFromArrays(ascanBlock.mpBlock, usedData);
        result.slBlock = removeDataFromArrays(ascanBlock.slBlock, usedData);
@@ -51,7 +76,8 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
        result.senderPositionBlock = removeDataFromArrays(geometryBlock.senderPositionBlock, usedData);
        result.receiverPositionBlock = removeDataFromArrays(geometryBlock.receiverPositionBlock, usedData);
        result.gainBlock = removeDataFromArrays(ascanBlock.gainBlock, usedData);
-
+//printTime();
+//120ms
    }

    if (ascanBlock.ascanBlock.getDataSize() > 0)
@@ -62,6 +88,72 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
    {
        result.ascanBlockPreprocessed = ascanBlock.ascanBlock;
    }
-
+//printTime();    
    return result;
-}
+}
+
+
+// CUDA variant of getAscanBlockPreprocessed(): the A-scan samples and the gain block are handed
+// back as CudaMatrix (created with toDeviceMatrix()); all remaining blocks stay plain Matrix objects.
+// aApplyFilter - when true, every block is reduced with the selection computed by the data filter.
+// aTransReco   - true selects filterTransmissionData(), false selects filterReflectionData().
+// Returns the (optionally filtered) blocks bundled in an AscanBlockPreprocessedCuda.
+AscanBlockPreprocessedCuda Recon::getAscanBlockPreprocessedCuda(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn, 
+                                                         const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
+                                                         bool aApplyFilter, bool aTransReco)
+{
+    // Timing notes left by the original author (what each figure refers to is not stated): 550ms, 300ms, 3ms, 40ms, 10ms.
+    AscanBlockPreprocessedCuda result;
+    AscanBlock rawBlock = getAscanBlock(aParser, aMp, aSl, aSn, aRl, aRn);
+
+    // Convert sample and gain data with toDeviceMatrix() before any filtering takes place.
+    result.ascanBlockPreprocessed = rawBlock.ascanBlock.toDeviceMatrix();
+    result.gainBlock = rawBlock.gainBlock.toDeviceMatrix();
+    result.mpBlock = rawBlock.mpBlock;
+    result.slBlock = rawBlock.slBlock;
+    result.snBlock = rawBlock.snBlock;
+    result.rlBlock = rawBlock.rlBlock;
+    result.rnBlock = rawBlock.rnBlock;
+
+    GeometryBlock geometry = blockingGeometryInfos(aGeom, rawBlock.rnBlock, rawBlock.rlBlock, rawBlock.snBlock, rawBlock.slBlock, rawBlock.mpBlock);
+    result.senderPositionBlock = geometry.senderPositionBlock;
+    result.receiverPositionBlock = geometry.receiverPositionBlock;
+
+    if (aApplyFilter)
+    {
+        // Choose the filter that matches the reconstruction mode.
+        Matrix selection;
+        if (!aTransReco)
+        {
+            selection = filterReflectionData(geometry.receiverPositionBlock, geometry.senderPositionBlock, geometry.senderNormalBlock, reflectParams::constrictReflectionAngles);
+        }
+        else
+        {
+            selection = filterTransmissionData(rawBlock.slBlock, rawBlock.snBlock, rawBlock.rlBlock, rawBlock.rnBlock,
+                                               aGeom.sensData, geometry.senderNormalBlock, geometry.receiverNormalBlock);
+        }
+
+        // CudaMatrix members go through valid(), host Matrix members through removeDataFromArrays().
+        CudaMatrix selectionDevice = selection.toDeviceMatrix();
+        result.ascanBlockPreprocessed = valid(result.ascanBlockPreprocessed, selectionDevice);
+        result.mpBlock = removeDataFromArrays(rawBlock.mpBlock, selection);
+        result.slBlock = removeDataFromArrays(rawBlock.slBlock, selection);
+        result.snBlock = removeDataFromArrays(rawBlock.snBlock, selection);
+        result.rlBlock = removeDataFromArrays(rawBlock.rlBlock, selection);
+        result.rnBlock = removeDataFromArrays(rawBlock.rnBlock, selection);
+        result.senderPositionBlock = removeDataFromArrays(geometry.senderPositionBlock, selection);
+        result.receiverPositionBlock = removeDataFromArrays(geometry.receiverPositionBlock, selection);
+        result.gainBlock = valid(result.gainBlock, selectionDevice);
+    }
+
+    // NOTE(review): this tests the size of the unfiltered host block, whereas getAscanBlockPreprocessed()
+    // filters ascanBlock.ascanBlock before its size check - confirm that the difference is intended.
+    const bool hasSamples = rawBlock.ascanBlock.getDataSize() > 0;
+    if (hasSamples)
+    {
+        result.ascanBlockPreprocessed = preprocessAscanBlockCuda(result.ascanBlockPreprocessed, aMeasInfo);
+    }
+    // Otherwise result.ascanBlockPreprocessed is returned exactly as assigned above.
+
+    return result;
+}
--- a/src/common/dataBlockCreation/getAScanBlockPreprocessed.h
+++ b/src/common/dataBlockCreation/getAScanBlockPreprocessed.h
@@ -2,6 +2,7 @@
 #define GETASCANBLOCK_PREPROCESSED_H

 #include "Matrix.h"
+#include "CudaMatrix.h"
 #include "src/common/getGeometryInfo.h"
 #include "src/common/getMeasurementMetaData.h"

@@ -25,6 +26,23 @@ namespace Recon
    AscanBlockPreprocessed getAscanBlockPreprocessed(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn, 
                                                     const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
                                                     bool aApplyFilter, bool aTransReco);
+
+    struct AscanBlockPreprocessedCuda // return type of getAscanBlockPreprocessedCuda()
+    {
+        Aurora::CudaMatrix ascanBlockPreprocessed; // A-scan data as CudaMatrix (built from AscanBlock::ascanBlock via toDeviceMatrix())
+        Aurora::Matrix mpBlock; // mp/sl/sn/rl/rn blocks: plain Matrix values taken from the AscanBlock
+        Aurora::Matrix slBlock;
+        Aurora::Matrix snBlock;
+        Aurora::Matrix rlBlock;
+        Aurora::Matrix rnBlock;
+        Aurora::Matrix senderPositionBlock; // taken from GeometryBlock::senderPositionBlock
+        Aurora::Matrix receiverPositionBlock; // taken from GeometryBlock::receiverPositionBlock
+        Aurora::CudaMatrix gainBlock; // gain data as CudaMatrix (built from AscanBlock::gainBlock via toDeviceMatrix())
+    };
+    // CUDA counterpart of getAscanBlockPreprocessed(): same parameters, result carries CudaMatrix members.
+    AscanBlockPreprocessedCuda getAscanBlockPreprocessedCuda(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn, 
+                                                         const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
+                                                         bool aApplyFilter, bool aTransReco);
 }


--- a/src/common/dataBlockCreation/getAscanBlock.cpp
+++ b/src/common/dataBlockCreation/getAscanBlock.cpp
@@ -70,13 +70,13 @@ AscanBlock Recon::getAscanBlock(Parser* aParser, const Aurora::Matrix& aMp, cons
        for(int slIndex=0; slIndex<aSl.getDataSize();++slIndex)
        {
            OneTasAScanData oneTasData = aParser->getOneTasAscanDataOfMotorPosition(aSl[slIndex]);
-
+            #pragma omp parallel for
            for(int snIndex=0; snIndex<aSn.getDataSize();++snIndex)
            {
                //int mapperIndex = 0;           
-                #pragma omp parallel for
+                //#pragma omp parallel for
                for(int rlIndex=0; rlIndex<aRl.getDataSize();++rlIndex)
-                {        
+                {                            
                    for(int rnIndex=0; rnIndex<aRn.getDataSize(); ++rnIndex)
                    {
                        size_t mapperIndex = rnIndex + rlIndex*aRn.getDataSize();
--- a/src/common/fileHelper.h
+++ b/src/common/fileHelper.h
@@ -6,8 +6,8 @@
 namespace Recon
 {
    const std::string DEFAULT_CONFIG_PATH = "/home/UR/ConfigFiles/";
-    const std::string DEFAULT_OUTPUT_PATH = "/home/UR/ReconResult/USCT_Result.mat";
-    const std::string DEFAULT_OUTPUT_FILENAME = "USCT_Result.mat";
+    const std::string DEFAULT_OUTPUT_PATH = "/home/UR/ReconResult/";
+    const std::string DEFAULT_OUTPUT_FILENAME = "sun.mat";

    std::string getPath(const std::string &aFullPath);
    bool endsWithMat(const std::string &aStr);
--- a/src/common/preprocessAscanBlock.cpp
+++ b/src/common/preprocessAscanBlock.cpp
@@ -1,5 +1,6 @@
 #include "preprocessAscanBlock.h"
 #include "Function1D.h"
+#include "Function1D.cuh"
 #include <cstddef>

 Aurora::Matrix Recon::preprocessAscanBlock(const Aurora::Matrix& aAscans, const MeasurementInfo& aMeasInfo)
@@ -20,4 +21,14 @@ Aurora::Matrix Recon::preprocessAscanBlock(const Aurora::Matrix& aAscans, const
    // end

    return result;
+}
+
+// Converts the A-scans with Aurora::convertfp16tofloatCuda() when aMeasInfo.ascanDataType is
+// "float16"; for any other data type the input matrix is returned as it is.
+Aurora::CudaMatrix Recon::preprocessAscanBlockCuda(const Aurora::CudaMatrix& aAscans, const MeasurementInfo& aMeasInfo)
+{
+    if (!(aMeasInfo.ascanDataType == "float16"))
+        return aAscans;
+    const auto dim0 = aAscans.getDimSize(0), dim1 = aAscans.getDimSize(1);
+    return Aurora::convertfp16tofloatCuda(aAscans, dim0, dim1);
 }
--- a/src/common/preprocessAscanBlock.h
+++ b/src/common/preprocessAscanBlock.h
@@ -7,6 +7,8 @@
 namespace Recon
 {
    Aurora::Matrix preprocessAscanBlock(const Aurora::Matrix& aAscans, const MeasurementInfo& aMeasInfo);
+
+    Aurora::CudaMatrix preprocessAscanBlockCuda(const Aurora::CudaMatrix& aAscans, const MeasurementInfo& aMeasInfo);
 }

 #endif