feat: Add cuda function in getTransmission and detection.

This commit is contained in:
sunwen
2024-12-25 11:11:31 +08:00
parent 20ca7f0ce5
commit 4daf8e0c0b
3 changed files with 60 additions and 91 deletions

View File

@@ -10,6 +10,7 @@
#include "common/getMeasurementMetaData.h" #include "common/getMeasurementMetaData.h"
#include "config/config.h" #include "config/config.h"
#include "transmissionReconstruction/detection/detection.cuh"
//remove detectTofAndAttMex function, use detectTofAndAtt function instead of //remove detectTofAndAttMex function, use detectTofAndAtt function instead of
//保留函数和函数申明备忘以防以后再次使用以下头文件从动态库中映入了detectTofAndAttMex的核心函数 //保留函数和函数申明备忘以防以后再次使用以下头文件从动态库中映入了detectTofAndAttMex的核心函数
@@ -417,18 +418,17 @@ namespace Recon {
// return result; // return result;
// } // }
DetectResult transmissionDetection(const Aurora::Matrix &AscanBlock,
const Aurora::Matrix &AscanRefBlock, DetectResult transmissionDetection(const Aurora::CudaMatrix &AscanBlock,
const Aurora::Matrix &distBlock, const Aurora::CudaMatrix &AscanRefBlock,
const Aurora::Matrix &distRefBlock, const Aurora::CudaMatrix &distBlock,
const Aurora::CudaMatrix &distRefBlock,
const Aurora::Matrix &sosWaterBlock, const Aurora::Matrix &sosWaterBlock,
const Aurora::Matrix &sosWaterRefBlock, const Aurora::Matrix &sosWaterRefBlock,
float expectedSOSWater) { float expectedSOSWater) {
auto _sosWaterBlock = temperatureToSoundSpeed(sosWaterBlock, "marczak"); auto _sosWaterBlock = temperatureToSoundSpeed(sosWaterBlock, "marczak").toDeviceMatrix();
auto _sosWaterRefBlock = temperatureToSoundSpeed(sosWaterRefBlock, "marczak"); auto _sosWaterRefBlock = temperatureToSoundSpeed(sosWaterRefBlock, "marczak").toDeviceMatrix();
switch (Recon::transParams::version) { switch (Recon::transParams::version) {
//remove detectTofAndAttMex function, use detectTofAndAtt function instead of
//保留函数和函数申明备忘,以防以后再次使用
// case 1: { // case 1: {
// return detectTofAndAttMex( // return detectTofAndAttMex(
// AscanBlock, AscanRefBlock, distBlock, distRefBlock, // AscanBlock, AscanRefBlock, distBlock, distRefBlock,
@@ -440,13 +440,18 @@ namespace Recon {
// } // }
// case 2: // case 2:
default: default:
return detectTofAndAtt( auto r = detectTofAndAtt(
AscanBlock, AscanRefBlock, distBlock, distRefBlock, AscanBlock, AscanRefBlock, distBlock, distRefBlock,
_sosWaterBlock, _sosWaterRefBlock, Recon::transParams::resampleFactor, Recon::transParams::nThreads, _sosWaterBlock, _sosWaterRefBlock, Recon::transParams::resampleFactor, Recon::transParams::nThreads,
expectedSOSWater, Recon::transParams::useTimeWindowing, expectedSOSWater, Recon::transParams::useTimeWindowing,
Recon::transParams::aScanReconstructionFrequency, Recon::transParams::detectionWindowATT, Recon::transParams::aScanReconstructionFrequency, Recon::transParams::detectionWindowATT,
Recon::transParams::offsetElectronic, Recon::transParams::detectionWindowSOS, Recon::transParams::minSpeedOfSound, Recon::transParams::offsetElectronic, Recon::transParams::detectionWindowSOS, Recon::transParams::minSpeedOfSound,
Recon::transParams::maxSpeedOfSound, Recon::transParams::gaussWindow); Recon::transParams::maxSpeedOfSound, Recon::transParams::gaussWindow);
DetectResult ret;
ret.att = r.att.toHostMatrix();
ret.sosValue = r.sosValue.toHostMatrix();
ret.tof = r.tof.toHostMatrix();
return ret;
} }
} }
} }

View File

@@ -77,8 +77,8 @@ detectTofAndAtt(
DetectResult DetectResult
transmissionDetection( transmissionDetection(
const Aurora::Matrix &AscanBlock, const Aurora::Matrix &AscanRefBlock, const Aurora::CudaMatrix &AscanBlock, const Aurora::CudaMatrix &AscanRefBlock,
const Aurora::Matrix &distBlock, const Aurora::Matrix &distRefBlock, const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &distRefBlock,
const Aurora::Matrix &sosWaterBlock, const Aurora::Matrix &sosWaterRefBlock, float expectedSOSWater); const Aurora::Matrix &sosWaterBlock, const Aurora::Matrix &sosWaterRefBlock, float expectedSOSWater);
} // namespace Recon } // namespace Recon

View File

@@ -1,4 +1,7 @@
#include "getTransmissionData.h" #include "getTransmissionData.h"
#include "getTransmissionData.cuh"
#include "AuroraDefs.h"
#include "CudaMatrix.h"
#include <cstddef> #include <cstddef>
#include <iostream> #include <iostream>
@@ -10,10 +13,13 @@
#include <mkl_cblas.h> #include <mkl_cblas.h>
#include <mkl_vml_functions.h> #include <mkl_vml_functions.h>
#include <cuda_runtime.h>
#include "Function.h" #include "Function.h"
#include "Function2D.h" #include "Function2D.h"
#include "Function3D.h" #include "Function3D.h"
#include "Function1D.cuh"
#include "Function2D.cuh"
#include "config/config.h" #include "config/config.h"
#include "common/dataBlockCreation/removeDataFromArrays.h" #include "common/dataBlockCreation/removeDataFromArrays.h"
@@ -42,8 +48,8 @@ namespace
Matrix waterTempBlock; Matrix waterTempBlock;
MetaInfos metaInfos; MetaInfos metaInfos;
Matrix ascanBlock; CudaMatrix ascanBlock;
Matrix ascanBlockRef; CudaMatrix ascanBlockRef;
Matrix dists; Matrix dists;
Matrix distRefBlock; Matrix distRefBlock;
Matrix waterTempRefBlock; Matrix waterTempRefBlock;
@@ -58,13 +64,20 @@ namespace
int BUFFER_COUNT = 0; int BUFFER_COUNT = 0;
int BUFFER_SIZE = 4;//<=8 int BUFFER_SIZE = 4;//<=8
Matrix prepareAScansForTransmissionDetection(const Matrix& aAscanBlock, const Matrix& aGainBlock) CudaMatrix prepareAScansForTransmissionDetection(const CudaMatrix& aAscanBlock, const CudaMatrix& aGainBlock)
{ {
Matrix result = aAscanBlock / repmat(aGainBlock, aAscanBlock.getDimSize(0), 1); CudaMatrix result = aAscanBlock / repmat(aGainBlock, aAscanBlock.getDimSize(0), 1);
result = result - repmat(mean(result,FunctionDirection::Column), result.getDimSize(0), 1); result = result - repmat(mean(result,FunctionDirection::Column), result.getDimSize(0), 1);
return result; return result;
} }
Aurora::CudaMatrix calculateSnr(const Aurora::CudaMatrix &aMDataBlock,
float aReferenceNoise) {
auto maxSignal = max(abs(aMDataBlock));
auto snrBlock = 10 * log(maxSignal / aReferenceNoise, 10);
return snrBlock;
}
BlockOfTransmissionData getBlockOfTransmissionData(const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList, BlockOfTransmissionData getBlockOfTransmissionData(const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList,
const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo& aGeomRef, const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo& aGeomRef,
const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef, const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef,
@@ -72,13 +85,13 @@ namespace
{ {
BlockOfTransmissionData result; BlockOfTransmissionData result;
MetaInfos metaInfos; MetaInfos metaInfos;
auto blockData = getAscanBlockPreprocessed(aParser, aMp, aSl, aSn, aRlList, aRnList, aGeom, aExpInfo, true, true); auto blockData = getAscanBlockPreprocessedCuda(aParser, aMp, aSl, aSn, aRlList, aRnList, aGeom, aExpInfo, true, true);
auto blockDataRef = getAscanBlockPreprocessed(aParserRef, aMpRef, aSl, aSn, aRlList, aRnList, aGeomRef, aExpInfoRef, true, true); auto blockDataRef = getAscanBlockPreprocessedCuda(aParserRef, aMpRef, aSl, aSn, aRlList, aRnList, aGeomRef, aExpInfoRef, true, true);
Matrix ascanBlock = prepareAScansForTransmissionDetection(blockData.ascanBlockPreprocessed, blockData.gainBlock); CudaMatrix ascanBlock = prepareAScansForTransmissionDetection(blockData.ascanBlockPreprocessed, blockData.gainBlock);
Matrix ascanBlockRef = prepareAScansForTransmissionDetection(blockDataRef.ascanBlockPreprocessed, blockDataRef.gainBlock); CudaMatrix ascanBlockRef = prepareAScansForTransmissionDetection(blockDataRef.ascanBlockPreprocessed, blockDataRef.gainBlock);
blockData.ascanBlockPreprocessed = Matrix(); blockData.ascanBlockPreprocessed = CudaMatrix();
blockDataRef.ascanBlockPreprocessed = Matrix(); blockDataRef.ascanBlockPreprocessed = CudaMatrix();
if(aExpInfo.Hardware == "USCT3dv3") if(aExpInfo.Hardware == "USCT3dv3")
{ {
Matrix channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes); Matrix channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes);
float* channelListSizeData = Aurora::malloc(2); float* channelListSizeData = Aurora::malloc(2);
@@ -93,70 +106,19 @@ namespace
channelListBlockData[i] = channelList[ind[i] - 1]; channelListBlockData[i] = channelList[ind[i] - 1];
} }
Matrix channelListBlock = Matrix::New(channelListBlockData, 1, channelListBlockSize); Matrix channelListBlock = Matrix::New(channelListBlockData, 1, channelListBlockSize);
Matrix fx = fft(ascanBlock); CudaMatrix fx = fft(ascanBlock);
float* fhData = Aurora::malloc(aExpInfo.matchedFilter.getDimSize(0) * channelListBlockSize, true); Aurora::CudaMatrix matchedFilterDevice = aExpInfo.matchedFilter.toDeviceMatrix();
Matrix fh = Matrix::New(fhData, aExpInfo.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex); Aurora::CudaMatrix channelListBlockDevice = channelListBlock.toDeviceMatrix();
size_t matchedFilterRowDataSize = aExpInfo.matchedFilter.getDimSize(0)*2; Aurora::CudaMatrix fh = createFhMatrix(matchedFilterDevice, channelListBlockDevice);
for(size_t i=0; i<channelListBlockSize; ++i)
{
cblas_scopy(matchedFilterRowDataSize, aExpInfo.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, 1 , fhData ,1);
fhData += matchedFilterRowDataSize;
}
// Matrix fxReal = Aurora::real(fx);
// Matrix fhReal = Aurora::real(fh);
// Matrix fxImag = Aurora::imag(fx);
// Matrix fhImag = Aurora::imag(fh);
// Matrix real = fxReal * fhReal + fxImag * fhImag;
// Matrix image = fxImag * fhReal - fxReal * fhImag;
float* value1 = Aurora::malloc(fx.getDataSize());
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData(), 2, value1, 1);
float* value2 = Aurora::malloc(fx.getDataSize());
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData() + 1, 2, value2, 1);
float* realData = Aurora::malloc(fx.getDataSize());
vsAdd(fx.getDataSize(), value1, value2, realData);
Matrix real = Matrix::New(realData, fx.getDimSize(0), fx.getDimSize(1));
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData(), 2, value1, 1);
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData() + 1, 2, value2, 1);
float* imagData = Aurora::malloc(fx.getDataSize());
vsSub(fx.getDataSize(), value1, value2, imagData);
Matrix image = Matrix::New(imagData, fx.getDimSize(0), fx.getDimSize(1));
float* complexData = Aurora::malloc(real.getDataSize(), true);
cblas_scopy(real.getDataSize(), real.getData(), 1 , complexData ,2);
cblas_scopy(image.getDataSize(), image.getData(), 1 , complexData + 1 ,2);
Matrix complex = Matrix::New(complexData, real.getDimSize(0), real.getDimSize(1), 1, Aurora::Complex);
ascanBlock = Aurora::real(ifft(complex));
fx = fft(ascanBlockRef);
fhData = Aurora::malloc(aExpInfoRef.matchedFilter.getDimSize(0) * channelListBlockSize, true);
fh = Matrix::New(fhData, aExpInfoRef.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
matchedFilterRowDataSize = aExpInfoRef.matchedFilter.getDimSize(0)*2;
for(size_t i=0; i<channelListBlockSize; ++i)
{
cblas_scopy(matchedFilterRowDataSize, aExpInfoRef.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, 1 , fhData ,1);
fhData += matchedFilterRowDataSize;
}
// real = Aurora::real(fx) * Aurora::real(fh) + Aurora::imag(fx) * Aurora::imag(fh); // real = Aurora::real(fx) * Aurora::real(fh) + Aurora::imag(fx) * Aurora::imag(fh);
// image = Aurora::imag(fx) * Aurora::real(fh) - Aurora::real(fx) * Aurora::imag(fh); // image = Aurora::imag(fx) * Aurora::real(fh) - Aurora::real(fx) * Aurora::imag(fh);
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData(), 2, value1, 1); // complex = real,image
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData() + 1, 2, value2, 1); CudaMatrix complex = getTransmissionDataSubFunction(fx, fh);
realData = Aurora::malloc(fx.getDataSize()); ascanBlock = Aurora::real(ifft(complex));
vsAdd(fx.getDataSize(), value1, value2, realData); fx = fft(ascanBlockRef);
real = Matrix::New(realData, fx.getDimSize(0), fx.getDimSize(1)); matchedFilterDevice = aExpInfoRef.matchedFilter.toDeviceMatrix();
fh = createFhMatrix(matchedFilterDevice, channelListBlockDevice);
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData(), 2, value1, 1); complex = getTransmissionDataSubFunction(fx, fh);
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData() + 1, 2, value2, 1);
imagData = Aurora::malloc(fx.getDataSize());
vsSub(fx.getDataSize(), value1, value2, imagData);
image = Matrix::New(imagData, fx.getDimSize(0), fx.getDimSize(1));
Aurora::free(value1);
Aurora::free(value2);
complexData = Aurora::malloc(real.getDataSize(), true);
cblas_scopy(real.getDataSize(), real.getData(), 1 , complexData ,2);
cblas_scopy(image.getDataSize(), image.getData(), 1 , complexData + 1 ,2);
complex = Matrix::New(complexData, real.getDimSize(0), real.getDimSize(1), 1, Aurora::Complex);
ascanBlockRef = Aurora::real(ifft(complex)); ascanBlockRef = Aurora::real(ifft(complex));
} }
else else
@@ -167,8 +129,8 @@ namespace
if(transParams::applyCalib) if(transParams::applyCalib)
{ {
metaInfos.snrValues = calculateSnr(ascanBlock, aSnrRmsNoise[0]); metaInfos.snrValues = calculateSnr(ascanBlock, aSnrRmsNoise[0]).toHostMatrix();
metaInfos.snrValuesRef = calculateSnr(ascanBlockRef, aSnrRmsNoiseRef[0]); metaInfos.snrValuesRef = calculateSnr(ascanBlockRef, aSnrRmsNoiseRef[0]).toHostMatrix();
} }
Matrix dists = distanceBetweenTwoPoints(blockData.senderPositionBlock, blockData.receiverPositionBlock); Matrix dists = distanceBetweenTwoPoints(blockData.senderPositionBlock, blockData.receiverPositionBlock);
@@ -208,8 +170,9 @@ namespace
void getBlockOfTransmissionDataInThread(size_t aIndex, const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList, void getBlockOfTransmissionDataInThread(size_t aIndex, const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList,
const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo aGeomRef, const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo aGeomRef,
const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef, const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef,
const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef) const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef, unsigned int aGPUId)
{ {
cudaSetDevice(aGPUId);
auto buffer = getBlockOfTransmissionData(aMp, aMpRef, aSl, aSn, aRlList, aRnList, aTasTemps, auto buffer = getBlockOfTransmissionData(aMp, aMpRef, aSl, aSn, aRlList, aRnList, aTasTemps,
aExpectedSOSWater, aGeom, aGeomRef, aSnrRmsNoise, aSnrRmsNoiseRef, aExpectedSOSWater, aGeom, aGeomRef, aSnrRmsNoise, aSnrRmsNoiseRef,
aExpInfo, aExpInfoRef, aPreComputes, aParser, aParserRef); aExpInfo, aExpInfoRef, aPreComputes, aParser, aParserRef);
@@ -245,7 +208,7 @@ void createThreadForGetBlockOfTransmissionData(const Matrix& aMotorPos, const M
CREATE_BUFFER_CONDITION.wait(lock, []{return BUFFER_COUNT<BUFFER_SIZE;}); CREATE_BUFFER_CONDITION.wait(lock, []{return BUFFER_COUNT<BUFFER_SIZE;});
++BUFFER_COUNT; ++BUFFER_COUNT;
lock.unlock(); lock.unlock();
speedUpThread[index] = std::thread(getBlockOfTransmissionDataInThread,index,mp,mpRef,sl,sn,aRlList,aRnList,aTasTemps,aExpectedSOSWater,aGeom,aGeomRef,aSnrRmsNoise,aSnrRmsNoiseRef,aExpInfo,aExpInfoRef,aPreComputes,aParser, aParserRef); speedUpThread[index] = std::thread(getBlockOfTransmissionDataInThread,index,mp,mpRef,sl,sn,aRlList,aRnList,aTasTemps,aExpectedSOSWater,aGeom,aGeomRef,aSnrRmsNoise,aSnrRmsNoiseRef,aExpInfo,aExpInfoRef,aPreComputes,aParser, aParserRef, index % BUFFER_SIZE);
} }
} }
} }
@@ -334,8 +297,9 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
lock.unlock(); lock.unlock();
auto blockData = BLOCK_OF_TRANSIMISSIONDARA_BUFFER[std::to_string(index)]; auto blockData = BLOCK_OF_TRANSIMISSIONDARA_BUFFER[std::to_string(index)];
cudaSetDevice(index % BUFFER_SIZE);
DetectResult detect = transmissionDetection( blockData.ascanBlock, blockData.ascanBlockRef, DetectResult detect = transmissionDetection( blockData.ascanBlock, blockData.ascanBlockRef,
blockData.dists, blockData.distRefBlock, blockData.dists.toDeviceMatrix(), blockData.distRefBlock.toDeviceMatrix(),
blockData.waterTempBlock, blockData.waterTempRefBlock, blockData.waterTempBlock, blockData.waterTempRefBlock,
aTemp.expectedSOSWater[0]); aTemp.expectedSOSWater[0]);
blockData.attData = detect.att; blockData.attData = detect.att;