feat: Add cuda function in getTransmission and detection.

This commit is contained in:
sunwen
2024-12-25 11:11:31 +08:00
parent 20ca7f0ce5
commit 4daf8e0c0b
3 changed files with 60 additions and 91 deletions

View File

@@ -10,6 +10,7 @@
#include "common/getMeasurementMetaData.h"
#include "config/config.h"
#include "transmissionReconstruction/detection/detection.cuh"
//remove detectTofAndAttMex function, use detectTofAndAtt function instead of
//保留函数和函数申明备忘以防以后再次使用以下头文件从动态库中映入了detectTofAndAttMex的核心函数
@@ -417,18 +418,17 @@ namespace Recon {
// return result;
// }
DetectResult transmissionDetection(const Aurora::Matrix &AscanBlock,
const Aurora::Matrix &AscanRefBlock,
const Aurora::Matrix &distBlock,
const Aurora::Matrix &distRefBlock,
DetectResult transmissionDetection(const Aurora::CudaMatrix &AscanBlock,
const Aurora::CudaMatrix &AscanRefBlock,
const Aurora::CudaMatrix &distBlock,
const Aurora::CudaMatrix &distRefBlock,
const Aurora::Matrix &sosWaterBlock,
const Aurora::Matrix &sosWaterRefBlock,
float expectedSOSWater) {
auto _sosWaterBlock = temperatureToSoundSpeed(sosWaterBlock, "marczak");
auto _sosWaterRefBlock = temperatureToSoundSpeed(sosWaterRefBlock, "marczak");
auto _sosWaterBlock = temperatureToSoundSpeed(sosWaterBlock, "marczak").toDeviceMatrix();
auto _sosWaterRefBlock = temperatureToSoundSpeed(sosWaterRefBlock, "marczak").toDeviceMatrix();
switch (Recon::transParams::version) {
//remove detectTofAndAttMex function, use detectTofAndAtt function instead of
//保留函数和函数申明备忘,以防以后再次使用
// case 1: {
// return detectTofAndAttMex(
// AscanBlock, AscanRefBlock, distBlock, distRefBlock,
@@ -440,13 +440,18 @@ namespace Recon {
// }
// case 2:
default:
return detectTofAndAtt(
auto r = detectTofAndAtt(
AscanBlock, AscanRefBlock, distBlock, distRefBlock,
_sosWaterBlock, _sosWaterRefBlock, Recon::transParams::resampleFactor, Recon::transParams::nThreads,
expectedSOSWater, Recon::transParams::useTimeWindowing,
Recon::transParams::aScanReconstructionFrequency, Recon::transParams::detectionWindowATT,
Recon::transParams::offsetElectronic, Recon::transParams::detectionWindowSOS, Recon::transParams::minSpeedOfSound,
Recon::transParams::maxSpeedOfSound, Recon::transParams::gaussWindow);
DetectResult ret;
ret.att = r.att.toHostMatrix();
ret.sosValue = r.sosValue.toHostMatrix();
ret.tof = r.tof.toHostMatrix();
return ret;
}
}
}

View File

@@ -77,8 +77,8 @@ detectTofAndAtt(
DetectResult
transmissionDetection(
const Aurora::Matrix &AscanBlock, const Aurora::Matrix &AscanRefBlock,
const Aurora::Matrix &distBlock, const Aurora::Matrix &distRefBlock,
const Aurora::CudaMatrix &AscanBlock, const Aurora::CudaMatrix &AscanRefBlock,
const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &distRefBlock,
const Aurora::Matrix &sosWaterBlock, const Aurora::Matrix &sosWaterRefBlock, float expectedSOSWater);
} // namespace Recon

View File

@@ -1,4 +1,7 @@
#include "getTransmissionData.h"
#include "getTransmissionData.cuh"
#include "AuroraDefs.h"
#include "CudaMatrix.h"
#include <cstddef>
#include <iostream>
@@ -10,10 +13,13 @@
#include <mkl_cblas.h>
#include <mkl_vml_functions.h>
#include <cuda_runtime.h>
#include "Function.h"
#include "Function2D.h"
#include "Function3D.h"
#include "Function1D.cuh"
#include "Function2D.cuh"
#include "config/config.h"
#include "common/dataBlockCreation/removeDataFromArrays.h"
@@ -42,8 +48,8 @@ namespace
Matrix waterTempBlock;
MetaInfos metaInfos;
Matrix ascanBlock;
Matrix ascanBlockRef;
CudaMatrix ascanBlock;
CudaMatrix ascanBlockRef;
Matrix dists;
Matrix distRefBlock;
Matrix waterTempRefBlock;
@@ -58,13 +64,20 @@ namespace
int BUFFER_COUNT = 0;
int BUFFER_SIZE = 4;//<=8
Matrix prepareAScansForTransmissionDetection(const Matrix& aAscanBlock, const Matrix& aGainBlock)
CudaMatrix prepareAScansForTransmissionDetection(const CudaMatrix& aAscanBlock, const CudaMatrix& aGainBlock)
{
Matrix result = aAscanBlock / repmat(aGainBlock, aAscanBlock.getDimSize(0), 1);
CudaMatrix result = aAscanBlock / repmat(aGainBlock, aAscanBlock.getDimSize(0), 1);
result = result - repmat(mean(result,FunctionDirection::Column), result.getDimSize(0), 1);
return result;
}
Aurora::CudaMatrix calculateSnr(const Aurora::CudaMatrix &aMDataBlock,
float aReferenceNoise) {
auto maxSignal = max(abs(aMDataBlock));
auto snrBlock = 10 * log(maxSignal / aReferenceNoise, 10);
return snrBlock;
}
BlockOfTransmissionData getBlockOfTransmissionData(const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList,
const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo& aGeomRef,
const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef,
@@ -72,13 +85,13 @@ namespace
{
BlockOfTransmissionData result;
MetaInfos metaInfos;
auto blockData = getAscanBlockPreprocessed(aParser, aMp, aSl, aSn, aRlList, aRnList, aGeom, aExpInfo, true, true);
auto blockDataRef = getAscanBlockPreprocessed(aParserRef, aMpRef, aSl, aSn, aRlList, aRnList, aGeomRef, aExpInfoRef, true, true);
Matrix ascanBlock = prepareAScansForTransmissionDetection(blockData.ascanBlockPreprocessed, blockData.gainBlock);
Matrix ascanBlockRef = prepareAScansForTransmissionDetection(blockDataRef.ascanBlockPreprocessed, blockDataRef.gainBlock);
blockData.ascanBlockPreprocessed = Matrix();
blockDataRef.ascanBlockPreprocessed = Matrix();
if(aExpInfo.Hardware == "USCT3dv3")
auto blockData = getAscanBlockPreprocessedCuda(aParser, aMp, aSl, aSn, aRlList, aRnList, aGeom, aExpInfo, true, true);
auto blockDataRef = getAscanBlockPreprocessedCuda(aParserRef, aMpRef, aSl, aSn, aRlList, aRnList, aGeomRef, aExpInfoRef, true, true);
CudaMatrix ascanBlock = prepareAScansForTransmissionDetection(blockData.ascanBlockPreprocessed, blockData.gainBlock);
CudaMatrix ascanBlockRef = prepareAScansForTransmissionDetection(blockDataRef.ascanBlockPreprocessed, blockDataRef.gainBlock);
blockData.ascanBlockPreprocessed = CudaMatrix();
blockDataRef.ascanBlockPreprocessed = CudaMatrix();
if(aExpInfo.Hardware == "USCT3dv3")
{
Matrix channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes);
float* channelListSizeData = Aurora::malloc(2);
@@ -92,71 +105,20 @@ namespace
{
channelListBlockData[i] = channelList[ind[i] - 1];
}
Matrix channelListBlock = Matrix::New(channelListBlockData, 1, channelListBlockSize);
Matrix fx = fft(ascanBlock);
float* fhData = Aurora::malloc(aExpInfo.matchedFilter.getDimSize(0) * channelListBlockSize, true);
Matrix fh = Matrix::New(fhData, aExpInfo.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
size_t matchedFilterRowDataSize = aExpInfo.matchedFilter.getDimSize(0)*2;
for(size_t i=0; i<channelListBlockSize; ++i)
{
cblas_scopy(matchedFilterRowDataSize, aExpInfo.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, 1 , fhData ,1);
fhData += matchedFilterRowDataSize;
}
// Matrix fxReal = Aurora::real(fx);
// Matrix fhReal = Aurora::real(fh);
// Matrix fxImag = Aurora::imag(fx);
// Matrix fhImag = Aurora::imag(fh);
// Matrix real = fxReal * fhReal + fxImag * fhImag;
// Matrix image = fxImag * fhReal - fxReal * fhImag;
float* value1 = Aurora::malloc(fx.getDataSize());
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData(), 2, value1, 1);
float* value2 = Aurora::malloc(fx.getDataSize());
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData() + 1, 2, value2, 1);
float* realData = Aurora::malloc(fx.getDataSize());
vsAdd(fx.getDataSize(), value1, value2, realData);
Matrix real = Matrix::New(realData, fx.getDimSize(0), fx.getDimSize(1));
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData(), 2, value1, 1);
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData() + 1, 2, value2, 1);
float* imagData = Aurora::malloc(fx.getDataSize());
vsSub(fx.getDataSize(), value1, value2, imagData);
Matrix image = Matrix::New(imagData, fx.getDimSize(0), fx.getDimSize(1));
float* complexData = Aurora::malloc(real.getDataSize(), true);
cblas_scopy(real.getDataSize(), real.getData(), 1 , complexData ,2);
cblas_scopy(image.getDataSize(), image.getData(), 1 , complexData + 1 ,2);
Matrix complex = Matrix::New(complexData, real.getDimSize(0), real.getDimSize(1), 1, Aurora::Complex);
ascanBlock = Aurora::real(ifft(complex));
fx = fft(ascanBlockRef);
fhData = Aurora::malloc(aExpInfoRef.matchedFilter.getDimSize(0) * channelListBlockSize, true);
fh = Matrix::New(fhData, aExpInfoRef.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
matchedFilterRowDataSize = aExpInfoRef.matchedFilter.getDimSize(0)*2;
for(size_t i=0; i<channelListBlockSize; ++i)
{
cblas_scopy(matchedFilterRowDataSize, aExpInfoRef.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, 1 , fhData ,1);
fhData += matchedFilterRowDataSize;
}
Matrix channelListBlock = Matrix::New(channelListBlockData, 1, channelListBlockSize);
CudaMatrix fx = fft(ascanBlock);
Aurora::CudaMatrix matchedFilterDevice = aExpInfo.matchedFilter.toDeviceMatrix();
Aurora::CudaMatrix channelListBlockDevice = channelListBlock.toDeviceMatrix();
Aurora::CudaMatrix fh = createFhMatrix(matchedFilterDevice, channelListBlockDevice);
// real = Aurora::real(fx) * Aurora::real(fh) + Aurora::imag(fx) * Aurora::imag(fh);
// image = Aurora::imag(fx) * Aurora::real(fh) - Aurora::real(fx) * Aurora::imag(fh);
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData(), 2, value1, 1);
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData() + 1, 2, value2, 1);
realData = Aurora::malloc(fx.getDataSize());
vsAdd(fx.getDataSize(), value1, value2, realData);
real = Matrix::New(realData, fx.getDimSize(0), fx.getDimSize(1));
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData(), 2, value1, 1);
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData() + 1, 2, value2, 1);
imagData = Aurora::malloc(fx.getDataSize());
vsSub(fx.getDataSize(), value1, value2, imagData);
image = Matrix::New(imagData, fx.getDimSize(0), fx.getDimSize(1));
Aurora::free(value1);
Aurora::free(value2);
complexData = Aurora::malloc(real.getDataSize(), true);
cblas_scopy(real.getDataSize(), real.getData(), 1 , complexData ,2);
cblas_scopy(image.getDataSize(), image.getData(), 1 , complexData + 1 ,2);
complex = Matrix::New(complexData, real.getDimSize(0), real.getDimSize(1), 1, Aurora::Complex);
// image = Aurora::imag(fx) * Aurora::real(fh) - Aurora::real(fx) * Aurora::imag(fh);
// complex = real,image
CudaMatrix complex = getTransmissionDataSubFunction(fx, fh);
ascanBlock = Aurora::real(ifft(complex));
fx = fft(ascanBlockRef);
matchedFilterDevice = aExpInfoRef.matchedFilter.toDeviceMatrix();
fh = createFhMatrix(matchedFilterDevice, channelListBlockDevice);
complex = getTransmissionDataSubFunction(fx, fh);
ascanBlockRef = Aurora::real(ifft(complex));
}
else
@@ -167,8 +129,8 @@ namespace
if(transParams::applyCalib)
{
metaInfos.snrValues = calculateSnr(ascanBlock, aSnrRmsNoise[0]);
metaInfos.snrValuesRef = calculateSnr(ascanBlockRef, aSnrRmsNoiseRef[0]);
metaInfos.snrValues = calculateSnr(ascanBlock, aSnrRmsNoise[0]).toHostMatrix();
metaInfos.snrValuesRef = calculateSnr(ascanBlockRef, aSnrRmsNoiseRef[0]).toHostMatrix();
}
Matrix dists = distanceBetweenTwoPoints(blockData.senderPositionBlock, blockData.receiverPositionBlock);
@@ -208,8 +170,9 @@ namespace
void getBlockOfTransmissionDataInThread(size_t aIndex, const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList,
const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo aGeomRef,
const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef,
const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef)
const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef, unsigned int aGPUId)
{
cudaSetDevice(aGPUId);
auto buffer = getBlockOfTransmissionData(aMp, aMpRef, aSl, aSn, aRlList, aRnList, aTasTemps,
aExpectedSOSWater, aGeom, aGeomRef, aSnrRmsNoise, aSnrRmsNoiseRef,
aExpInfo, aExpInfoRef, aPreComputes, aParser, aParserRef);
@@ -245,7 +208,7 @@ void createThreadForGetBlockOfTransmissionData(const Matrix& aMotorPos, const M
CREATE_BUFFER_CONDITION.wait(lock, []{return BUFFER_COUNT<BUFFER_SIZE;});
++BUFFER_COUNT;
lock.unlock();
speedUpThread[index] = std::thread(getBlockOfTransmissionDataInThread,index,mp,mpRef,sl,sn,aRlList,aRnList,aTasTemps,aExpectedSOSWater,aGeom,aGeomRef,aSnrRmsNoise,aSnrRmsNoiseRef,aExpInfo,aExpInfoRef,aPreComputes,aParser, aParserRef);
speedUpThread[index] = std::thread(getBlockOfTransmissionDataInThread,index,mp,mpRef,sl,sn,aRlList,aRnList,aTasTemps,aExpectedSOSWater,aGeom,aGeomRef,aSnrRmsNoise,aSnrRmsNoiseRef,aExpInfo,aExpInfoRef,aPreComputes,aParser, aParserRef, index % BUFFER_SIZE);
}
}
}
@@ -334,8 +297,9 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
lock.unlock();
auto blockData = BLOCK_OF_TRANSIMISSIONDARA_BUFFER[std::to_string(index)];
cudaSetDevice(index % BUFFER_SIZE);
DetectResult detect = transmissionDetection( blockData.ascanBlock, blockData.ascanBlockRef,
blockData.dists, blockData.distRefBlock,
blockData.dists.toDeviceMatrix(), blockData.distRefBlock.toDeviceMatrix(),
blockData.waterTempBlock, blockData.waterTempRefBlock,
aTemp.expectedSOSWater[0]);
blockData.attData = detect.att;