Add cudaDeviceSynchronize() calls to the detection routines

This commit is contained in:
kradchen
2023-12-26 15:20:26 +08:00
parent 3c8268263c
commit 32ad08f8d7

View File

@@ -55,7 +55,7 @@ CudaMatrix Recon::calculateAttenuationCuda(const CudaMatrix &ascans,
ascans.getData(), ascans2.getData(), ascans.getData(), ascans2.getData(),
ascansRef.getData(), ascansRef2.getData(), ascansRef.getData(), ascansRef2.getData(),
ascans2.getDimSize(0)); ascans2.getDimSize(0));
cudaDeviceSynchronize();
auto pulseEnergy = Aurora::sum(ascans2^2); auto pulseEnergy = Aurora::sum(ascans2^2);
auto pulseEnergyEmpty = Aurora::sum(ascansRef2^2); auto pulseEnergyEmpty = Aurora::sum(ascansRef2^2);
@@ -86,6 +86,7 @@ Recon::detectAttVectorizedCuda(const CudaMatrix &Ascan, const CudaMatrix &AscanR
endPos.getData(), startPosRef.getData(), endPosRef.getData(), endPos.getData(), startPosRef.getData(), endPosRef.getData(),
tof.getData(), tof2.getData(), sizeAscan.getData(), sampleRate, tof.getData(), tof2.getData(), sizeAscan.getData(), sampleRate,
offsetElectronicSamples,detectionWindowATT); offsetElectronicSamples,detectionWindowATT);
cudaDeviceSynchronize();
return calculateAttenuationCuda(envelopeOfAScan, startPos, endPos, return calculateAttenuationCuda(envelopeOfAScan, startPos, endPos,
envelopeOfReferenceAScan, startPosRef, envelopeOfReferenceAScan, startPosRef,
endPosRef); endPosRef);
@@ -189,6 +190,7 @@ Recon::TimeWindowResultC Recon::applyTimeWindowing(const Aurora::CudaMatrix &Asc
calcResult.endSearch.getData(),AscanBlock.getData(),AscanBlockProcessed.getData(), calcResult.endSearch.getData(),AscanBlock.getData(),AscanBlockProcessed.getData(),
AscanBlock.getDimSize(0)); AscanBlock.getDimSize(0));
} }
cudaDeviceSynchronize();
Recon::TimeWindowResultC result; Recon::TimeWindowResultC result;
result.startSearch = calcResult.startSearch; result.startSearch = calcResult.startSearch;
result.AscanBlockProcessed = AscanBlockProcessed; result.AscanBlockProcessed = AscanBlockProcessed;
@@ -306,6 +308,7 @@ Recon::DetectResultC Recon::detectTofAndAtt(
} }
auto shiftInSamples = zerosCuda(1, c1.getDimSize(1)); auto shiftInSamples = zerosCuda(1, c1.getDimSize(1));
findMaxIndexKernel<<<c.getDimSize(1),256>>>(c.getData(),c.getDimSize(0),shiftInSamples.getData(),maxlag); findMaxIndexKernel<<<c.getDimSize(1),256>>>(c.getData(),c.getDimSize(0),shiftInSamples.getData(),maxlag);
cudaDeviceSynchronize();
if (useTimeWindowing) { if (useTimeWindowing) {
shiftInSamples = shiftInSamples - diffStartSearch; shiftInSamples = shiftInSamples - diffStartSearch;
} }