Speed up reflection. CPU-GPU concurrent computing

This commit is contained in:
sunwen
2023-11-09 15:33:40 +08:00
parent fedacced53
commit d015b38845

View File

@@ -20,15 +20,24 @@
#include <condition_variable>
#include <cstdio>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
using namespace Aurora;
using namespace Recon;
Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_mode, const Aurora::Matrix& aMotorPos,
namespace
{
// File-local hand-off state between producerThread (which pushes) and
// Recon::startReflectionReconstruction (which waits, reads and pops).

// Results of preprocessAScanBlockForReflection, pushed once per processed block.
std::queue<preprocessAScanRResult> PRODUCER_PROCESSDATAS;
// Block data pushed together with the matching PRODUCER_PROCESSDATAS entry,
// so the fronts of both queues belong to the same block.
std::queue<AscanBlockPreprocessed> PRODUCER_BLOCKDATAS;
// Locked around the pushes and pops of both queues.
std::mutex PRODUCER_MUTEX;
// Signalled with notify_one() after each pair is pushed; the consumer waits
// on it until both queues are non-empty.
std::condition_variable PRODUCER_CONDITION;
}
void producerThread( Parser* aParser, const Aurora::Matrix& aMotorPos,
const Aurora::Matrix& aSlList, const Aurora::Matrix& aSnList,
const Aurora::Matrix& aRlList, const Aurora::Matrix& aRnList,
GeometryInfo& aGeom, TransRecos& aTransRecos,
MeasurementInfo& aExpInfo, PreComputes& aPreComputes)
GeometryInfo aGeom, MeasurementInfo aExpInfo, PreComputes aPreComputes)
{
printf("Reflection reconstruction is carried out.");
@@ -48,18 +57,13 @@ Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_
{
aPreComputes.sincPeak_ft = determineOptimalPulse(aPreComputes.timeInterval, aExpInfo.expectedAScanLength);
}
printf(" - channel list");
auto channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes);
size_t numScans = aMotorPos.getDataSize() * aSlList.getDataSize() *
aSnList.getDataSize() * aRlList.getDataSize() *
aRnList.getDataSize();
printf(" - blocking");
Matrix Env = Aurora::zeros((int)reflectParams::imageXYZ[0],(int)reflectParams::imageXYZ[1],(int)reflectParams::imageXYZ[2]);
int numTakenScans = 0,numProcessedScans = 0,numPossibleScans = 0;
for(int i=0; i<aMotorPos.getDataSize(); ++i)
{
//#pragma omp parallel for num_threads(24)
for(int j=0; j<aSlList.getDataSize() / transParams::senderTASSize; ++j)
{
for(int k=0; k<aSnList.getDataSize() / transParams::senderElementSize; ++k)
@@ -84,17 +88,61 @@ Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_
auto preprocessData = preprocessAScanBlockForReflection(blockData.ascanBlockPreprocessed, blockData.mpBlock, blockData.slBlock,
blockData.snBlock, blockData.rlBlock, blockData.rnBlock, blockData.senderPositionBlock,
blockData.receiverPositionBlock, blockData.gainBlock, channelBlock, aExpInfo, aPreComputes);
Env = recontructSAFT(removeDataFromArrays(preprocessData.AscanBlock, preprocessData.usedData),
removeDataFromArrays(blockData.senderPositionBlock, preprocessData.usedData),
removeDataFromArrays(blockData.receiverPositionBlock, preprocessData.usedData),
removeDataFromArrays(blockData.mpBlock, preprocessData.usedData),
std::unique_lock<std::mutex> lock(PRODUCER_MUTEX);
PRODUCER_BLOCKDATAS.push(blockData);
PRODUCER_PROCESSDATAS.push(preprocessData);
lock.unlock();
PRODUCER_CONDITION.notify_one();
}
}
}
}
/**
 * @brief Runs the reflection reconstruction, consuming blocks that are
 *        preprocessed concurrently by producerThread.
 *
 * Resets every device listed in reflectParams::gpuSelectionList, starts
 * producerThread on its own std::thread, and then, for each iteration of the
 * motor-position / sender loop nest, takes one (block data, preprocess result)
 * pair from PRODUCER_BLOCKDATAS / PRODUCER_PROCESSDATAS and passes it to
 * recontructSAFT, which receives the running volume and returns the updated one.
 *
 * @param aParser      Forwarded to producerThread.
 * @param aSAFT_mode   Forwarded to recontructSAFT.
 * @param aMotorPos    Motor positions; its size bounds the outer loop.
 * @param aSlList      Sender list; size / transParams::senderTASSize bounds the middle loop.
 * @param aSnList      Sender list; size / transParams::senderElementSize bounds the inner loop.
 * @param aRlList      Forwarded to producerThread.
 * @param aRnList      Forwarded to producerThread.
 * @param aGeom        Forwarded to producerThread (std::thread stores its own copy).
 * @param aTransRecos  Forwarded to recontructSAFT.
 * @param aExpInfo     Forwarded to producerThread (std::thread stores its own copy).
 * @param aPreComputes Forwarded to producerThread (std::thread stores its own copy).
 * @return The volume returned by the last recontructSAFT call, or the initial
 *         zero volume of size reflectParams::imageXYZ if no block was processed.
 */
Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_mode, const Aurora::Matrix& aMotorPos,
                                                     const Aurora::Matrix& aSlList, const Aurora::Matrix& aSnList,
                                                     const Aurora::Matrix& aRlList, const Aurora::Matrix& aRnList,
                                                     GeometryInfo& aGeom, TransRecos& aTransRecos,
                                                     MeasurementInfo& aExpInfo, PreComputes& aPreComputes)
{
    // A failed device reset is reported on stderr and does not stop the run.
    for (size_t i = 0; i < reflectParams::gpuSelectionList.getDataSize(); i++)
    {
        std::string msg;
        if (!resetGPUDevice((int)reflectParams::gpuSelectionList[i],msg))
        {
            std::cerr<<msg<<std::endl;
        }
    }

    // std::thread copies every argument, so the producer works on its own
    // copies of the matrices and of aGeom / aExpInfo / aPreComputes.
    std::thread thread = std::thread(producerThread, aParser, aMotorPos, aSlList, aSnList, aRlList, aRnList, aGeom, aExpInfo, aPreComputes);

    Matrix Env = Aurora::zeros((int)reflectParams::imageXYZ[0],(int)reflectParams::imageXYZ[1],(int)reflectParams::imageXYZ[2]);

    // NOTE(review): this loop nest assumes producerThread pushes exactly one
    // pair per (i, j, k) iteration; if it pushes fewer, the wait below never
    // returns. Confirm against the producer's full loop body.
    for(int i=0; i<aMotorPos.getDataSize(); ++i)
    {
        //#pragma omp parallel for num_threads(24)
        for(int j=0; j<aSlList.getDataSize() / transParams::senderTASSize; ++j)
        {
            for(int k=0; k<aSnList.getDataSize() / transParams::senderElementSize; ++k)
            {
                // Take ownership of the next pair while the mutex is held.
                // Reading front() after unlocking would race with the
                // producer's concurrent push() on the same std::queue.
                std::unique_lock<std::mutex> lock(PRODUCER_MUTEX);
                PRODUCER_CONDITION.wait(lock, []{return !PRODUCER_PROCESSDATAS.empty() && !PRODUCER_BLOCKDATAS.empty();});
                auto processData = std::move(PRODUCER_PROCESSDATAS.front());
                auto blockData = std::move(PRODUCER_BLOCKDATAS.front());
                PRODUCER_PROCESSDATAS.pop();
                PRODUCER_BLOCKDATAS.pop();
                lock.unlock();

                // The reconstruction runs without the lock so the producer can
                // keep queueing further blocks in the meantime.
                Env = recontructSAFT(removeDataFromArrays(processData.AscanBlock, processData.usedData),
                                     removeDataFromArrays(blockData.senderPositionBlock, processData.usedData),
                                     removeDataFromArrays(blockData.receiverPositionBlock, processData.usedData),
                                     removeDataFromArrays(blockData.mpBlock, processData.usedData),
                                     aSAFT_mode, aTransRecos, Env);

                std::cout<<Env[0]<<"-" << Env[1] <<"-" << Env[2] <<"-" << Env[3]<<std::endl;
                RECON_INFO("Reflection Reconstructon: " + std::to_string(j));
            }
            // NOTE(review): if getDataSize() returns an integer type, the
            // quotient below is an integer division and is 0 whenever j*i is
            // smaller than the divisor — confirm the intended progress formula.
            Recon::notifyProgress(25+73*((j*i)/(aMotorPos.getDataSize() * aSlList.getDataSize())));
        }
    }

    thread.join();
    return Env;
}