Speed up reflection. CPU-GPU concurrent computing
This commit is contained in:
@@ -20,17 +20,26 @@
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
|
||||
using namespace Aurora;
|
||||
using namespace Recon;
|
||||
|
||||
Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_mode, const Aurora::Matrix& aMotorPos,
|
||||
// File-local hand-off state between producerThread() (which pushes) and
// Recon::startReflectionReconstruction() (which waits, reads and pops).
namespace
|
||||
{
|
||||
// Results of preprocessAScanBlockForReflection(), pushed by producerThread()
// once per processed block.
std::queue<preprocessAScanRResult> PRODUCER_PROCESSDATAS;
|
||||
// Block data pushed by producerThread() in the same critical section as the
// matching PRODUCER_PROCESSDATAS entry, so both queues advance in lockstep.
std::queue<AscanBlockPreprocessed> PRODUCER_BLOCKDATAS;
|
||||
// Held around every push to and pop from the two queues above.
std::mutex PRODUCER_MUTEX;
|
||||
// Notified (notify_one) by producerThread() after each push; the consumer
// waits on it until both queues are non-empty.
std::condition_variable PRODUCER_CONDITION;
|
||||
}
|
||||
|
||||
void producerThread( Parser* aParser, const Aurora::Matrix& aMotorPos,
|
||||
const Aurora::Matrix& aSlList, const Aurora::Matrix& aSnList,
|
||||
const Aurora::Matrix& aRlList, const Aurora::Matrix& aRnList,
|
||||
GeometryInfo& aGeom, TransRecos& aTransRecos,
|
||||
MeasurementInfo& aExpInfo, PreComputes& aPreComputes)
|
||||
GeometryInfo aGeom, MeasurementInfo aExpInfo, PreComputes aPreComputes)
|
||||
{
|
||||
printf("Reflection reconstruction is carried out.");
|
||||
printf("Reflection reconstruction is carried out.");
|
||||
|
||||
printf("Preperations for reconstructions.");
|
||||
|
||||
@@ -48,18 +57,13 @@ Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_
|
||||
{
|
||||
aPreComputes.sincPeak_ft = determineOptimalPulse(aPreComputes.timeInterval, aExpInfo.expectedAScanLength);
|
||||
}
|
||||
printf(" - channel list");
|
||||
auto channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes);
|
||||
size_t numScans = aMotorPos.getDataSize() * aSlList.getDataSize() *
|
||||
aSnList.getDataSize() * aRlList.getDataSize() *
|
||||
aRnList.getDataSize();
|
||||
printf(" - blocking");
|
||||
Matrix Env = Aurora::zeros((int)reflectParams::imageXYZ[0],(int)reflectParams::imageXYZ[1],(int)reflectParams::imageXYZ[2]);
|
||||
|
||||
int numTakenScans = 0,numProcessedScans = 0,numPossibleScans = 0;
|
||||
for(int i=0; i<aMotorPos.getDataSize(); ++i)
|
||||
{
|
||||
//#pragma omp parallel for num_threads(24)
|
||||
for(int j=0; j<aSlList.getDataSize() / transParams::senderTASSize; ++j)
|
||||
{
|
||||
for(int k=0; k<aSnList.getDataSize() / transParams::senderElementSize; ++k)
|
||||
@@ -84,17 +88,61 @@ Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_
|
||||
auto preprocessData = preprocessAScanBlockForReflection(blockData.ascanBlockPreprocessed, blockData.mpBlock, blockData.slBlock,
|
||||
blockData.snBlock, blockData.rlBlock, blockData.rnBlock, blockData.senderPositionBlock,
|
||||
blockData.receiverPositionBlock, blockData.gainBlock, channelBlock, aExpInfo, aPreComputes);
|
||||
Env = recontructSAFT(removeDataFromArrays(preprocessData.AscanBlock, preprocessData.usedData),
|
||||
removeDataFromArrays(blockData.senderPositionBlock, preprocessData.usedData),
|
||||
removeDataFromArrays(blockData.receiverPositionBlock, preprocessData.usedData),
|
||||
removeDataFromArrays(blockData.mpBlock, preprocessData.usedData),
|
||||
|
||||
std::unique_lock<std::mutex> lock(PRODUCER_MUTEX);
|
||||
PRODUCER_BLOCKDATAS.push(blockData);
|
||||
PRODUCER_PROCESSDATAS.push(preprocessData);
|
||||
lock.unlock();
|
||||
PRODUCER_CONDITION.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the reflection reconstruction as a producer/consumer pipeline.
 *
 * producerThread() is started on its own std::thread and pushes one entry per
 * block into the file-local PRODUCER_PROCESSDATAS / PRODUCER_BLOCKDATAS queues.
 * This function consumes exactly one entry per (motor position, sender TAS,
 * sender element) iteration and accumulates it into the image through
 * recontructSAFT().
 *
 * @param aParser       Forwarded to producerThread().
 * @param aSAFT_mode    Forwarded to recontructSAFT().
 * @param aMotorPos     Motor position list; its size bounds the outer loop.
 * @param aSlList       Sender list; size / transParams::senderTASSize bounds the middle loop.
 * @param aSnList       Sender list; size / transParams::senderElementSize bounds the inner loop.
 * @param aRlList       Forwarded to producerThread().
 * @param aRnList       Forwarded to producerThread().
 * @param aGeom         Forwarded to producerThread() (std::thread stores a copy of each argument).
 * @param aTransRecos   Forwarded to recontructSAFT().
 * @param aExpInfo      Forwarded to producerThread() (copied, see aGeom).
 * @param aPreComputes  Forwarded to producerThread() (copied, see aGeom).
 * @return The image matrix returned by the last recontructSAFT() call, or the
 *         zero-initialised image if no block was processed.
 */
Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_mode, const Aurora::Matrix& aMotorPos,
                                                    const Aurora::Matrix& aSlList, const Aurora::Matrix& aSnList,
                                                    const Aurora::Matrix& aRlList, const Aurora::Matrix& aRnList,
                                                    GeometryInfo& aGeom, TransRecos& aTransRecos,
                                                    MeasurementInfo& aExpInfo, PreComputes& aPreComputes)
{
    // Reset every selected GPU before use; a failed reset is reported but not fatal.
    for (size_t i = 0; i < reflectParams::gpuSelectionList.getDataSize(); i++)
    {
        std::string msg;
        if (!resetGPUDevice(static_cast<int>(reflectParams::gpuSelectionList[i]), msg))
        {
            std::cerr<<msg<<std::endl;
        }
    }

    // Preprocessing runs concurrently with the reconstruction loop below.
    std::thread producer(producerThread, aParser, aMotorPos, aSlList, aSnList, aRlList, aRnList, aGeom, aExpInfo, aPreComputes);

    Matrix Env = Aurora::zeros(static_cast<int>(reflectParams::imageXYZ[0]),
                               static_cast<int>(reflectParams::imageXYZ[1]),
                               static_cast<int>(reflectParams::imageXYZ[2]));

    // The loop bounds mirror the loops in producerThread(), so the number of
    // entries consumed here matches the number of entries pushed there.
    const auto motorPosCount      = aMotorPos.getDataSize();
    const auto senderTasCount     = aSlList.getDataSize() / transParams::senderTASSize;
    const auto senderElementCount = aSnList.getDataSize() / transParams::senderElementSize;

    for (int i = 0; i < motorPosCount; ++i)
    {
        for (int j = 0; j < senderTasCount; ++j)
        {
            for (int k = 0; k < senderElementCount; ++k)
            {
                std::unique_lock<std::mutex> lock(PRODUCER_MUTEX);
                PRODUCER_CONDITION.wait(lock, []{return !PRODUCER_PROCESSDATAS.empty() && !PRODUCER_BLOCKDATAS.empty();});

                // front() reads the containers' internal bookkeeping, which the
                // producer modifies in push(), so it must be called under the lock.
                // The queues use the default std::deque, whose push_back never
                // invalidates references to existing elements, so the references
                // stay valid after the lock is released.
                auto& processData = PRODUCER_PROCESSDATAS.front();
                auto& blockData   = PRODUCER_BLOCKDATAS.front();
                lock.unlock();

                // The expensive reconstruction runs without the lock so the
                // producer can keep queueing further blocks in the meantime.
                Env = recontructSAFT(removeDataFromArrays(processData.AscanBlock, processData.usedData),
                                     removeDataFromArrays(blockData.senderPositionBlock, processData.usedData),
                                     removeDataFromArrays(blockData.receiverPositionBlock, processData.usedData),
                                     removeDataFromArrays(blockData.mpBlock, processData.usedData),
                                     aSAFT_mode, aTransRecos, Env);

                lock.lock();
                PRODUCER_PROCESSDATAS.pop();
                PRODUCER_BLOCKDATAS.pop();
                lock.unlock();

                std::cout<<Env[0]<<"-" << Env[1] <<"-" << Env[2] <<"-" << Env[3]<<std::endl;
                RECON_INFO("Reflection Reconstructon: " + std::to_string(j));
            }

            // Progress spans 25..98 according to the fraction of finished
            // (motor position, sender TAS) groups. Computed in floating point:
            // the previous integer quotient (j*i)/(sizes) truncated to 0.
            const double finishedGroups = static_cast<double>(i) * static_cast<double>(senderTasCount) + (j + 1);
            const double totalGroups    = static_cast<double>(motorPosCount) * static_cast<double>(senderTasCount);
            Recon::notifyProgress(25 + static_cast<int>(73.0 * finishedGroups / totalGroups));
        }
    }

    producer.join();
    return Env;
}
|
||||
Reference in New Issue
Block a user