Profile SAFT

This commit is contained in:
kradchen
2023-10-11 13:51:50 +08:00
parent b527b108e1
commit d63f255db2
107 changed files with 25577 additions and 11 deletions

View File

@@ -20,6 +20,9 @@
#include <array>
#include <chrono>
#include <spdlog/spdlog.h>
#include "spdlog/sinks/stdout_color_sinks.h"
// TODO: block size (z > 1) leads to kernel aborts
//pthread handle
@@ -231,7 +234,6 @@ void multithreaded_processing(
#ifdef debug_OutputFunctions
printf( "==> multithreaded_processing - Start\n");
#endif
dim3 fixedBlockDimensions( // convert int3 to dim3
BlockDim_XYZ.x,
BlockDim_XYZ.y,
@@ -450,6 +452,8 @@ void multithreaded_processing(
std::vector<std::future<void>> futures;
futures.resize(selectedNumberGPUs);
SPDLOG_INFO("Start GPU execute!");
for ( j = 0; j < num_devices_factor; j++ ) {
@@ -483,6 +487,7 @@ void multithreaded_processing(
}
//gettimeofday(&stopAllThreads, NULL);
SPDLOG_INFO("GPU execute finish!");
auto stopAllThreads = std::chrono::steady_clock::now();
diff_time = std::chrono::duration_cast<std::chrono::microseconds>(stopAllThreads - startAllThreads).count(); // total duration in µs
@@ -869,7 +874,14 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
#ifdef debug_OutputFormat_German
setlocale(LC_NUMERIC, "de_DE"); // German Format , instead . for numbers
#endif
auto console_sink = std::make_shared<spdlog::sinks::stdout_color_sink_mt>();
console_sink->set_level(spdlog::level::info);
console_sink->set_pattern(fmt::format("[%Y-%m-%d %T .%f][{}] [%^%l%$] %v", "SAFT"));
std::shared_ptr<spdlog::logger> logger(new spdlog::logger("SAFT", {console_sink}));
logger->set_level(spdlog::level::info);
logger->flush_on(spdlog::level::info);
SPDLOG_INFO("Start SAFT!");
size_t AScan_Nx, AScan_Mx,
pix_vect_Nx, pix_vect_Mx,
receiver_index_Nx, receiver_index_Mx,
@@ -1566,7 +1578,7 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
// }
uint64_t IMAGE_SUM_Count = GetNumberOfElements(IMAGE_SUM);
double *IMAGE_SUM_vec_ptr = (double*)GetPr(IMAGE_SUM);
float *IMAGE_SUM_vec_ptr = (float*)GetPr(IMAGE_SUM);
#ifdef debug_OutputVariables
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",0 , IMAGE_SUM_vec_ptr[0], IMAGE_SUM_vec_ptr[1], IMAGE_SUM_vec_ptr[2]);
@@ -1740,9 +1752,9 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
Output_Voxels.Dims[0] = dims[0];
Output_Voxels.Dims[1] = dims[1];
Output_Voxels.Dims[2] = dims[2];
Output_Voxels.Data = new double[dims[0]*dims[1]*(dims[2]?dims[2]:1)];
Output_Voxels.Data = new float[dims[0]*dims[1]*(dims[2]?dims[2]:1)];
double *Output_Voxels_ptr = (double*)GetPr(Output_Voxels);
double *Output_Voxels_ptr = new double[dims[0]*dims[1]*(dims[2]?dims[2]:1)];;
// ~~~~ Create Pointer to return value from Duration of Kernel
@@ -1784,6 +1796,7 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
printf( "=================================================================================================\n\n"); // End of Outputparameter
#endif
SPDLOG_INFO("preintegrateAscans!");
//================================================================================================================ Preintegrate Ascans
#ifdef preAscanIntegrationToMatchSamplerateToResolution // Preintegrate Ascans for matching of Samplerate and Resolution
@@ -1839,6 +1852,7 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
struct timeval startMultithreadProcessing, stopMultithreadProcessing;
gettimeofday(&startMultithreadProcessing, NULL);
#endif
SPDLOG_INFO("multithreaded_processing!");
multithreaded_processing( aScan_ptr,
Output_Voxels_ptr,
@@ -1908,10 +1922,12 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
struct timeval startSumIMAGE_SUM, stopSumIMAGE_SUM;
gettimeofday(&startSumIMAGE_SUM, NULL);
#endif
SPDLOG_INFO("multithreaded_processing finish!");
float* outData = (float*)Output_Voxels.Data;
for(uint64_t i=0; i < IMAGE_SUM_Count; i++)
{
Output_Voxels_ptr[i] += IMAGE_SUM_vec_ptr[i];
outData[i]= Output_Voxels_ptr[i] + IMAGE_SUM_vec_ptr[i];
}
#ifdef debug_OutputHostStepsPerformance
@@ -1942,6 +1958,8 @@ Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
#endif
delete [] AscansOut_ptr;
delete [] Duration_ptr;
delete [] Output_Voxels_ptr;
SPDLOG_INFO("SAFT finish!");
return Output_Voxels;
}