feat: Add Profile log to SAFT
This commit is contained in:
@@ -340,8 +340,10 @@ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten
|
||||
SPDLOG_INFO("Start GPU execute!");
|
||||
|
||||
for (j = 0; j < num_devices_factor; j++)
|
||||
// for (j = 0; j < 1; j++)
|
||||
{
|
||||
for (k = 0; k < selectedNumberGPUs; k++)
|
||||
// for (k = 0; k < 1; k++)
|
||||
{
|
||||
// new async threads
|
||||
futures[k] = std::async(std::forward<std::function<void(void *)>>(thread_function),
|
||||
@@ -350,6 +352,7 @@ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten
|
||||
|
||||
// Synchronization and termination -------------------------------------------------------------------------------------------------------------------
|
||||
for (k = 0; k < selectedNumberGPUs; k++)
|
||||
// for (k = 0; k < 1; k++)
|
||||
{
|
||||
// new async threads
|
||||
futures[k].wait(); // advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task)
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
#include "rayTracing.cuh"
|
||||
#include "saftKernel.cuh"
|
||||
#include "saft.hpp"
|
||||
// #include "spdlog/spdlog.h"
|
||||
|
||||
#define SPDLOG_INFO(...)
|
||||
|
||||
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
|
||||
{
|
||||
@@ -546,6 +549,7 @@ void SAFTHandler::performSAFT(
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
}
|
||||
SPDLOG_INFO("performSAFT init end");
|
||||
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
|
||||
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
|
||||
float3 SosVoxelStartPosition;
|
||||
@@ -564,6 +568,10 @@ void SAFTHandler::performSAFT(
|
||||
// ==================================================== Blockmode with SOS-value per Ascan
|
||||
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
||||
SPDLOG_INFO("perform kernel start");
|
||||
SPDLOG_INFO("SosVoxelStartPosition {0} {1} {2}",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
||||
SPDLOG_INFO("TableAscanIndexAllocationCount = ({0}/{1}) = {2} = {3}", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount,
|
||||
ceil(aScanWindowSize/maxAscanIndexArraysInTexture));
|
||||
|
||||
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
||||
{ // ==================================================== Blockmode with SOS-value per Ascan
|
||||
@@ -571,6 +579,7 @@ void SAFTHandler::performSAFT(
|
||||
}
|
||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
||||
{
|
||||
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS");
|
||||
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
||||
|
||||
@@ -580,10 +589,12 @@ void SAFTHandler::performSAFT(
|
||||
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
||||
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
||||
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
|
||||
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS finish");
|
||||
|
||||
}
|
||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
||||
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
||||
|
||||
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS_ATT, grid:{0},{1},{2}",reducedGridDimensions.x,reducedGridDimensions.y, reducedGridDimensions.z);
|
||||
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
||||
aScanIndex,
|
||||
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
||||
@@ -595,7 +606,11 @@ void SAFTHandler::performSAFT(
|
||||
deviceOutput
|
||||
|
||||
);
|
||||
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS_ATT finish");
|
||||
}
|
||||
SPDLOG_INFO("perform kernel finish");
|
||||
|
||||
SPDLOG_INFO("perform unbind start");
|
||||
|
||||
// Unbind Textures
|
||||
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
|
||||
@@ -615,4 +630,5 @@ void SAFTHandler::performSAFT(
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
|
||||
}
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
SPDLOG_INFO("perform unbind finish");
|
||||
}
|
||||
@@ -9,6 +9,8 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "saft.hpp"
|
||||
// #include "spdlog/spdlog.h"
|
||||
#define SPDLOG_INFO(...)
|
||||
|
||||
extern float3 *constEmitterPtr;
|
||||
extern float3 *constReceiverPtr;
|
||||
@@ -24,6 +26,7 @@ extern unsigned short *constLookUpGeometryMemoryListReceiverPtr;
|
||||
*/
|
||||
void SAFTHandler::performCoreReconstruction()
|
||||
{
|
||||
// SPDLOG_INFO("performCoreReconstruction start!");
|
||||
// Mitlaufender Zeiger fuer Speicherbereich der Outputdaten uebergeben.
|
||||
double *currentHostOutputAdress = output; // Offset des OutputSpeichers am Anfang = 0
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
@@ -76,7 +79,7 @@ void SAFTHandler::performCoreReconstruction()
|
||||
}
|
||||
|
||||
currentZLayerCount = maxFeasibleZLayerCount; // Mit maximal moeglicher Anzahl an Z-Layer rechnen und in Schleife abhaengig von Randbedingungen auf aktuelle maximale Anzahl verringern
|
||||
|
||||
SPDLOG_INFO("main loop start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
// performCoreReconstruction - While-Loop over all Z-Layers~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
while ((zOffset < IMAGE_SIZE_XYZ.z))
|
||||
@@ -129,12 +132,18 @@ void SAFTHandler::performCoreReconstruction()
|
||||
zSoSOffset_old = zSoSOffset;
|
||||
// SoS-Pfade für Z-Layer vorberechnen ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
SPDLOG_INFO("precalculateAverageSpeedOfSound Emitter start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
|
||||
// Emitter ----------------------------------------------------------------------- Emitter
|
||||
precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 0, emitter_list_Size);
|
||||
SPDLOG_INFO("precalculateAverageSpeedOfSound Emitter finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
|
||||
SPDLOG_INFO("precalculateAverageSpeedOfSound Receiver start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
// Receiver ----------------------------------------------------------------------- Receiver
|
||||
|
||||
precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 1, receiver_list_Size);
|
||||
SPDLOG_INFO("precalculateAverageSpeedOfSound finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
|
||||
}
|
||||
// Go overall A-Scanblocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
// PrecalAndPerformSAFTAllAscans - Berechne Performanz fuer Durchlauf von allen Ascans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -160,15 +169,27 @@ void SAFTHandler::performCoreReconstruction()
|
||||
// neededAscanBatchCount = Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
|
||||
|
||||
// AscanIndex Emitter -> Voxel --> Receiver vorberechnen fuer AscansBatchSize
|
||||
precalculateAscanIndex_usePaths(ascanIndexBatchOffset, aScanWindowSize, currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount); //, deviceTextureAscanIndexFloatCuArray);
|
||||
SPDLOG_INFO("precalculateAscanIndex_usePaths start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
|
||||
precalculateAscanIndex_usePaths(ascanIndexBatchOffset, aScanWindowSize, currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount); //, deviceTextureAscanIndexFloatCuArray);
|
||||
SPDLOG_INFO("precalculateAscanIndex_usePaths finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
// if (copyed){
|
||||
// // SPDLOG_INFO("cudaStreamSynchronize start!");
|
||||
// cudaStreamSynchronize(stream);
|
||||
// // SPDLOG_INFO("cudaStreamSynchronize finish!");
|
||||
|
||||
// }
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (currentZLayerCount > 0) // bugprevention because windows and linux would stop execution for a bug - Issue 100
|
||||
{
|
||||
SPDLOG_INFO("performSAFT start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
performSAFT(ascanIndexBatchOffset, aScanWindowSize, IMAGE_SIZE_XYZ, SOSGrid_XYZ, blockIndexOffset, static_cast<int>(currentOutputZLayerVoxelCount),
|
||||
static_cast<int>(currentSpeedOfSoundZLayer), static_cast<int>(partialSpeedOfSoundVoxelCount), static_cast<int>(maxFeasibleSosZLayerCount),
|
||||
static_cast<int>(currentEmIndexUsedForAscanIndexCalculation), windowGridDimensions, genericSAFTGridDimensions, genericSAFTBlockDimensions, deviceSpeedOfSoundField,
|
||||
deviceAScansCuArray[0]);
|
||||
SPDLOG_INFO("performSAFT finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
}
|
||||
// , calculationStream);
|
||||
ascanIndexBatchOffset += aScanWindowSize;
|
||||
|
||||
@@ -212,6 +233,7 @@ void SAFTHandler::performCoreReconstruction()
|
||||
currentZLayerCount = maxFeasibleZLayerCount;
|
||||
}; // End While-Loop over all Z-Layer
|
||||
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
SPDLOG_INFO("main loop finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -223,6 +223,9 @@ std::size_t memoryGPUfree()
|
||||
CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
|
||||
return freeMemory;
|
||||
}
|
||||
float memoryGPUfreeInGiga(){
|
||||
return (float)memoryGPUfree()/(1024.f*1025.f*1024.f);
|
||||
}
|
||||
|
||||
/**
|
||||
Determine free memory available on the current device.
|
||||
|
||||
@@ -397,6 +397,7 @@ class SAFTHandler
|
||||
|
||||
extern void memoryCheck();
|
||||
extern std::size_t memoryGPUfree();
|
||||
extern float memoryGPUfreeInGiga();
|
||||
extern std::size_t memoryGPUtotal();
|
||||
|
||||
extern void performCUDAResultCheck(cudaError_t result, std::string const &file, int line);
|
||||
|
||||
Reference in New Issue
Block a user