feat: Add Profile log to SAFT
This commit is contained in:
@@ -340,8 +340,10 @@ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten
|
|||||||
SPDLOG_INFO("Start GPU execute!");
|
SPDLOG_INFO("Start GPU execute!");
|
||||||
|
|
||||||
for (j = 0; j < num_devices_factor; j++)
|
for (j = 0; j < num_devices_factor; j++)
|
||||||
|
// for (j = 0; j < 1; j++)
|
||||||
{
|
{
|
||||||
for (k = 0; k < selectedNumberGPUs; k++)
|
for (k = 0; k < selectedNumberGPUs; k++)
|
||||||
|
// for (k = 0; k < 1; k++)
|
||||||
{
|
{
|
||||||
// new async threads
|
// new async threads
|
||||||
futures[k] = std::async(std::forward<std::function<void(void *)>>(thread_function),
|
futures[k] = std::async(std::forward<std::function<void(void *)>>(thread_function),
|
||||||
@@ -350,6 +352,7 @@ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten
|
|||||||
|
|
||||||
// Synchronization and termination -------------------------------------------------------------------------------------------------------------------
|
// Synchronization and termination -------------------------------------------------------------------------------------------------------------------
|
||||||
for (k = 0; k < selectedNumberGPUs; k++)
|
for (k = 0; k < selectedNumberGPUs; k++)
|
||||||
|
// for (k = 0; k < 1; k++)
|
||||||
{
|
{
|
||||||
// new async threads
|
// new async threads
|
||||||
futures[k].wait(); // advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task)
|
futures[k].wait(); // advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task)
|
||||||
|
|||||||
@@ -2,6 +2,9 @@
|
|||||||
#include "rayTracing.cuh"
|
#include "rayTracing.cuh"
|
||||||
#include "saftKernel.cuh"
|
#include "saftKernel.cuh"
|
||||||
#include "saft.hpp"
|
#include "saft.hpp"
|
||||||
|
// #include "spdlog/spdlog.h"
|
||||||
|
|
||||||
|
#define SPDLOG_INFO(...)
|
||||||
|
|
||||||
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
|
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
|
||||||
{
|
{
|
||||||
@@ -546,6 +549,7 @@ void SAFTHandler::performSAFT(
|
|||||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
SPDLOG_INFO("performSAFT init end");
|
||||||
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
|
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
|
||||||
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
|
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
|
||||||
float3 SosVoxelStartPosition;
|
float3 SosVoxelStartPosition;
|
||||||
@@ -564,6 +568,10 @@ void SAFTHandler::performSAFT(
|
|||||||
// ==================================================== Blockmode with SOS-value per Ascan
|
// ==================================================== Blockmode with SOS-value per Ascan
|
||||||
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||||
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
||||||
|
SPDLOG_INFO("perform kernel start");
|
||||||
|
SPDLOG_INFO("SosVoxelStartPosition {0} {1} {2}",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
||||||
|
SPDLOG_INFO("TableAscanIndexAllocationCount = ({0}/{1}) = {2} = {3}", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount,
|
||||||
|
ceil(aScanWindowSize/maxAscanIndexArraysInTexture));
|
||||||
|
|
||||||
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
||||||
{ // ==================================================== Blockmode with SOS-value per Ascan
|
{ // ==================================================== Blockmode with SOS-value per Ascan
|
||||||
@@ -571,6 +579,7 @@ void SAFTHandler::performSAFT(
|
|||||||
}
|
}
|
||||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
||||||
{
|
{
|
||||||
|
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS");
|
||||||
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||||
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
||||||
|
|
||||||
@@ -579,11 +588,13 @@ void SAFTHandler::performSAFT(
|
|||||||
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
||||||
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
||||||
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
||||||
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
|
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
|
||||||
|
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS finish");
|
||||||
|
|
||||||
}
|
}
|
||||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
||||||
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
||||||
|
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS_ATT, grid:{0},{1},{2}",reducedGridDimensions.x,reducedGridDimensions.y, reducedGridDimensions.z);
|
||||||
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
||||||
aScanIndex,
|
aScanIndex,
|
||||||
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
||||||
@@ -595,7 +606,11 @@ void SAFTHandler::performSAFT(
|
|||||||
deviceOutput
|
deviceOutput
|
||||||
|
|
||||||
);
|
);
|
||||||
|
SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS_ATT finish");
|
||||||
}
|
}
|
||||||
|
SPDLOG_INFO("perform kernel finish");
|
||||||
|
|
||||||
|
SPDLOG_INFO("perform unbind start");
|
||||||
|
|
||||||
// Unbind Textures
|
// Unbind Textures
|
||||||
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
|
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
|
||||||
@@ -615,4 +630,5 @@ void SAFTHandler::performSAFT(
|
|||||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
|
||||||
}
|
}
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
SPDLOG_INFO("perform unbind finish");
|
||||||
}
|
}
|
||||||
@@ -9,6 +9,8 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "saft.hpp"
|
#include "saft.hpp"
|
||||||
|
// #include "spdlog/spdlog.h"
|
||||||
|
#define SPDLOG_INFO(...)
|
||||||
|
|
||||||
extern float3 *constEmitterPtr;
|
extern float3 *constEmitterPtr;
|
||||||
extern float3 *constReceiverPtr;
|
extern float3 *constReceiverPtr;
|
||||||
@@ -24,6 +26,7 @@ extern unsigned short *constLookUpGeometryMemoryListReceiverPtr;
|
|||||||
*/
|
*/
|
||||||
void SAFTHandler::performCoreReconstruction()
|
void SAFTHandler::performCoreReconstruction()
|
||||||
{
|
{
|
||||||
|
// SPDLOG_INFO("performCoreReconstruction start!");
|
||||||
// Mitlaufender Zeiger fuer Speicherbereich der Outputdaten uebergeben.
|
// Mitlaufender Zeiger fuer Speicherbereich der Outputdaten uebergeben.
|
||||||
double *currentHostOutputAdress = output; // Offset des OutputSpeichers am Anfang = 0
|
double *currentHostOutputAdress = output; // Offset des OutputSpeichers am Anfang = 0
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
@@ -76,7 +79,7 @@ void SAFTHandler::performCoreReconstruction()
|
|||||||
}
|
}
|
||||||
|
|
||||||
currentZLayerCount = maxFeasibleZLayerCount; // Mit maximal moeglicher Anzahl an Z-Layer rechnen und in Schleife abhaengig von Randbedingungen auf aktuelle maximale Anzahl verringern
|
currentZLayerCount = maxFeasibleZLayerCount; // Mit maximal moeglicher Anzahl an Z-Layer rechnen und in Schleife abhaengig von Randbedingungen auf aktuelle maximale Anzahl verringern
|
||||||
|
SPDLOG_INFO("main loop start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
// performCoreReconstruction - While-Loop over all Z-Layers~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
// performCoreReconstruction - While-Loop over all Z-Layers~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
while ((zOffset < IMAGE_SIZE_XYZ.z))
|
while ((zOffset < IMAGE_SIZE_XYZ.z))
|
||||||
@@ -129,19 +132,25 @@ void SAFTHandler::performCoreReconstruction()
|
|||||||
zSoSOffset_old = zSoSOffset;
|
zSoSOffset_old = zSoSOffset;
|
||||||
// SoS-Pfade für Z-Layer vorberechnen ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
// SoS-Pfade für Z-Layer vorberechnen ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
SPDLOG_INFO("precalculateAverageSpeedOfSound Emitter start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
|
|
||||||
// Emitter ----------------------------------------------------------------------- Emitter
|
// Emitter ----------------------------------------------------------------------- Emitter
|
||||||
precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 0, emitter_list_Size);
|
precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 0, emitter_list_Size);
|
||||||
|
SPDLOG_INFO("precalculateAverageSpeedOfSound Emitter finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
|
|
||||||
|
SPDLOG_INFO("precalculateAverageSpeedOfSound Receiver start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
// Receiver ----------------------------------------------------------------------- Receiver
|
// Receiver ----------------------------------------------------------------------- Receiver
|
||||||
|
|
||||||
precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 1, receiver_list_Size);
|
precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 1, receiver_list_Size);
|
||||||
|
SPDLOG_INFO("precalculateAverageSpeedOfSound finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
|
|
||||||
}
|
}
|
||||||
// Go overall A-Scanblocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
// Go overall A-Scanblocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
// PrecalAndPerformSAFTAllAscans - Berechne Performanz fuer Durchlauf von allen Ascans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
// PrecalAndPerformSAFTAllAscans - Berechne Performanz fuer Durchlauf von allen Ascans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
// aScanWindowSize auf maximal moegliche Anzahl setzen
|
// aScanWindowSize auf maximal moegliche Anzahl setzen
|
||||||
std::size_t aScanWindowSize = (maxAscanIndexArraysInTexture * maxSupportedTexturesForAscanIndex); // Anzahl maximaler Ascans die auf einmal verarbeitet werden kann.
|
std::size_t aScanWindowSize = (maxAscanIndexArraysInTexture * maxSupportedTexturesForAscanIndex); // Anzahl maximaler Ascans die auf einmal verarbeitet werden kann.
|
||||||
|
|
||||||
int ascanIndexBatchOffset = 0;
|
int ascanIndexBatchOffset = 0;
|
||||||
while (ascanIndexBatchOffset < aScanCount)
|
while (ascanIndexBatchOffset < aScanCount)
|
||||||
{ // Alle Emitter oder Receiver in der Liste von Matlab durchgehen
|
{ // Alle Emitter oder Receiver in der Liste von Matlab durchgehen
|
||||||
@@ -160,15 +169,27 @@ void SAFTHandler::performCoreReconstruction()
|
|||||||
// neededAscanBatchCount = Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
|
// neededAscanBatchCount = Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
|
||||||
|
|
||||||
// AscanIndex Emitter -> Voxel --> Receiver vorberechnen fuer AscansBatchSize
|
// AscanIndex Emitter -> Voxel --> Receiver vorberechnen fuer AscansBatchSize
|
||||||
precalculateAscanIndex_usePaths(ascanIndexBatchOffset, aScanWindowSize, currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount); //, deviceTextureAscanIndexFloatCuArray);
|
SPDLOG_INFO("precalculateAscanIndex_usePaths start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
|
|
||||||
|
precalculateAscanIndex_usePaths(ascanIndexBatchOffset, aScanWindowSize, currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount); //, deviceTextureAscanIndexFloatCuArray);
|
||||||
|
SPDLOG_INFO("precalculateAscanIndex_usePaths finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
|
// if (copyed){
|
||||||
|
// // SPDLOG_INFO("cudaStreamSynchronize start!");
|
||||||
|
// cudaStreamSynchronize(stream);
|
||||||
|
// // SPDLOG_INFO("cudaStreamSynchronize finish!");
|
||||||
|
|
||||||
|
// }
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
if (currentZLayerCount > 0) // bugprevention because windows and linux would stop execution for a bug - Issue 100
|
if (currentZLayerCount > 0) // bugprevention because windows and linux would stop execution for a bug - Issue 100
|
||||||
|
{
|
||||||
|
SPDLOG_INFO("performSAFT start!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
performSAFT(ascanIndexBatchOffset, aScanWindowSize, IMAGE_SIZE_XYZ, SOSGrid_XYZ, blockIndexOffset, static_cast<int>(currentOutputZLayerVoxelCount),
|
performSAFT(ascanIndexBatchOffset, aScanWindowSize, IMAGE_SIZE_XYZ, SOSGrid_XYZ, blockIndexOffset, static_cast<int>(currentOutputZLayerVoxelCount),
|
||||||
static_cast<int>(currentSpeedOfSoundZLayer), static_cast<int>(partialSpeedOfSoundVoxelCount), static_cast<int>(maxFeasibleSosZLayerCount),
|
static_cast<int>(currentSpeedOfSoundZLayer), static_cast<int>(partialSpeedOfSoundVoxelCount), static_cast<int>(maxFeasibleSosZLayerCount),
|
||||||
static_cast<int>(currentEmIndexUsedForAscanIndexCalculation), windowGridDimensions, genericSAFTGridDimensions, genericSAFTBlockDimensions, deviceSpeedOfSoundField,
|
static_cast<int>(currentEmIndexUsedForAscanIndexCalculation), windowGridDimensions, genericSAFTGridDimensions, genericSAFTBlockDimensions, deviceSpeedOfSoundField,
|
||||||
deviceAScansCuArray[0]);
|
deviceAScansCuArray[0]);
|
||||||
|
SPDLOG_INFO("performSAFT finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
|
}
|
||||||
// , calculationStream);
|
// , calculationStream);
|
||||||
ascanIndexBatchOffset += aScanWindowSize;
|
ascanIndexBatchOffset += aScanWindowSize;
|
||||||
|
|
||||||
@@ -212,6 +233,7 @@ void SAFTHandler::performCoreReconstruction()
|
|||||||
currentZLayerCount = maxFeasibleZLayerCount;
|
currentZLayerCount = maxFeasibleZLayerCount;
|
||||||
}; // End While-Loop over all Z-Layer
|
}; // End While-Loop over all Z-Layer
|
||||||
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
SPDLOG_INFO("main loop finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -223,6 +223,9 @@ std::size_t memoryGPUfree()
|
|||||||
CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
|
CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
|
||||||
return freeMemory;
|
return freeMemory;
|
||||||
}
|
}
|
||||||
|
float memoryGPUfreeInGiga(){
|
||||||
|
return (float)memoryGPUfree()/(1024.f*1025.f*1024.f);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Determine free memory available on the current device.
|
Determine free memory available on the current device.
|
||||||
|
|||||||
@@ -397,6 +397,7 @@ class SAFTHandler
|
|||||||
|
|
||||||
extern void memoryCheck();
|
extern void memoryCheck();
|
||||||
extern std::size_t memoryGPUfree();
|
extern std::size_t memoryGPUfree();
|
||||||
|
extern float memoryGPUfreeInGiga();
|
||||||
extern std::size_t memoryGPUtotal();
|
extern std::size_t memoryGPUtotal();
|
||||||
|
|
||||||
extern void performCUDAResultCheck(cudaError_t result, std::string const &file, int line);
|
extern void performCUDAResultCheck(cudaError_t result, std::string const &file, int line);
|
||||||
|
|||||||
Reference in New Issue
Block a user