diff --git a/SAFT_TOFI/src/SAFT_TOFI.cpp b/SAFT_TOFI/src/SAFT_TOFI.cpp index 5ebd929..e05c486 100644 --- a/SAFT_TOFI/src/SAFT_TOFI.cpp +++ b/SAFT_TOFI/src/SAFT_TOFI.cpp @@ -340,8 +340,10 @@ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten SPDLOG_INFO("Start GPU execute!"); for (j = 0; j < num_devices_factor; j++) + // for (j = 0; j < 1; j++) { for (k = 0; k < selectedNumberGPUs; k++) + // for (k = 0; k < 1; k++) { // new async threads futures[k] = std::async(std::forward>(thread_function), @@ -350,6 +352,7 @@ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten // Synchronization and termination ------------------------------------------------------------------------------------------------------------------- for (k = 0; k < selectedNumberGPUs; k++) + // for (k = 0; k < 1; k++) { // new async threads futures[k].wait(); // advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task) diff --git a/SAFT_TOFI/src/kernel/saftPrivate.cu b/SAFT_TOFI/src/kernel/saftPrivate.cu index 6fa33c5..23823bb 100644 --- a/SAFT_TOFI/src/kernel/saftPrivate.cu +++ b/SAFT_TOFI/src/kernel/saftPrivate.cu @@ -2,6 +2,9 @@ #include "rayTracing.cuh" #include "saftKernel.cuh" #include "saft.hpp" +// #include "spdlog/spdlog.h" + +#define SPDLOG_INFO(...) void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount) { @@ -546,6 +549,7 @@ void SAFTHandler::performSAFT( CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat)); } } + SPDLOG_INFO("performSAFT init end"); // Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION; float3 SosVoxelStartPosition; @@ -564,6 +568,10 @@ void SAFTHandler::performSAFT( // ==================================================== Blockmode with SOS-value per Ascan // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction // ==================================================== 3DVolume Mode with SOS- and ATT-Correction + SPDLOG_INFO("perform kernel start"); + SPDLOG_INFO("SosVoxelStartPosition {0} {1} {2}",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); + SPDLOG_INFO("TableAscanIndexAllocationCount = ({0}/{1}) = {2} = {3}", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount, + ceil(aScanWindowSize/maxAscanIndexArraysInTexture)); if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false)) { // ==================================================== Blockmode with SOS-value per Ascan @@ -571,6 +579,7 @@ void SAFTHandler::performSAFT( } else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false)) { + SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS"); // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction saftKernelAscanIndex_SOS<<>>( // , 0, stream>>>( @@ -579,11 +588,13 @@ void SAFTHandler::performSAFT( (float)aScanWindowSize, // maxAscanIndexArraysInTexture maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen - IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput); + IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput); + SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS finish"); + } else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)) { // ==================================================== 3DVolume Mode with SOS- and ATT-Correction - + SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS_ATT, grid:{0},{1},{2}",reducedGridDimensions.x,reducedGridDimensions.y, reducedGridDimensions.z); saftKernelAscanIndex_SOS_ATT<<>>( // , 0, stream>>>( aScanIndex, (float)aScanWindowSize, // maxAscanIndexArraysInTexture @@ -595,7 +606,11 @@ void SAFTHandler::performSAFT( deviceOutput ); + SPDLOG_INFO("perform kernel saftKernelAscanIndex_SOS_ATT finish"); } + SPDLOG_INFO("perform kernel finish"); + + SPDLOG_INFO("perform unbind start"); // Unbind Textures CUDA_CHECK(cudaUnbindTexture(&texRefAscans)); @@ -615,4 +630,5 @@ void SAFTHandler::performSAFT( CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3)); } CUDA_CHECK(cudaGetLastError()); + SPDLOG_INFO("perform unbind finish"); } \ No newline at end of file diff --git a/SAFT_TOFI/src/processAScans.cpp b/SAFT_TOFI/src/processAScans.cpp index 741b549..e26a60c 100644 --- a/SAFT_TOFI/src/processAScans.cpp +++ b/SAFT_TOFI/src/processAScans.cpp @@ -9,6 +9,8 @@ #include #include "saft.hpp" +// #include "spdlog/spdlog.h" +#define SPDLOG_INFO(...) extern float3 *constEmitterPtr; extern float3 *constReceiverPtr; @@ -24,6 +26,7 @@ extern unsigned short *constLookUpGeometryMemoryListReceiverPtr; */ void SAFTHandler::performCoreReconstruction() { + // SPDLOG_INFO("performCoreReconstruction start!"); // Mitlaufender Zeiger fuer Speicherbereich der Outputdaten uebergeben. double *currentHostOutputAdress = output; // Offset des OutputSpeichers am Anfang = 0 CUDA_CHECK(cudaGetLastError()); @@ -76,7 +79,7 @@ void SAFTHandler::performCoreReconstruction() } currentZLayerCount = maxFeasibleZLayerCount; // Mit maximal moeglicher Anzahl an Z-Layer rechnen und in Schleife abhaengig von Randbedingungen auf aktuelle maximale Anzahl verringern - + SPDLOG_INFO("main loop start!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); // performCoreReconstruction - While-Loop over all Z-Layers~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((zOffset < IMAGE_SIZE_XYZ.z)) @@ -129,19 +132,25 @@ void SAFTHandler::performCoreReconstruction() zSoSOffset_old = zSoSOffset; // SoS-Pfade für Z-Layer vorberechnen ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPDLOG_INFO("precalculateAverageSpeedOfSound Emitter start!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); + // Emitter ----------------------------------------------------------------------- Emitter precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 0, emitter_list_Size); + SPDLOG_INFO("precalculateAverageSpeedOfSound Emitter finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); + SPDLOG_INFO("precalculateAverageSpeedOfSound Receiver start!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); // Receiver ----------------------------------------------------------------------- Receiver precalculateAverageSpeedOfSound(currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount, 1, receiver_list_Size); + SPDLOG_INFO("precalculateAverageSpeedOfSound finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); + } // Go overall A-Scanblocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // PrecalAndPerformSAFTAllAscans - Berechne Performanz fuer Durchlauf von allen Ascans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // aScanWindowSize auf maximal moegliche Anzahl setzen std::size_t aScanWindowSize = (maxAscanIndexArraysInTexture * maxSupportedTexturesForAscanIndex); // Anzahl maximaler Ascans die auf einmal verarbeitet werden kann. - + int ascanIndexBatchOffset = 0; while (ascanIndexBatchOffset < aScanCount) { // Alle Emitter oder Receiver in der Liste von Matlab durchgehen @@ -160,15 +169,27 @@ void SAFTHandler::performCoreReconstruction() // neededAscanBatchCount = Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen // AscanIndex Emitter -> Voxel --> Receiver vorberechnen fuer AscansBatchSize - precalculateAscanIndex_usePaths(ascanIndexBatchOffset, aScanWindowSize, currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount); //, deviceTextureAscanIndexFloatCuArray); + SPDLOG_INFO("precalculateAscanIndex_usePaths start!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); + precalculateAscanIndex_usePaths(ascanIndexBatchOffset, aScanWindowSize, currentSpeedOfSoundZLayer, maxFeasibleSosZLayerCount); //, deviceTextureAscanIndexFloatCuArray); + SPDLOG_INFO("precalculateAscanIndex_usePaths finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); + // if (copyed){ + // // SPDLOG_INFO("cudaStreamSynchronize start!"); + // cudaStreamSynchronize(stream); + // // SPDLOG_INFO("cudaStreamSynchronize finish!"); + + // } ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// if (currentZLayerCount > 0) // bugprevention because windows and linux would stop execution for a bug - Issue 100 + { + SPDLOG_INFO("performSAFT start!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); performSAFT(ascanIndexBatchOffset, aScanWindowSize, IMAGE_SIZE_XYZ, SOSGrid_XYZ, blockIndexOffset, static_cast(currentOutputZLayerVoxelCount), static_cast(currentSpeedOfSoundZLayer), static_cast(partialSpeedOfSoundVoxelCount), static_cast(maxFeasibleSosZLayerCount), static_cast(currentEmIndexUsedForAscanIndexCalculation), windowGridDimensions, genericSAFTGridDimensions, genericSAFTBlockDimensions, deviceSpeedOfSoundField, deviceAScansCuArray[0]); + SPDLOG_INFO("performSAFT finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); + } // , calculationStream); ascanIndexBatchOffset += aScanWindowSize; @@ -212,6 +233,7 @@ void SAFTHandler::performCoreReconstruction() currentZLayerCount = maxFeasibleZLayerCount; }; // End While-Loop over all Z-Layer // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPDLOG_INFO("main loop finish!GPUMemoryFree:{0}",memoryGPUfreeInGiga()); } /** diff --git a/SAFT_TOFI/src/saft.cpp b/SAFT_TOFI/src/saft.cpp index 262153e..d2ce245 100644 --- a/SAFT_TOFI/src/saft.cpp +++ b/SAFT_TOFI/src/saft.cpp @@ -223,6 +223,9 @@ std::size_t memoryGPUfree() CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory)); return freeMemory; } +float memoryGPUfreeInGiga(){ + return (float)memoryGPUfree()/(1024.f*1025.f*1024.f); +} /** Determine free memory available on the current device. diff --git a/SAFT_TOFI/src/saft.hpp b/SAFT_TOFI/src/saft.hpp index 8b620b6..e54788b 100644 --- a/SAFT_TOFI/src/saft.hpp +++ b/SAFT_TOFI/src/saft.hpp @@ -397,6 +397,7 @@ class SAFTHandler extern void memoryCheck(); extern std::size_t memoryGPUfree(); +extern float memoryGPUfreeInGiga(); extern std::size_t memoryGPUtotal(); extern void performCUDAResultCheck(cudaError_t result, std::string const &file, int line);