2023-06-09 11:19:30 +08:00
2023-05-18 16:04:27 +08:00
# include <iostream>
# include <vector>
# include <cstdlib>
# include <ctime>
# include <cmath>
//#include <sys/time.h>
//#include <ail/file.hpp>
//#include <ail/string.hpp>
//#include <ail/time.hpp>
//#include "configuration.hpp"
# include "saft.hpp"
/**
Clumsy constructor of the core reconstruction class .
- Unbeholfener Konstruktor der Kern Rekonstuktionsklasse
*/
SAFTHandler : : SAFTHandler (
int deviceId , ///< CUDA ID of the device to be used.
int deviceIndex , ///< Index given by MATLAB (An welcher Position steht die GPU in der Liste?)
float * aScan_ptr , ///< Zeiger zu den AScandaten //std::string const & aScanSamplesPath, ///< Path to the actual A-scan samples.
double * output_ptr , ///< Zeiger zu den daten // std::string const & Path, ///< Path to a file in which the output of the image reconstruction is to be stored.
double * Duration_ptr , ///< Zeiger auf R<> ckgabewert fuer Matlab fuer Laufzeit des Kernels
unsigned short * receiver_index_ptr , ///<
unsigned short * emitter_index_ptr , ///<
float * receiver_list_ptr , ///<
int receiver_list_Size ,
float * emitter_list_ptr , ///<
int emitter_list_Size ,
float * speed_vec_ptr , ///< Zeiger auf die SoS-Daten in Block-/Gridmode
int3 SOSGrid_XYZ ,
float3 sosOffset , ///< Startpoint of SoSGrid
float SOS_RESOLUTION , ///< Aufloesung des SoSGrid
float * att_vec_ptr , ///< Zeiger auf die Att-Daten inm Gridmode
int aScanCount ,
int aScanLength ,
int3 IMAGE_SIZE_XYZ ,
float sampleRate ,
float3 regionOfInterestOffset ,
float IMAGE_RESOLUTION ,
dim3 const & fixedBlockDimensions , ///< If fixed block dimensions are enabled, they will be used over the ones determined by auto-tuning.
int medianWindowSize , ///< define width of used median filter
float debugMode ,
float debugModeParameter ,
bool SOSMode_3DVolume ,
bool ATTMode_3DVolume ,
int SAFT_MODE ,
int * SAFT_VARIANT
) :
deviceId ( deviceId ) , // Das hier ist eine Initialisation der Klassenvariablen mit den <20> bergebenen Werten aehnlich Konstruktor, called Initializer list
deviceIndex ( deviceIndex ) ,
aScan_ptr ( aScan_ptr ) , //aScanSamplesPath(aScanSamplesPath),
output_ptr ( output_ptr ) , //Path(Path),
Duration_ptr ( Duration_ptr ) ,
receiver_index_ptr ( receiver_index_ptr ) , //
emitter_index_ptr ( emitter_index_ptr ) , //
receiver_list_ptr ( receiver_list_ptr ) , //
receiver_list_Size ( receiver_list_Size ) ,
emitter_list_ptr ( emitter_list_ptr ) , //
emitter_list_Size ( emitter_list_Size ) ,
speed_vec_ptr ( speed_vec_ptr ) , ///< SoS-Daten im Blockmode oder SoSGrid
SOSGrid_XYZ ( SOSGrid_XYZ ) , // Groesse des SoSGrids
sosOffset ( sosOffset ) , ///< Startpoint of SoSGrid
SOS_RESOLUTION ( SOS_RESOLUTION ) , ///< Aufloesung des SoSGrid
att_vec_ptr ( att_vec_ptr ) , ///< Att-Daten als ATTGrid
aScanCount ( aScanCount ) ,
aScanLength ( aScanLength ) ,
IMAGE_SIZE_XYZ ( IMAGE_SIZE_XYZ ) ,
sampleRate ( sampleRate ) ,
regionOfInterestOffset ( regionOfInterestOffset ) ,
IMAGE_RESOLUTION ( IMAGE_RESOLUTION ) ,
fixedBlockDimensions ( fixedBlockDimensions ) ,
medianWindowSize ( medianWindowSize ) ,
debugMode ( debugMode ) ,
debugModeParameter ( debugModeParameter ) ,
SOSMode_3DVolume ( SOSMode_3DVolume ) ,
ATTMode_3DVolume ( ATTMode_3DVolume ) ,
SAFT_MODE ( SAFT_MODE ) ,
SAFT_VARIANT ( SAFT_VARIANT )
{
# ifdef debug_OutputFunctions
// printf( "==> SAFTHandler::SAFTHandler - Start\n");
# endif
# ifdef debug_OutputInfo
// printf( "SAFTHandler Constructor\n");
# endif
aScanAllocationCount = 1 ; // Speicher der Allokiert wird, es reicht einer statt 2! 2 nur wenn Streams fuer Copy genutzt werden sollen.
IMAGE_RESOLUTION_FACTOR = 1 / IMAGE_RESOLUTION ; // Auflösung im OutputVolumen
SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION ; // Auflösung im SoS-Grid
# ifdef debug_OutputVariables
// printf( "IMAGE_RESOLUTION_FACTOR = %e\n", IMAGE_RESOLUTION_FACTOR);
// printf( "SOS_RESOLUTION_FACTOR = %e\n", SOS_RESOLUTION_FACTOR);
// printf( "Samplerate = %e\n", sampleRate);
# endif
# ifdef debug_OutputFunctions
// printf( "<== SAFTHandler::SAFTHandler - End\n");
# endif
}
/**
Top level function of the SAFTHandler class that performs the image reconstruction .
- Top Level Funktion der SAFTHandler Klasse die die Bildrekonstruktion durchf <EFBFBD> hrt .
*/
void SAFTHandler : : performReconstruction ( )
{
# ifdef debug_OutputFunctions
// printf( "==> SAFTHandler::performReconstruction - Start\n");
# endif
# ifdef debug_OutputInfo // Name des Device mit ID ausgeben
// printf( "Device ID: %i\n", deviceId);
# endif
# ifdef debug_OutputFunctions
// printf( "==> loadDevices - Start\n");
# endif
int deviceCount ;
CUDA_CHECK ( cudaGetDeviceCount ( & deviceCount ) ) ;
// Noch mal umstrukturieren!!!!! DA das so nicht sein muss, könnte auch nur einmal ausgelesen werden aber zweitrangig.~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//DeviceProperties & outputProb = deviceProperties; // lokalen Zeiger auf Vektor erstellen der auf Klassenvektor zeigt.
//// printf("1: size(%i) capacity(%i) max_size(%i)\n", outputProb.size(), outputProb.capacity(), outputProb.max_size());
//outputProb.reserve(static_cast<std::size_t>(deviceCount)); // Request Vector with size deviceCount
deviceProperties . reserve ( static_cast < std : : size_t > ( deviceCount ) ) ; // Request Vector with size deviceCount
//cudaDeviceProp & device = outputProb[deviceId]; //
cudaDeviceProp & device = deviceProperties [ deviceId ] ; //
CUDA_CHECK ( cudaGetDeviceProperties ( & device , deviceId ) ) ;
//// printf("%i. %s\n", deviceId, device.name);
//// printf("%i. %s\n", deviceId, deviceProperties[deviceId].name);
# ifdef debug_OutputInfo
// printf("%i. %s\n", deviceId, device.name);
// printf(" Byte Total Global Mem: %lld \n", device.totalGlobalMem);
// printf(" Compute Capability: %i.%i\n", device.major,device.minor);
// printf(" Name: %s\n", device.name);
// printf(" Major revision number: %d\n", device.major);
// printf(" Minor revision number: %d\n", device.minor);
// printf(" Total global memory: %lld\n", device.totalGlobalMem);
// printf(" Total shared memory per block: %u\n", device.sharedMemPerBlock);
// printf(" Total registers per block: %d\n", device.regsPerBlock);
// printf(" Warp size: %d\n", device.warpSize);
// printf(" Maximum memory pitch: %lld\n", device.memPitch);
// printf(" Maximum threads per block: %d\n", device.maxThreadsPerBlock);
for ( int i = 0 ; i < 3 ; + + i ) {
// printf(" Maximum dimension %d of block: %lld\n", i, device.maxThreadsDim[i]);
}
for ( int i = 0 ; i < 3 ; + + i ) {
// printf(" Maximum dimension %d of grid: %lld\n", i, device.maxGridSize[i]);
}
// printf(" Clock rate: %d\n", device.clockRate);
// printf(" Total constant memory: %u\n", device.totalConstMem);
// printf(" Texture alignment: %u\n", device.textureAlignment);
// printf(" Concurrent copy and execution: %s\n", (device.deviceOverlap ? "Yes" : "No"));
// printf(" Number of multiprocessors: %d\n", device.multiProcessorCount);
// printf(" Kernel execution timeout: %s\n\n", (device.kernelExecTimeoutEnabled ? "Yes" : "No"));
# endif
//outputProb.push_back(device); // Add element at the end of the vector outputProb
deviceProperties . push_back ( device ) ; // Add element at the end of the vector outputProb
//// printf("2: size(%i) capacity(%i) max_size(%i)\n", outputProb.size(), outputProb.capacity(), outputProb.max_size());
# ifdef debug_OutputFunctions
// printf( "<== loadDevices - End\n");
# endif
// Noch mal umstrukturieren!!!!! DA das so nicht sein muss, aber erstmal egal.~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// siehe http://gpucoder.livejournal.com/1064.html
// int devCount;
// cudaGetDeviceCount(&devCount);
// // printf("CUDA Device Query...\n");
// // printf("There are %d CUDA devices.\n", devCount);
//
// // Iterate through devices
// for (int i = 0; i < devCount; ++i)
// {
// // Get device properties
// // printf("\nCUDA Device #%d\n", i);
// cudaDeviceProp devProp;
// cudaGetDeviceProperties(&devProp, i);
// printDevProp(devProp);
// }
//
// // printf("\nPress any key to exit...");
// char c;
// scanf("%c", &c);
//cudaDeviceProp & device = deviceProperties[deviceId];
//CUDA_CHECK(cudaGetDeviceProperties(&device, deviceId)); // Eingenschaften des Devices auslesen
//#ifdef debug_OutputInfo // Name des Device mit ID ausgeben
// printf( "Device used: %18s (HW-ID %i) (Idx %i)\n", device.name , deviceId, deviceIndex);
//#endif
CUDA_CHECK ( cudaSetDevice ( deviceId ) ) ;
# ifdef debug_OutputInfo // Reset Device
// printf("Reset Device\n");
# endif
//CUDA_CHECK(cudaDeviceReset());
// std::string errorMessage = cudaGetErrorString(cudaPeekAtLastError());
// std::cout << errorMessage << std::endl;
//memoryCheck(); // Freier Speicher am Anfang
// Check and set Block and Grid-Dimensions
genericSAFTBlockDimensions = fixedBlockDimensions ;
genericSAFTGridDimensions = dim3 (
( IMAGE_SIZE_XYZ . x + genericSAFTBlockDimensions . x - 1 ) / genericSAFTBlockDimensions . x , // hier wird aufgerundet! Wenn ungerade Aufloesung nicht genau
( IMAGE_SIZE_XYZ . y + genericSAFTBlockDimensions . y - 1 ) / genericSAFTBlockDimensions . y , // in Blockgroesse geteilt werden kann, muss ein weiterer
( IMAGE_SIZE_XYZ . z + genericSAFTBlockDimensions . z - 1 ) / genericSAFTBlockDimensions . z // Block berechnet werden. Zu Viele werden im Kernel aussortiert.
) ;
# if defined(debug_OutputVariables) || defined(debug_OutputZSteps)
if ( deviceIndex = = DebugOutputGPUIdx ) {
// printf( "genericSAFTBlockDimensions X,Y,Z = (%i %i %i)\n",genericSAFTBlockDimensions.x, genericSAFTBlockDimensions.y, genericSAFTBlockDimensions.z);
// printf( "genericSAFTGridDimensions X,Y,Z = (%i %i %i)\n",genericSAFTGridDimensions.x, genericSAFTGridDimensions.y, genericSAFTGridDimensions.z);
}
# endif
//Pointeruebergabe der AScan-Daten Geometrie-Daten und Output-Daten von Matlab
# ifdef debug_OutputInfo
// printf( "Give Pointer Names for AScan, Geometry, Output and SoS-Data from Matlab\n");
// printf( "Uebergebener Pointer SoSData fuer SoS-Daten aus Matlab\n");
# endif
aScanSamples = ( float * ) aScan_ptr ;
# ifdef debug_OutputInfo
// printf( "Uebergebene Geometry Pointer fuer Index sowie der Zuordnungs-Tabelle aus Matlab\n");
# endif
emitter_index = ( unsigned short * ) emitter_index_ptr ; // Index for associating emitter to corresponding coordinates
receiver_index = ( unsigned short * ) receiver_index_ptr ; // Index for associating receiver to corresponding coordinates
emitter_list = ( float3 * ) emitter_list_ptr ; // Lookuptable for emitter coordinates
receiver_list = ( float3 * ) receiver_list_ptr ; // Lookuptable for receiver coordinates
# ifdef debug_OutputInfo
// printf( "Uebergebener Pointer output fuer Ausgabe-Daten aus Matlab\n");
# endif
output = ( double * ) output_ptr ;
speedOfSoundFieldVoxelCount = SOSGrid_XYZ . x * SOSGrid_XYZ . y * SOSGrid_XYZ . z ;
speedOfSoundFieldBytes = speedOfSoundFieldVoxelCount * sizeof ( float ) ;
# ifdef debug_OutputVariables
// printf(" speedOfSoundFieldVoxelCount [%ix%ix%i] = %i\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z, speedOfSoundFieldVoxelCount);
// printf(" speedOfSoundFieldBytes = speedOfSoundFieldVoxelCount(%i) x sizeof(float = 4)] = %i\n", speedOfSoundFieldVoxelCount, speedOfSoundFieldBytes);
# endif
# ifdef debug_OutputInfo
// printf( "Uebergebener Pointer speedOfSoundField fuer SoS-Daten aus Matlab\n");
// printf( "Uebergebener Pointer SoSData fuer SoS-Daten aus Matlab\n");
// printf( "Uebergebener Pointer attenuationField fuer ATT-Daten aus Matlab\n");
# endif
speedOfSoundField = ( float * ) speed_vec_ptr ; // Fuer SoSGrid-Mode fuer korrekte Schallgeschwindigkeitskorrektur
SoSData = ( float * ) speed_vec_ptr ; // Fuer Blockmode
attenuationField = ( float * ) att_vec_ptr ; // Fuer SoSGrid-Mode fuer Daempfungskorrektur
// Uebergabe der Outputgroessen aus Matlab.
regionOfInterestVoxelCount = ( uint64_t ) IMAGE_SIZE_XYZ . x * ( uint64_t ) IMAGE_SIZE_XYZ . y * ( uint64_t ) IMAGE_SIZE_XYZ . z ; // Anzahl der Voxel im Volumen
outputSize = regionOfInterestVoxelCount * sizeof ( double ) ; // Speicherbedarf fuer alle Voxel im Volumen
# ifdef debug_OutputVariables
// printf(" regionOfInterestVoxelCount [%ix%ix%i]= %lld\n",IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z, regionOfInterestVoxelCount);
// printf(" outputSize [%lld x sizeof(double = 8)] = %lld\n", regionOfInterestVoxelCount, outputSize);
# endif
//Hier auf maximale Outputgroesse von 32-BitSystem ueberpruefen --> falls Probleme mit 32-Bitsystemen hier noch Abfrage und Abbruch implementieren
if ( regionOfInterestVoxelCount > ( uint64_t ) ( 2 ^ 32 / sizeof ( double ) ) ) { // 2^32 / sizeof(double) = 536870912
// printf("outputSize > 2^32 !!! => works only in 64-Bit System\n");
}
//Groesse der Datenbloecke fuer die Blockverarbeitung wird mit aScanCount angegeben
//Die selbe Anzahl wird auch fuer die Geometriedaten erwartet
# ifdef debug_OutputVariables
// printf( "AScan Blockgroesse (aScanCount)= %i\n", aScanCount);
# endif
aScanSize = aScanLength * sizeof ( float ) ;
batchSize = aScanCount ; // Anzahl der Blockgroesse d.h. wie viele AScans gleichzeitig verarbeitet werden. Batchgroesse ist gleich der Anzahl der uebergebenen Blockgroesse aus Matlab
aScanBatchSize = batchSize * aScanSize ; // Batchgroesse der AScans (* 3000 * sizeof(float)) in Byte
# ifdef debug_OutputVariables
// printf( "aScanSize = aScanLength(%i) * sizeof(float=4) = %i\n", aScanLength, aScanSize);
// printf( "batchSize = aScanCount = %i\n", batchSize);
// printf( "aScanBatchSize = batchSize * aScanSize ( = %i * sizeof(float)) = %i\n", aScanLength, aScanBatchSize);
# endif
// if(batchSize > aScanCount) // Abfrage macht keinen Sinn mehr wenn batchSize = aScanCount;
// {
2023-06-09 11:19:30 +08:00
// printf("A-scan window size cannot be larger than the total number of A-scans");
2023-05-18 16:04:27 +08:00
// //throw ail::exception("A-scan window size cannot be larger than the total number of A-scans");
// }
# ifdef debug_OutputInfo
// printf("\nParameter for Image Reconstruction\n");
// printf( "========================================================================\n");
//std::cout << "ROI dimensions: " << regionOfInterestResolutionX << " x " << regionOfInterestResolutionY << " x " << regionOfInterestResolutionZ << std::endl;
std : : cout < < " IMAGE_SIZE_XYZ: [ " < < IMAGE_SIZE_XYZ . x < < " x " < < IMAGE_SIZE_XYZ . y < < " x " < < IMAGE_SIZE_XYZ . z < < " ] " < < std : : endl ;
std : : cout < < " Voxel count in Volume: " < < regionOfInterestVoxelCount < < std : : endl ;
//std::cout << "Increment vector: (" << regionOfInterestIncrementVector.x << ", " << regionOfInterestIncrementVector.y << ", " << regionOfInterestIncrementVector.z << ")" << std::endl;
std : : cout < < " Increment vector/Resolution: ( " < < IMAGE_RESOLUTION < < " ) " < < std : : endl ;
std : : cout < < " IMAGE_STARTPOINT in meters: " < < regionOfInterestOffset . x < < " " < < regionOfInterestOffset . y < < " " < < regionOfInterestOffset . z < < std : : endl ;
regionOfInterestSize . x = IMAGE_SIZE_XYZ . x * IMAGE_RESOLUTION ;
regionOfInterestSize . y = IMAGE_SIZE_XYZ . y * IMAGE_RESOLUTION ;
regionOfInterestSize . z = IMAGE_SIZE_XYZ . z * IMAGE_RESOLUTION ;
std : : cout < < " ROI size in metres: " < < regionOfInterestSize . x < < " " < < regionOfInterestSize . y < < " " < < regionOfInterestSize . z < < std : : endl ;
std : : cout < < " Batch size/Blocks(Ascan, R/E-Combi): " < < batchSize < < std : : endl ;
// printf( "========================================================================\n\n");
# endif
// #ifdef debug_OutputPerformance
// struct timeval startPerformCoreReconstruction, stopPerformCoreReconstruction;
// gettimeofday(&startPerformCoreReconstruction, NULL);
// #endif
//perform processing with AScan-Data
//===========================================================================================================
ullong duration ;
processAScans ( duration ) ;
//===========================================================================================================
# ifdef debug_OutputPerformance
double numerator = static_cast < double > ( aScanCount ) * regionOfInterestVoxelCount ; // Performanz [Ascans * GVoxel/s]
double performance = numerator / duration ;
//adjust for the change from voxels per millisecond to gigavoxels per second (=> 10^3 * 10^-9 = 10^-6)
performance / = 1e9 ;
//std::cout << "# Device ("<< (int)deviceId <<"): Duration of main processing: " << (int)duration << " us" << std::endl;
//std::cout << "# Device ("<< (int)deviceId <<"): Performance: " << performance << " AScan * GVoxel/s" << std::endl;
# endif
//Duration_ptr[(deviceId+1)] = (double)duration; // Für jede GPU einen Laufzeitwert in µs übermitteln // Angabe von ID der GPU abhaengig
Duration_ptr [ ( deviceIndex + 1 ) ] = ( double ) duration ; // Angabe von Reihenfolge der angegebenen GPU-IDs abhaengig
# ifdef debug_OutputVariables
//// printf( "Duration_ptr[%i] = duration(%i) = %f\n", (deviceId+1), duration, Duration_ptr[(deviceId+1)]);
// printf( " GPU (%s:ID %i,Index %i): => Duration_ptr[%i] = duration(%i µs) = %.2f s\n", device.name, deviceId, deviceIndex, (deviceIndex+1), duration, Duration_ptr[(deviceIndex+1)]/1000/1000);
# endif
// #ifdef debug_OutputVariables
// // printf( "Duration_ptr[0] = duration(%i) = %f\n", duration, Duration_ptr[0]);
// #endif
// Reset Device
// #ifdef debug_OutputInfo
// // printf( "Device was used: %s (%i)\n", deviceProperties[deviceId].name , deviceId);
// #endif
// CUDA_CHECK(cudaSetDevice(deviceId));
# ifdef debug_OutputInfo // Reset Device
// printf("Reset Device\n");
# endif
//CUDA_CHECK(cudaDeviceReset());
# ifdef debug_OutputFunctions
// printf( "<== SAFTHandler::performReconstruction - End\n");
# endif
}
/**
The SAFT kernel expects arguments in which the grid dimensions have been reduced to less than three dimensions and the block dimensions are reduced to only one dimension .
This also depends on the properties of the hardware available ( shader model ) .
- Der SAFT Kernel erwartet Argumente in den die Grid Dimension auf drei Dimensionen reduziert wurde und die Block - Dimensionen auf nur eine Dimension reduziert ist .
- Das haengt auch von den Eigenschaften der verfuegbaren HW ab ( shader model )
*/
void SAFTHandler : : reduceKernelDimensions (
dim3 const & gridDimensions , ///< Input grid dimensions.
dim3 const & blockDimensions , ///< Input block dimensions.
dim3 & reducedGridDimensions , ///< Reduced output grid dimensions.
dim3 & reducedBlockDimensions ///< Reduced output block dimensions.
)
{
# ifdef debug_OutputFunctions
// printf( "==> SAFTHandler::reduceKernelDimensions - Start\n");
# endif
if ( deviceProperties [ deviceId ] . maxGridSize [ 2 ] > 1 )
{
reducedGridDimensions = gridDimensions ;
# ifdef debug_OutputParameter
// printf( "reducedGridDimensions X,Y,Z = (%i %i %i)\n",reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z);
# endif
}
else
{
reducedGridDimensions = dim3 (
gridDimensions . x * gridDimensions . y ,
gridDimensions . z ,
1
) ;
# ifdef debug_OutputParameter
// printf( "reducedGridDimensions X,Y,Z = (%i %i %i)\n",reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z);
# endif
}
reducedBlockDimensions = dim3 ( blockDimensions . x * blockDimensions . y * blockDimensions . z ) ;
# ifdef debug_OutputParameter
// printf( "reducedBlockDimensions X,Y,Z = (%i %i %i)\n", reducedBlockDimensions.x, reducedBlockDimensions.y, reducedBlockDimensions.z);
# endif
# ifdef debug_OutputFunctions
// printf( "<== SAFTHandler::reduceKernelDimensions - End\n");
# endif
}
/**
Utility function to perform integer based divison which rounds up instead of down .
- N <EFBFBD> tzliche Funktion : eine Integerbasierte Division die aufrundet und nicht abrundet
@ return Quotient of the divison , rounded up .
*/
std : : size_t ceilingDivision (
std : : size_t dividend , ///< Dividend of the division.
std : : size_t divisor ///< Divisor of the division.
)
{
std : : size_t output = dividend / divisor ;
if ( dividend % divisor )
output + + ;
return output ;
}
/**
Converts an offset based on two different resolutions .
This is a utility function used to deal with the number of z - layers in the speed of sound grid .
- Konvertiert einen Offset , basierend auf zwei verschiedenen Aufloesungen
- Diese n <EFBFBD> tzliche Funktion wird genutzt um mit der Anzahl der z - Layer in dem Spallgeschwindigkeits - Grid umzugehen .
@ return Result of the conversion .
*/
std : : size_t SAFTHandler : : resolutionConversion (
std : : size_t input , ///< Offset.
std : : size_t greaterResolution , ///< Greater resolution.
std : : size_t lowerResolution ///< Lower resolution.
)
{
# ifdef debug_OutputFunctions
// printf( "==> SAFTHandler::resolutionConversion - Start\n");
// printf( "<== SAFTHandler::resolutionConversion - End\n");
# endif
return ceilingDivision ( input * lowerResolution , greaterResolution ) ;
}
/**
Perform calculations pertaining to the execution of the speed of sound pre - calculations .
- F <EFBFBD> hre Berechnungen der Schallgeschwindigkeit - Vorberechnung aus
*/
void SAFTHandler : : determineSpeedOfSoundData (
std : : size_t regionOfInterestZLayers ///< Number of z-layers within the region of interest that are currently being processed. This number is often smaller than the total number of z-layers.
)
{
# ifdef debug_OutputFunctions
// printf( "==> SAFTHandler::determineSpeedOfSoundData - Start\n");
# endif
// //Determine the maximum number of z-layers to be pre-calculated within the speed of sound grid
// //Bestimme die maximale Anzahl an Z-layer, die in dem SoS-Grid Vorberechnet werden.
// //std::size_t maximumSpeedOfSoundPartialZLayerCount = resolutionConversion(regionOfInterestZLayers, regionOfInterestResolutionZ, regionOfInterestGridSizeZ);
// std::size_t maximumSpeedOfSoundPartialZLayerCount = resolutionConversion(regionOfInterestZLayers, IMAGE_SIZE_XYZ.z, regionOfInterestGridSizeZ);
//
// partialSpeedOfSoundVoxelCount = maximumSpeedOfSoundPartialZLayerCount * regionOfInterestGridSizeX * regionOfInterestGridSizeY;
//
//// deviceTableVoxelToEmitterPathCountSize = sosZLayerVoxelCount * emitter_list_Size * partialSoSZLayerCount * sizeof(VoxelCountType); // Gr<47> <72> e f<> r Speicher der Pfadanzahl * die Anzahl der gleichzeitig genutzten Z-Layer f<> r alle Emitter
//// deviceTableVoxelToEmitterPathSosSumSize = sosZLayerVoxelCount * emitter_list_Size * partialSoSZLayerCount * sizeof(float);
//// deviceTableVoxelToReceiverPathCountSize = sosZLayerVoxelCount * receiver_list_Size * partialSoSZLayerCount * sizeof(VoxelCountType); // Gr<47> <72> e f<> r Speicher der Pfadanzahl * die Anzahl der gleichzeitig genutzten Z-Layer f<> r alle Receiver
//// deviceTableVoxelToReceiverPathSosSumSize = sosZLayerVoxelCount * receiver_list_Size * partialSoSZLayerCount * sizeof(float);
//
// std::size_t
// emitterSpeedOfSoundVoxelCombinations = emitterCount * partialSpeedOfSoundVoxelCount,
// receiverSpeedOfSoundVoxelCombinations = receiverCount * partialSpeedOfSoundVoxelCount;
//
// emitterToVoxelPathVoxelDataSize = emitterSpeedOfSoundVoxelCombinations * sizeof(VoxelCountType);
// emitterToVoxelPathSpeedDataSize = emitterSpeedOfSoundVoxelCombinations * sizeof(float);
//
// voxelToReceiverPathVoxelDataSize = receiverSpeedOfSoundVoxelCombinations * sizeof(VoxelCountType);
// voxelToReceiverPathSpeedDataSize = receiverSpeedOfSoundVoxelCombinations * sizeof(float);
# ifdef debug_OutputFunctions
// printf( "<== SAFTHandler::determineSpeedOfSoundData - End\n");
# endif
}
/**
Perform initialisations for the partial reconstructions for both the speed of sound pre - calculation and the actual reconstruction .
- F <EFBFBD> hre Initialisierungen fuer eine Teilrekonstruktion von beiden durch : Der Schallgeschwindigkeit und der aktuellen Rekonstruktion
*/
void SAFTHandler : : partialReconstructionInitialisation ( )
{
# ifdef debug_OutputFunctions
// printf( "==> SAFTHandler::partialReconstructionInitialisation - Start\n");
# endif
//
// if(!partialReconstructionInitialised)
// {
// std::cout << "Initialising partial reconstruction data" << std::endl;
//
// //zLayerVoxelCount = regionOfInterestResolutionX * regionOfInterestResolutionY;
// zLayerVoxelCount = IMAGE_SIZE_XYZ.x * IMAGE_SIZE_XYZ.y; // Anzahl der X-Y-Voxel bestimmen den Schritt in das naechste Layer.
//
// partialOutputVoxelCount = partialOutputSize / sizeof(double);
//// if(partialOutputVoxelCount % zLayerVoxelCount != 0) //Sicherheitsabfrage nun im kernel
2023-06-09 11:19:30 +08:00
//// printf("The partial output size must consist of a discrete number of z-layers for the chosen resolution");
2023-05-18 16:04:27 +08:00
// //throw ail::exception("The partial output size must consist of a discrete number of z-layers for the chosen resolution");
// partialOutputZLayerCount = partialOutputVoxelCount / zLayerVoxelCount;
//
//// if(partialOutputZLayerCount % genericSAFTBlockDimensions.z != 0) //Sicherheitsabfrage nun im kernel
2023-06-09 11:19:30 +08:00
//// printf("The number of Z-layers in the output window must be a multiple of the reconstruction block dimensions");
2023-05-18 16:04:27 +08:00
// //throw ail::exception("The number of Z-layers in the output window must be a multiple of the reconstruction block dimensions");
//
// //Make dynamically sized allocations for the pre-calculated speed of sound data.
// //The size depends on the number of z-layers in the output window.
// //These particular pre-calculations are no longer performed only once for all voxels.
// //Instead, they are performed partially, prior to each launch of the SAFT kernel.
// //This lowers the pressure on GPU global memory.
//
// //F<> hre Allokationen mit dynamischer Groesse aus fuer die Vor-Verarbeitung der SoS-Daten
// //Die Groesse haengt von der Anzahl der z-Layer in dem -Fenster ab.
// //Diese partielle-Vorberechnung muss nur einmal fuer alle Voxel durchgef<65> hrt werden.
// //Stattdessen werden sie immer partiell durchgef<65> hrt, vor jedem Start des SAFT-Kernels.
// //Das entlastet den globalen GPU-Speicher.
//
// determineSpeedOfSoundData(partialOutputZLayerCount);
//
// // printf( "CUDA:Memory Allokation: deviceEmitterToVoxelPathVoxelCounts der Groesse:%i\n", emitterToVoxelPathVoxelDataSize);
// CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&deviceEmitterToVoxelPathVoxelCounts), emitterToVoxelPathVoxelDataSize));
// // printf( "CUDA:Memory Allokation: deviceEmitterToVoxelPathSpeedOfSoundSum der Groesse:%i\n", emitterToVoxelPathSpeedDataSize);
// CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&deviceEmitterToVoxelPathSpeedOfSoundSum), emitterToVoxelPathSpeedDataSize));
//
// // printf( "CUDA:Memory Allokation: deviceVoxelToReceiverPathVoxelCounts der Groesse:%i\n", voxelToReceiverPathVoxelDataSize);
// CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&deviceVoxelToReceiverPathVoxelCounts), voxelToReceiverPathVoxelDataSize));
// // printf( "CUDA:Memory Allokation: deviceVoxelToReceiverPathSpeedOfSoundSum der Groesse:%i\n", voxelToReceiverPathSpeedDataSize);
// CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&deviceVoxelToReceiverPathSpeedOfSoundSum), voxelToReceiverPathSpeedDataSize));
//
// partialReconstructionInitialised = true;
// }
# ifdef debug_OutputFunctions
// printf( "<== SAFTHandler::partialReconstructionInitialisation - End\n");
# endif
}
/**
Print free / total memory available on the chosen device .
- Gibt freien / totalen zur verf <EFBFBD> gung stehenden Speicher auf dem gew <EFBFBD> hlten Device aus .
*/
void memoryCheck ( )
{
# ifdef debug_OutputFunctions
// printf( "==> memoryCheck - Start\n");
# endif
std : : size_t
totalMemory ,
freeMemory ;
CUDA_CHECK ( cudaMemGetInfo ( & freeMemory , & totalMemory ) ) ;
# if defined(debug_OutputInfo) || defined(debug_OutputMaxMemory)
//printSize(" Total memory ", totalMemory);
//std::cout << " ( " << totalMemory << " )" << std::endl;
//printSize(" Free memory ", freeMemory);
//std::cout << " ( " << freeMemory << " )" << std::endl;
//printSize(" => Used memory ", (totalMemory-freeMemory));
//std::cout << " ( " << (totalMemory-freeMemory) << " )" << std::endl;
# endif
# ifdef debug_OutputFunctions
// printf( "<== memoryCheck - End\n");
# endif
}
/**
Generic CUDA call wrapper .
Check the result of a CUDA operation and throw an exception if an error occurred .
This is used in combination with a macro in saft . hpp .
- Generischer CUDA Call Wrapper
- <EFBFBD> berpr <EFBFBD> ft die Ergebnisse einer CUDA Operation und wirft eine Exception wenn ein Fehler auftritt
- Das wird wird mit einer Kombination mit einem Makro in saft . hpp genutzt .
*/
//inline // Da performCUDAResultCheck in allen Files genutzt werden soll funktioniert inline und etern nicht zusammen
void performCUDAResultCheck (
cudaError_t result , ///< Result of the CUDA operation.
std : : string const & file , ///< Path to the source code file.
int line ///< Line within the source code
)
{
if ( result ! = cudaSuccess )
{
//// printf("A CUDA operation failed in file \"%s\" (line %i): %s \n", file, line, cudaGetErrorString(result).c_str() );
// printf("%s\n", cudaGetErrorString( cudaGetLastError() ) );
//std::string errorMessage = "A CUDA operation failed in file \"" + file + "\" (line " + ail::number_to_string(line) + "): " + std::string(cudaGetErrorString(result));
//std::cout << errorMessage << std::endl;
2023-06-09 11:19:30 +08:00
printf ( " -> Error occurred " ) ;
2023-05-18 16:04:27 +08:00
}
}