1948 lines
84 KiB
C++
1948 lines
84 KiB
C++
#include "SAFT_TOFI.h"
|
||
#include "saft.hpp"
|
||
|
||
#include <cstddef>
|
||
#include <math.h>
|
||
|
||
#include <locale.h> // For German printf output format: Float with , instead of .
|
||
|
||
#include <iostream>
|
||
#include <map>
|
||
#include <string>
|
||
|
||
#include <stdint.h>
|
||
|
||
#include <future>
|
||
#include <array>
|
||
#include <chrono>
|
||
|
||
// TODO: Blockgroesse (z > 1) fuehrt zu Kernelabbruechen
|
||
|
||
//pthread handle
|
||
typedef struct thread_handle_t {
|
||
//pthread_t pthread;
|
||
|
||
int deviceId;
|
||
int deviceIndex;
|
||
float *aScan_ptr;
|
||
double *output_ptr;
|
||
double *Duration_ptr;
|
||
unsigned short *receiver_index_ptr;
|
||
unsigned short *emitter_index_ptr;
|
||
float *receiver_list_ptr;
|
||
int receiver_list_Size;
|
||
float *emitter_list_ptr;
|
||
int emitter_list_Size;
|
||
float *speed_vec_ptr;
|
||
int3 SOSGrid_XYZ;
|
||
float3 sosOffset;
|
||
float SOS_RESOLUTION;
|
||
|
||
float *att_vec_ptr;
|
||
|
||
int aScanCount;
|
||
int aScanLength;
|
||
|
||
float inc;
|
||
int3 res;
|
||
float sampleRate;
|
||
float3 volposition;
|
||
|
||
int num_threads;
|
||
dim3 fixedBlockDimensions;
|
||
|
||
float debugMode;
|
||
float debugModeParameter;
|
||
bool SOSMode_3DVolume;
|
||
bool ATTMode_3DVolume;
|
||
int SAFT_MODE;
|
||
int *SAFT_VARIANT;
|
||
int SAFT_VARIANT_Size;
|
||
int *Abort_ptr;
|
||
|
||
} thread_handle;
|
||
|
||
|
||
//Convenient typedefs for GPU-DeviceProperties container
|
||
typedef std::vector<cudaDeviceProp> DeviceProperties;
|
||
|
||
/**
|
||
Load CUDA devices and write them to a container.
|
||
*/
|
||
void loadDevices(
|
||
DeviceProperties & output ///< This argument is written to. Container in which the device data are stored.
|
||
)
|
||
{
|
||
#ifdef debug_OutputFunctions
|
||
printf( "==> loadDevices - Start\n");
|
||
#endif
|
||
int deviceCount;
|
||
CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
|
||
|
||
#ifdef debug_OutputInfo
|
||
printf( "There are %i devices present:\n", deviceCount);
|
||
#endif
|
||
|
||
output.reserve(static_cast<std::size_t>(deviceCount));
|
||
for(int i = 0; i < deviceCount; i++)
|
||
{
|
||
cudaDeviceProp & device = output[i];
|
||
CUDA_CHECK(cudaGetDeviceProperties(&device, i));
|
||
#ifdef debug_OutputInfo
|
||
std::cout << " " << (i + 1) << ". " << device.name << std::endl;
|
||
std::cout << " " << "Byte Total Global Mem: " << device.totalGlobalMem <<std::endl;
|
||
std::cout << " " << "Compute Capability: " << device.major << "." << device.minor <<std::endl;
|
||
#endif
|
||
output.push_back(device);
|
||
}
|
||
|
||
#ifdef debug_OutputFunctions
|
||
std::cout << "<== loadDevices - End" << std::endl;
|
||
#endif
|
||
}
|
||
|
||
|
||
//pthread call function
|
||
void thread_function (void *arg) {
|
||
|
||
thread_handle *pthread_handle = (thread_handle*)arg;
|
||
|
||
#ifdef debug_OutputSAFTHandlerThreadPerformance
|
||
struct timeval startSAFTHandler, stopSAFTHandler;
|
||
gettimeofday(&startSAFTHandler, NULL);
|
||
#endif
|
||
|
||
// Create Instance of SAFT-Handler and call constructor
|
||
SAFTHandler saft(
|
||
pthread_handle->deviceId, // deviceId
|
||
pthread_handle->deviceIndex, // deviceIndex
|
||
|
||
pthread_handle->aScan_ptr, // aScan_ptr,
|
||
pthread_handle->output_ptr, // output_ptr,
|
||
pthread_handle->Duration_ptr, // Duration_ptr,
|
||
|
||
pthread_handle->receiver_index_ptr, // receiver_index_ptr ///<
|
||
pthread_handle->emitter_index_ptr, // emitter_index_ptr ///<
|
||
pthread_handle->receiver_list_ptr, // receiver_list_ptr ///<
|
||
pthread_handle->receiver_list_Size, // receiver_list_Size ///<
|
||
pthread_handle->emitter_list_ptr, // emitter_list_ptr ///<
|
||
pthread_handle->emitter_list_Size, // emitter_list_Size ///<
|
||
|
||
pthread_handle->speed_vec_ptr, // speed_vec_ptr,
|
||
|
||
pthread_handle->SOSGrid_XYZ, // SOSGrid_XYZ,
|
||
pthread_handle->sosOffset, // sosOffset, ///< Startpoint of SoSGrid
|
||
pthread_handle->SOS_RESOLUTION, // SOS_RESOLUTION, ///< Resolution of SoSGrid
|
||
|
||
pthread_handle->att_vec_ptr, // att_vec_ptr
|
||
|
||
pthread_handle->aScanCount, // aScanCount,
|
||
pthread_handle->aScanLength, // aScanLength
|
||
pthread_handle->res, // resolution => IMAGE_SIZE_XYZ,
|
||
pthread_handle->sampleRate, // sampleRate
|
||
pthread_handle->volposition, // => regionOfInterestOffset,
|
||
pthread_handle->inc, // IMAGE_RESOLUTION,
|
||
|
||
pthread_handle->fixedBlockDimensions, // fixedBlockDimensions
|
||
pthread_handle->debugMode, // debugMode
|
||
pthread_handle->debugModeParameter, // Parameter for DebugMode
|
||
|
||
pthread_handle->SOSMode_3DVolume,
|
||
pthread_handle->ATTMode_3DVolume,
|
||
|
||
pthread_handle->SAFT_MODE,
|
||
pthread_handle->SAFT_VARIANT,
|
||
pthread_handle->SAFT_VARIANT_Size,
|
||
pthread_handle->Abort_ptr // If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
|
||
|
||
);
|
||
|
||
saft.performReconstruction();
|
||
|
||
#ifdef debug_OutputSAFTHandlerThreadPerformance
|
||
CUDA_CHECK(cudaDeviceSynchronize());
|
||
gettimeofday(&stopSAFTHandler, NULL);
|
||
double diff_time = (double)((stopSAFTHandler.tv_sec * 1000000.0 + stopSAFTHandler.tv_usec) - (startSAFTHandler.tv_sec * 1000000.0 + startSAFTHandler.tv_usec));
|
||
printf ("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
|
||
printf ("{ ~~~> } Device (%i) - SAFTHandler-Thread : %8.0f µs\n", pthread_handle->deviceId, diff_time);
|
||
|
||
double performance_Thread = (((double)pthread_handle->aScanCount * (double)pthread_handle->res.x * (double)pthread_handle->res.y * (double)pthread_handle->res.z) / diff_time )/ 1000.0;
|
||
printf ("{ ~~~> } Device (%i) - SAFTHandler-Thread Performance: %.6lf A-Scan * GVoxel/s\n", pthread_handle->deviceId, performance_Thread);
|
||
printf ("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
|
||
#endif
|
||
|
||
//pthread_exit(NULL);
|
||
|
||
}
|
||
|
||
|
||
|
||
|
||
/**
|
||
Check amount of GPUs and divide Volume ins parts with same size
|
||
Abfrage der Anzahl an GPU-Devices und Einteilung des Volumens in möglichst gleichgroße Volumen in Z-Richtung (3D-Volumen) oder Y-Richtung (2D-Volumen) auf.
|
||
*/
|
||
|
||
void multithreaded_processing(
|
||
float *aScan_ptr, ///< AScan-Daten
|
||
double *output_ptr, ///< OutputDaten der Voxel
|
||
|
||
unsigned short *receiver_index_ptr, ///< Index Receiver per Ascan
|
||
unsigned short *emitter_index_ptr, ///< Index Emitter per Ascan
|
||
float *receiver_list_ptr, ///< Positionskoordinaten Receiver
|
||
int receiver_list_Size, ///< Menge an Receiver
|
||
float *emitter_list_ptr, ///< Positionskoordinaten Emitter
|
||
int emitter_list_Size, ///< Menge an Emitter
|
||
|
||
float *speed_vec_ptr, ///< SoS Daten im Blockmode oder als SoSGrid
|
||
|
||
int3 SOSGrid_XYZ, ///< Size of SoSGrid
|
||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||
|
||
float *att_vec_ptr, ///< Attenuation Daten als ATTGrid
|
||
|
||
int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen
|
||
int aScanLength, ///< Laenge der AscanDaten (normal 3000)
|
||
float3 regionOfInterestOffset,
|
||
int3 IMAGE_SIZE_XYZ, ///< Groesse des Bildbereichs in Voxel
|
||
float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs
|
||
float sampleRate, ///< Samplerate für AScans
|
||
int3 BlockDim_XYZ, ///< BlockDimension für GPU
|
||
double *Duration_ptr, ///< Rückgabepointer an Matlab für Laufzeit des SAFT-Kernels
|
||
int selectedNumberGPUs, ///< Anzahl der ausgewählten GPUs bzw. auf maximale Anzhal vorhandener begrenzt
|
||
int *enableGPUs_ptr, ///< Gibt an welche GPUs genutzt werden und welche nicht
|
||
float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden
|
||
float debugModeParameter, ///< Parameter der mit fuer Debugmode uebermittelt werden kann
|
||
|
||
bool SOSMode_3DVolume, ///< Wird 3D Volumen für SOS-Korrektur genutzt?
|
||
bool ATTMode_3DVolume, ///< Wird 3D Volumen für ATT-Korrektur genutzt?
|
||
|
||
int SAFT_MODE, ///< Modus für SAFT-Rekonstruktion
|
||
int *SAFT_VARIANT, ///< Verschiedene Parameter der Rekonstruktion
|
||
int SAFT_VARIANT_Size, ///< Menge der verschiedenen Parameter für Rekonstruktion
|
||
int *Abort_ptr ///< FehlerArray
|
||
)
|
||
{
|
||
#ifdef debug_OutputFunctions
|
||
printf( "==> multithreaded_processing - Start\n");
|
||
#endif
|
||
|
||
dim3 fixedBlockDimensions( // convert int3 to dim3
|
||
BlockDim_XYZ.x,
|
||
BlockDim_XYZ.y,
|
||
BlockDim_XYZ.z
|
||
);
|
||
|
||
// Divide workload and show Information ------------------------------------------------------------------------------------------------------------------
|
||
// Divide workload in pieces with the same size for all available GPUs.
|
||
// If the workload can not be divided in pieces with the same size, the last piece will be the one with little less workload.
|
||
|
||
// Testfall simuliert die mehrfache Anzahl an GPUs
|
||
int num_devices_factor = 1; // Vielfache an GPUs simulieren bzw. kleinere Pakete erzeugen
|
||
int num_workingPackages = selectedNumberGPUs*num_devices_factor;
|
||
|
||
float3 *position = (float3*)malloc(num_workingPackages * sizeof(float3));
|
||
int3 *resolution = (int3*) malloc(num_workingPackages * sizeof(int3));
|
||
int3 *volumeStartpoint = (int3*) malloc(num_workingPackages * sizeof(int3));
|
||
size_t *volumePtr = (size_t*)malloc(num_workingPackages * sizeof(size_t));
|
||
//int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int));
|
||
|
||
float3 volposition = regionOfInterestOffset; // Uebergabe der Parameter
|
||
float inc = IMAGE_RESOLUTION; // koennte auch direkt umbenannt werden
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
printf("~ Dividing workload between %i devices ~\n", num_devices );
|
||
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
#endif
|
||
|
||
size_t volume_size = (size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z * (size_t)sizeof(double); // = Groesse des Outputvolumens in Byte
|
||
|
||
|
||
if (( IMAGE_SIZE_XYZ.y == 1 ) && ( IMAGE_SIZE_XYZ.z == 1 )){
|
||
selectedNumberGPUs = 1;
|
||
num_workingPackages = 1;
|
||
#ifdef debug_OutputMultiGpu
|
||
printf( "IMAGE_SIZE_XYZ.y = IMAGE_SIZE_XYZ.z = 1 ==> Use only one GPU\n", num_devices);
|
||
#endif
|
||
}
|
||
|
||
|
||
volumeStartpoint[0].x = 0;
|
||
volumeStartpoint[0].y = 0;
|
||
volumeStartpoint[0].z = 0;
|
||
|
||
position[0].x = volposition.x; // Startposition
|
||
position[0].y = volposition.y;
|
||
position[0].z = volposition.z;
|
||
|
||
std::vector<int> resolutionZs(num_workingPackages, IMAGE_SIZE_XYZ.z / num_workingPackages);
|
||
for (size_t i = 0; i < IMAGE_SIZE_XYZ.z % num_workingPackages; i++)
|
||
resolutionZs[i]++;
|
||
|
||
int i,j,k;
|
||
|
||
for (i=0; i < num_workingPackages; i++ ) {
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("Working Package [%i]\n", i);
|
||
#endif
|
||
|
||
if ( IMAGE_SIZE_XYZ.z > 1 ) { // Divide in Z-Direction
|
||
#ifdef debug_OutputMultiGpu
|
||
printf( "( IMAGE_SIZE_XYZ.z > 1 ) => Divide through %i WP in Z-Direction for %i GPUs\n", num_workingPackages, selectedNumberGPUs);
|
||
#endif
|
||
|
||
resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y;
|
||
resolution[i].z = resolutionZs[i];
|
||
|
||
if (i>0)
|
||
{
|
||
volumeStartpoint[i].x = 0; // Koordinaten als Startpunkt für Layer in einzelnen GPUs
|
||
volumeStartpoint[i].y = 0;
|
||
volumeStartpoint[i].z = volumeStartpoint[i-1].z + resolution[i-1].z;
|
||
}
|
||
|
||
volumePtr[i] = (size_t)((size_t)resolution[0].x * (size_t)resolution[0].y * (size_t)volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf(" - volumeStartpoint[%i] = [%d %d %d]\n", i, volumeStartpoint[i].x, volumeStartpoint[i].y, volumeStartpoint[i].z);
|
||
printf(" => volume_size[%i] = [%i %i %i]*double = %lld kB\n", i, resolution[i].x, resolution[i].y, resolution[i].z, ((uint64_t)resolution[i].x * (uint64_t)resolution[i].y * (uint64_t)resolution[i].z * (uint64_t)sizeof(double)) / ((uint64_t)1024));
|
||
printf(" => volumePtr[%i] = [%lld]\n", i, (uint64_t)volumePtr[i]);
|
||
#endif
|
||
|
||
}
|
||
else { // Divide in Y-Direction
|
||
#ifdef debug_OutputMultiGpu
|
||
printf( "( IMAGE_SIZE_XYZ.z = 1 ) => Divide through %i in Y-Direction for %i GPUs\n", num_workingPackages, selectedNumberGPUs);
|
||
#endif
|
||
|
||
resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization
|
||
resolution[i].z = IMAGE_SIZE_XYZ.z;
|
||
|
||
if ( IMAGE_SIZE_XYZ.y % num_workingPackages == 0) { // Volume is divisible
|
||
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages;
|
||
}
|
||
else { // if not divisible,
|
||
|
||
if (i != (num_workingPackages - 1)){ // increment each GPU slice by one
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages + 1;
|
||
}
|
||
else { // except the last one, which get the remaining Layers
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y % resolution[0].y;
|
||
}
|
||
|
||
}
|
||
}
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("WP[%i] on deviceId[%i]=deviceIndex[%i]\n", i, enableGPUs_ptr[(i % selectedNumberGPUs)], (i % selectedNumberGPUs));
|
||
printf(" - resolution[%i].x = %i\n", i, resolution[i].x );
|
||
printf(" - resolution[%i].y = %i\n", i, resolution[i].y );
|
||
printf(" - resolution[%i].z = %i\n", i, resolution[i].z );
|
||
#endif
|
||
|
||
position[i].x = volposition.x; // Startposition
|
||
position[i].y = volposition.y;
|
||
position[i].z = volposition.z;
|
||
|
||
if ( IMAGE_SIZE_XYZ.z > 1 ) position[i].z += i*inc*resolution[0].z; // Calculate Startpositions for the workload-pieces
|
||
else position[i].y += i*inc*resolution[0].y;
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf(" - position[%i].x = %f\n", i, position[i].x );
|
||
printf(" - position[%i].y = %f\n", i, position[i].y );
|
||
printf(" - position[%i].z = %f\n", i, position[i].z );
|
||
#endif
|
||
|
||
}
|
||
|
||
// Create one thread per GPU ------------------------------------------------------------------------------------------------------------------------
|
||
thread_handle *pthread_handle = (thread_handle*)malloc(selectedNumberGPUs * sizeof(thread_handle));
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("-> pthread_handles for %i workingPackages created!\n", num_workingPackages);
|
||
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
#endif
|
||
|
||
|
||
|
||
for ( i = 0; i < num_workingPackages; i++ ) {
|
||
|
||
//printf("num_workingPackages = %i !\n", num_workingPackages);
|
||
//printf("num_devices = %i\n", num_devices);
|
||
//printf("(i %% num_devices) = %i !\n", (i % num_devices));
|
||
//printf("deviceId = enableGPUs_ptr[%i] = %i !\n", (i % num_devices), enableGPUs_ptr[(i % num_devices)]);
|
||
//printf("deviceIndex = (i %% num_devices) = %i !\n", (i % num_devices));
|
||
|
||
// initialize control block
|
||
pthread_handle[i].deviceId = enableGPUs_ptr[(i % selectedNumberGPUs)]; // Hier DeviceID der GPU setzen
|
||
pthread_handle[i].deviceIndex = (i % selectedNumberGPUs);
|
||
|
||
pthread_handle[i].aScan_ptr = aScan_ptr;
|
||
if ( IMAGE_SIZE_XYZ.z > 1 )
|
||
pthread_handle[i].output_ptr = &output_ptr[volumePtr[i]]; // Startpoint for Outputvolume.
|
||
//volumePtr[i] = (size_t)(resolution[0].x * resolution[0].y * volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen
|
||
else
|
||
//pthread_handle[i].output_ptr = &output_ptr[ i * resolution[0].x * resolution[0].y * resolution[0].z ]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Gr<47><72>e hat.
|
||
pthread_handle[i].output_ptr = &output_ptr[ (size_t)resolution[0].x * (size_t)i * (size_t)resolution[0].y]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Groesse hat. Z spielt hier keine Rolle
|
||
|
||
|
||
pthread_handle[i].Duration_ptr = Duration_ptr;
|
||
pthread_handle[i].receiver_index_ptr = receiver_index_ptr;
|
||
pthread_handle[i].emitter_index_ptr = emitter_index_ptr;
|
||
pthread_handle[i].receiver_list_ptr = receiver_list_ptr;
|
||
pthread_handle[i].receiver_list_Size = receiver_list_Size;
|
||
pthread_handle[i].emitter_list_ptr = emitter_list_ptr;
|
||
pthread_handle[i].emitter_list_Size = emitter_list_Size;
|
||
pthread_handle[i].speed_vec_ptr = speed_vec_ptr;
|
||
pthread_handle[i].SOSGrid_XYZ = SOSGrid_XYZ;
|
||
pthread_handle[i].sosOffset = sosOffset;
|
||
pthread_handle[i].SOS_RESOLUTION = SOS_RESOLUTION;
|
||
pthread_handle[i].att_vec_ptr = att_vec_ptr;
|
||
|
||
pthread_handle[i].aScanCount = aScanCount;
|
||
pthread_handle[i].aScanLength = aScanLength,
|
||
pthread_handle[i].inc = IMAGE_RESOLUTION;
|
||
pthread_handle[i].res = resolution[i];
|
||
pthread_handle[i].sampleRate = sampleRate;
|
||
pthread_handle[i].volposition = position[i]; //regionOfInterestOffset
|
||
|
||
pthread_handle[i].num_threads = num_workingPackages;
|
||
|
||
pthread_handle[i].fixedBlockDimensions = fixedBlockDimensions; //
|
||
pthread_handle[i].debugMode = debugMode;
|
||
pthread_handle[i].debugModeParameter = debugModeParameter;
|
||
pthread_handle[i].SOSMode_3DVolume = SOSMode_3DVolume;
|
||
pthread_handle[i].ATTMode_3DVolume = ATTMode_3DVolume;
|
||
|
||
pthread_handle[i].SAFT_MODE = SAFT_MODE;
|
||
pthread_handle[i].SAFT_VARIANT = SAFT_VARIANT;
|
||
pthread_handle[i].SAFT_VARIANT_Size = SAFT_VARIANT_Size;
|
||
|
||
Abort_ptr[i] = 0; // Initialisieren mit kein Fehler
|
||
pthread_handle[i].Abort_ptr = &Abort_ptr[i];
|
||
|
||
}
|
||
|
||
auto startAllThreads = std::chrono::steady_clock::now();
|
||
double diff_time = 0.0;
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
printf("pthread_handles for %i devices initialized!\n", selectedNumberGPUs)-1;
|
||
|
||
#endif
|
||
|
||
// Zeitmessung ueber alle Threads
|
||
//gettimeofday(&startAllThreads, NULL);
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("-> pthread_handles for %i workingPackages initialized!\n", num_workingPackages);
|
||
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
#endif
|
||
|
||
std::vector<std::future<void>> futures;
|
||
futures.resize(selectedNumberGPUs);
|
||
for ( j = 0; j < num_devices_factor; j++ ) {
|
||
|
||
|
||
for ( k = 0; k < selectedNumberGPUs; k++ ) {
|
||
//for ( i = 0; i < num_devices; i++ ) {
|
||
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("]~~~~> create thread (%i) [WP (%i) on Device (%i)] and execute thread_function!\n", (j*selectedNumberGPUs+k), (j*selectedNumberGPUs+k), enableGPUs_ptr[k]);
|
||
#endif
|
||
//create thread and start execution by calling thread_function
|
||
//===========================================================================================
|
||
//pthread_create(&pthread_handle[(j*selectedNumberGPUs+k)].pthread, NULL, &thread_function, &pthread_handle[(j*selectedNumberGPUs+k)] );
|
||
//===========================================================================================
|
||
|
||
//new async threads
|
||
futures[k] = std::async(std::forward<std::function<void (void*)>>(thread_function), std::forward<void*>((void*)&pthread_handle[(j * selectedNumberGPUs + k)])); //forward used for perfect forwarding
|
||
|
||
}
|
||
|
||
//Synchronization and termination -------------------------------------------------------------------------------------------------------------------
|
||
for ( k = 0; k < selectedNumberGPUs; k++ ) {
|
||
|
||
//wait for threads to finish processing
|
||
//pthread_join(pthread_handle[(j*selectedNumberGPUs+k)].pthread, NULL);
|
||
#ifdef debug_OutputMultiGpu
|
||
printf("<~~~~[ joined thread (%i) [WP (%i) on Device (%i)]!\n", (j*selectedNumberGPUs+k), (j*selectedNumberGPUs+k), enableGPUs_ptr[k]);
|
||
#endif
|
||
//new async threads
|
||
futures[k].wait(); //advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task)
|
||
}
|
||
}
|
||
|
||
//gettimeofday(&stopAllThreads, NULL);
|
||
|
||
auto stopAllThreads = std::chrono::steady_clock::now();
|
||
diff_time = std::chrono::duration_cast<std::chrono::microseconds>(stopAllThreads - startAllThreads).count(); // total duration in µs
|
||
|
||
#ifdef debug_OutputPerformance
|
||
printf("\n# NUM_VOXEL = %i * %i * %i = %i\n", IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z, IMAGE_SIZE_XYZ.x * IMAGE_SIZE_XYZ.y * IMAGE_SIZE_XYZ.z );
|
||
printf( "# aScanCount = %i\n", aScanCount);
|
||
//printf("volposition.x = %f, .y=%f, .z=%f\n", volposition.x, volposition.y, volposition.z );
|
||
|
||
|
||
double performance_all = (((double)aScanCount * (double)IMAGE_SIZE_XYZ.x * (double)IMAGE_SIZE_XYZ.y * (double)IMAGE_SIZE_XYZ.z) / diff_time )/ 1000.0;
|
||
printf("# Duration of main Processing (all GPUs): %.3f us\n", diff_time);
|
||
printf("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
|
||
printf("{~~~~~~~~~} Performance: %.6lf A-Scan * GVoxel/s {~~~~~~~~~}\n", performance_all);
|
||
printf("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
|
||
#endif
|
||
|
||
Duration_ptr[0] = diff_time; // Return total duration in µs
|
||
|
||
|
||
// Speicher wieder freigeben
|
||
free (position);
|
||
free (resolution);
|
||
free (volumeStartpoint);
|
||
free (volumePtr);
|
||
//free (Abort_ptr);
|
||
free (pthread_handle);
|
||
|
||
#ifdef debug_OutputFunctions
|
||
printf( "<== multithreaded_processing - End\n");
|
||
#endif
|
||
}
|
||
|
||
|
||
|
||
|
||
/**
|
||
preintegrateAscans
|
||
Determine maximal SampleWidth, matching to the resolution to be used for reconstruction, and integrate A-scan over an window of this SampleWidth
|
||
*/
|
||
void preintegrateAscans(
|
||
float *aScan_ptr, ///< AScan-Daten
|
||
float *AscansOut_ptr, ///< AScan-OutputDaten fuer Testrueckgabe
|
||
float *speed_vec_ptr, ///< SoS Daten im Blockmode
|
||
int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen
|
||
int aScanLength, ///< Laenge der AscanDaten (normal 3000)
|
||
float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs
|
||
float sampleRate, ///< Samplerate fuer AScans
|
||
float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden
|
||
float debugModeParameter ///< Parameter der mit fuer Debugmode uebermittelt werden kann
|
||
)
|
||
{
|
||
#ifdef debug_OutputFunctions
|
||
printf( "==> preintegrateAscans - Start\n");
|
||
#endif
|
||
|
||
float* AscanBuffer;
|
||
AscanBuffer = (float*)malloc(aScanLength * sizeof(float)); // Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
|
||
|
||
|
||
// //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer
|
||
// for (int i = 0; i<aScanLength; i++){
|
||
// printf( " -> i (%3i) \n",i);
|
||
// //AscansOut_ptr[i] = *(aScan_ptr+i);
|
||
// AscanBuffer[i] = aScan_ptr[i];
|
||
// AscansOut_ptr[i] = AscanBuffer[i];
|
||
// }
|
||
|
||
#ifdef preAscanIntegrationVersion1Michael // direkt übernommene Version von Michael Zapf
|
||
|
||
int windowWidth = 0;
|
||
|
||
/////////////////////// Integration über Anzahl "sampl" voxel ///////////////////////
|
||
unsigned int i,j,sampl = 0;
|
||
double i_buffer=0.0;
|
||
/////xsum ueber diagonale der voxellänge
|
||
sampl = (unsigned int)(ceil((float)1.7*(( IMAGE_RESOLUTION / speed_vec_ptr[0])/ (sampleRate)) /2)); //halbe Breite bestimmen
|
||
windowWidth = (int)2*IMAGE_RESOLUTION/sampleRate/speed_vec_ptr[0];
|
||
#ifdef debug_preAscanIntegration
|
||
printf( "~~~~~~~~~~~~~~~~~ V1 Michael ~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
printf( " aScanCount = %5i\n",aScanCount);
|
||
printf( " aScanLength = %5i\n",aScanLength);
|
||
printf( " IMAGE_RESOLUTION = %12.10f\n",IMAGE_RESOLUTION);
|
||
printf( " speed_vec_ptr[%3i] = %12.10f\n",i,speed_vec_ptr[i]);
|
||
printf( " sampleRate = %12.10f\n\n",sampleRate);
|
||
|
||
printf( " => windowWidth = %5i (Ganze Breite) \n",windowWidth);
|
||
printf( " => sampl = %5i (Halbe Breite) \n",sampl);
|
||
printf( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
#endif
|
||
|
||
printf( " => windowWidth = %5i (Ganze Breite) \n",windowWidth);
|
||
printf( " => sampl = %5i (Halbe Breite) \n",sampl);
|
||
|
||
|
||
// Zugriff über
|
||
// sec_buffer, das mit INTERP_RATIO gestreckt ist
|
||
// bufferz ist der vorher schon angelegte Buffer der gleichen Größe wie sec_buffer
|
||
// sampl = ist die Breite des Additionsfensters für die Addition
|
||
|
||
|
||
if ((debugMode == 0.0) && (debugModeParameter == 2.0)){
|
||
printf( "~~~~~~~~~~~~~~~~~~~~~ !!!Use Abs before Preintegrate Ascans!!! ~~~~~~~~~~~~~~~~~~~~~\n");
|
||
for (int j = 0; j<aScanCount; j++){
|
||
for (int i = 0; i<aScanLength; i++){
|
||
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
|
||
//AscansOut_ptr[i] = *(aScan_ptr+i);
|
||
//AscanBuffer[i] = aScan_ptr[i];
|
||
aScan_ptr[j*aScanLength+i] = fabs(aScan_ptr[j*aScanLength+i]); // In AscanSpeicher schreiben
|
||
//AscansOut_ptr[j*aScanLength+i] = fabs(aScan_ptr[j*aScanLength+i]); // Ebenso nach Matlab zurueckgeben
|
||
}
|
||
}
|
||
}
|
||
else if ((debugMode == 0.0) && (debugModeParameter == 3.0)){
|
||
printf( "~~~~~~~~~~~~~~~~~~~~~ !!!Use sqrt before Preintegrate Ascans!!! ~~~~~~~~~~~~~~~~~~~~~\n");
|
||
for (int j = 0; j<aScanCount; j++){
|
||
for (int i = 0; i<aScanLength; i++){
|
||
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
|
||
//AscansOut_ptr[i] = *(aScan_ptr+i);
|
||
//AscanBuffer[i] = aScan_ptr[i];
|
||
aScan_ptr[j*aScanLength+i] = pow(aScan_ptr[j*aScanLength+i],2); // In AscanSpeicher schreiben
|
||
//AscansOut_ptr[j*aScanLength+i] = fabs(aScan_ptr[j*aScanLength+i]); // Ebenso nach Matlab zurueckgeben
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
for (int j = 0; j<aScanCount; j++){
|
||
#ifdef debug_preAscanIntegration
|
||
printf( "\n -> Ascan (j=%3i) ==> j*aScanLength+0 = %i \n\n",j, j*aScanLength);
|
||
#endif
|
||
i_buffer = 0;
|
||
for (i=0;i<sampl;i++)
|
||
{ i_buffer = i_buffer + aScan_ptr[j*aScanLength+i]/(2*sampl);
|
||
#ifdef debug_preAscanIntegration
|
||
printf( " i_buffer = %6.3f ",i_buffer);
|
||
#endif
|
||
}
|
||
|
||
for (i=0;i<sampl;i++)
|
||
{ if (i+sampl<aScanLength){
|
||
i_buffer = i_buffer +aScan_ptr[j*aScanLength+i+sampl]/(2*sampl);
|
||
AscanBuffer[i] = i_buffer;}
|
||
//printf( " i_buffer = %6.3f ",i_buffer);
|
||
}
|
||
|
||
for (i=sampl;i<(aScanLength)-sampl;i++)
|
||
{ if (i+sampl<aScanLength){
|
||
i_buffer = i_buffer + aScan_ptr[j*aScanLength+i+sampl]/(2*sampl) - aScan_ptr[j*aScanLength+i-sampl]/(2*sampl);
|
||
AscanBuffer[i] = i_buffer;}
|
||
}
|
||
|
||
for (i=aScanLength-sampl;i<aScanLength;i++)
|
||
{
|
||
if (i-sampl>=0){
|
||
i_buffer = i_buffer - aScan_ptr[j*aScanLength+i-sampl]/(2*sampl);
|
||
//AscanBuffer[i] = i_buffer / (sampl-(aScanLength)-i); // Michaels alter Code
|
||
AscanBuffer[i] = i_buffer ; // passt so besser zu Anfangsstrategie
|
||
}
|
||
}
|
||
|
||
//Alle Daten uebertragen von AscanBuffer auf AscansOut_ptr[0..aScanLength]
|
||
for (int i = 0; i<aScanLength; i++){
|
||
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
|
||
//AscansOut_ptr[i] = *(aScan_ptr+i);
|
||
//AscanBuffer[i] = aScan_ptr[i];
|
||
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // In AscanSpeicher schreiben
|
||
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Ebenso nach Matlab zurueckgeben
|
||
}
|
||
}
|
||
#endif
|
||
|
||
#ifdef preAscanIntegrationVersion2Ernst
|
||
|
||
int i, i_start, i_end = 0;
|
||
float nSample = 0.0f;
|
||
|
||
float windowWidth = 0.0f;
|
||
float windowWidthHalf = 0.0f;
|
||
float windowWidthHalf_minus1 = 0.0f;
|
||
float windowSum = 0.0f;
|
||
|
||
// maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615
|
||
// width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen
|
||
windowWidth = (float)3.464101615 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0];
|
||
windowWidthHalf = (float)1.732050808 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0]; //halbe Fenster Breite
|
||
|
||
|
||
#ifdef debug_preAscanIntegration
|
||
printf( "~~~~~~~~~~~~~~~~~ V2 Ernst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
printf( " aScanCount = %5i\n",aScanCount);
|
||
printf( " aScanLength = %5i\n",aScanLength);
|
||
printf( " IMAGE_RESOLUTION = %12.10f\n",IMAGE_RESOLUTION);
|
||
printf( " speed_vec_ptr[%3i] = %12.10f\n",i,speed_vec_ptr[i]);
|
||
printf( " sampleRate = %12.10f\n\n",sampleRate);
|
||
|
||
printf( " => windowWidth = %5.2f (Ganze Breite) \n",windowWidth);
|
||
printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
|
||
printf( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
|
||
|
||
printf ("\n 2. Testvariante mit Start und Endpkt-berechnung --> Neue Implementierung mit Zetrum in der Mitte \n#######################################################################################\n");
|
||
#endif
|
||
|
||
//printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth));
|
||
//printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
|
||
|
||
for (int j = 0; j<aScanCount; j++){ // über alle A-scans gehen.
|
||
|
||
if ((int)ceil(windowWidth)%2 == 1){ // Uneven / Ungerade
|
||
// // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden
|
||
|
||
windowWidthHalf_minus1 = floor(( ceil(windowWidth)-1)/2);
|
||
#ifdef debug_preAscanIntegration
|
||
printf( " ==> Ungerade ==> widthHalf_minus1 = %i \n",(int)windowWidthHalf_minus1);
|
||
#endif
|
||
|
||
for (int i = 0; i < aScanLength; i++){ // über gesamte Breite des A-scans gehen.
|
||
i_start = i - (int)windowWidthHalf_minus1;
|
||
i_end = i + (int)windowWidthHalf_minus1;
|
||
|
||
// Grenzen einhalten
|
||
if (i_start < 0) i_start=0;
|
||
if (i_end > aScanLength-1) i_end =aScanLength-1;
|
||
|
||
// Anzahl Sample bestimmen
|
||
nSample = i_end-i_start+1; // +1 da erstes Element auch dazugehört.
|
||
|
||
windowSum = 0;
|
||
for (int k = i_start; k <= i_end; k++){
|
||
#ifdef debug_preAscanIntegration
|
||
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
|
||
printf("Index %3i: windowSum (%3i:%3i) += aScan_ptr(k=%i) %07.5f \n", i, i_start, i_end, k, windowSum);
|
||
}
|
||
#endif
|
||
windowSum += aScan_ptr[j*aScanLength+k];
|
||
}
|
||
|
||
// geteilt durch nur die genutzten Samples
|
||
//AscanBuffer[i] = windowSum/ceil(nSample);
|
||
//Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem
|
||
//Rand nicht vorhanden/bzw = 0 sind. --> Abflachung am Rand
|
||
AscanBuffer[i] = windowSum/ceil(windowWidth);
|
||
|
||
#ifdef debug_preAscanIntegration
|
||
//if ((i >= 1720) && (i<=1730)){
|
||
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
|
||
printf("Index %3i: Summe über %5i Samples (%3i:%3i) = %07.5f \n", i, (int)nSample, i_start, i_end, AscanBuffer[i]);
|
||
}
|
||
#endif
|
||
}
|
||
|
||
|
||
}
|
||
else if ((int)ceil(windowWidth)%2 == 0){ // Even / Gerade
|
||
|
||
// Bei geraden Breiten symmetrisch mit den beiden äußeren Samplewerten zu je 1/2 gewichten.
|
||
|
||
windowWidthHalf_minus1 = floor(( ceil(windowWidth)-1)/2);
|
||
#ifdef debug_preAscanIntegration
|
||
printf( " ==> Gerade ==> widthHalf_minus1 = %i \n", (int)windowWidthHalf_minus1);
|
||
#endif
|
||
|
||
for (int i = 0; i < aScanLength; i++){ // über gesamte Breite des A-scans gehen.
|
||
i_start = i - (int)windowWidthHalf_minus1;
|
||
i_end = i + (int)windowWidthHalf_minus1;
|
||
|
||
// Grenzen einhalten
|
||
if (i_start < 0) i_start=0;
|
||
if (i_end > aScanLength-1) i_end =aScanLength-1;
|
||
|
||
// Anzahl Sample bestimmen
|
||
nSample = i_end-i_start+1; // +1 da erstes Element auch dazugehört.
|
||
|
||
windowSum = 0;
|
||
for (int k = i_start; k <= i_end; k++){
|
||
#ifdef debug_preAscanIntegration
|
||
//if ((i >= 1720) && (i<=1730)){
|
||
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
|
||
printf("Index %3i: windowSum (%3i:%3i) += aScan_ptr(k=%i) %07.5f \n", i, i_start, i_end, k, windowSum);
|
||
}
|
||
#endif
|
||
windowSum += aScan_ptr[j*aScanLength+k];
|
||
}
|
||
|
||
// geteilt wird durch nur die genutzten Samples
|
||
//AscanBuffer[i] = windowSum/ceil(nSample);
|
||
//Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem
|
||
//Rand nicht vorhanden/bzw = 0 sind.--> Abflachung am Rand
|
||
windowSum = windowSum/ceil(windowWidth);
|
||
|
||
// Halbe Samplewerte an Grenzen miteinberechen aber absolute Grenzen einhalten
|
||
if (i_start >= 0){
|
||
windowSum = windowSum + aScan_ptr[j*aScanLength+(i_start-1)]/(2*ceil(windowWidth)); // Linken Nachbarn zu 1/2 mit dazunehmen
|
||
nSample = nSample + 0.5;
|
||
}
|
||
if (i_end < aScanLength-1){
|
||
windowSum = windowSum + aScan_ptr[j*aScanLength+(i_end+1)]/(2*ceil(windowWidth)); // Rechten Nachbarn zu 1/2 mit dazunehmen
|
||
nSample = nSample + 0.5;
|
||
}
|
||
|
||
AscanBuffer[i] = windowSum;
|
||
#ifdef debug_preAscanIntegration
|
||
//if ((i >= 1720) && (i<=1730)){
|
||
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
|
||
printf("Index %3i: Summe über %5.1f Samples (%3i:%3i) = %07.5f \n", i, nSample, i_start, i_end, AscanBuffer[i]);
|
||
}
|
||
#endif
|
||
|
||
}
|
||
|
||
}
|
||
|
||
// Transfer Data from Buffer to Memory regions
|
||
for (int i = 0; i < aScanLength; i++){
|
||
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
|
||
//AscansOut_ptr[i] = *(aScan_ptr+i);
|
||
//AscanBuffer[i] = aScan_ptr[i];
|
||
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory
|
||
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab
|
||
}
|
||
|
||
}
|
||
|
||
#ifdef debug_preAscanIntegration
|
||
printf ("#######################################################################################\n\n");
|
||
#endif
|
||
|
||
|
||
|
||
#endif
|
||
|
||
#ifdef debug_preAscanIntegration
|
||
printf( " \n\n\n\n ");
|
||
#ifdef debug_OutputVariables
|
||
// Ausgabe einzelner AScan-Samples der <20>bergabewerte
|
||
//float *aScan_ptr;
|
||
//aScan_ptr = (float*)mxGetPr(AScan);
|
||
//int aScanSampleCountPerReceiver=3000;
|
||
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",0 , *(aScan_ptr+0), *(aScan_ptr+1), *(aScan_ptr+2));
|
||
printf( " -> AScan (%3i) [1726:1729] = [%f %f %f %f]\n",0 , *(aScan_ptr+1726),*(aScan_ptr+1726+1), *(aScan_ptr+1726+2), *(aScan_ptr+1726+3));
|
||
// Ale drei Moeglichkeiten koennen angewandt werden
|
||
//*(AscansOut_ptr+1726) = *(aScan_ptr+1726);
|
||
//*(AscansOut_ptr+1726+1) = 0;
|
||
//AscansOut_ptr[1726+2] = (float)*(aScan_ptr+1726);
|
||
printf( " -> AScan (%3i) [1726:1729] = [%f %f %f %f]\n",0 , *(aScan_ptr+1726),*(aScan_ptr+1726+1), *(aScan_ptr+1726+2), *(aScan_ptr+1726+3));
|
||
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",0 , *(aScan_ptr+2997), *(aScan_ptr+2998), *(aScan_ptr+2999));
|
||
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",1 , aScan_ptr[3000], aScan_ptr[3001], aScan_ptr[3002]);
|
||
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",1 , aScan_ptr[5997], aScan_ptr[5998], aScan_ptr[5999]);
|
||
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",156 , aScan_ptr[0+(156*aScanSampleCountPerReceiver)], aScan_ptr[1+(156*aScanSampleCountPerReceiver)], aScan_ptr[2+(156*aScanSampleCountPerReceiver)]);
|
||
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",156 , aScan_ptr[2997+(156*aScanSampleCountPerReceiver)], aScan_ptr[2998+(156*aScanSampleCountPerReceiver)], aScan_ptr[2999+(156*aScanSampleCountPerReceiver)]);
|
||
#endif
|
||
#endif
|
||
|
||
free(AscanBuffer);
|
||
|
||
#ifdef debug_OutputFunctions
|
||
printf( "<== preintegrateAscans - End\n");
|
||
#endif
|
||
}
|
||
|
||
const size_t* GetDimensions(const Matrix_t& matrix)
|
||
{
|
||
return matrix.Dims;
|
||
}
|
||
const void* GetPr(const Matrix_t& matrix){
|
||
return matrix.Data;
|
||
}
|
||
|
||
size_t GetNumberOfDimensions(const Matrix_t& matrix){
|
||
return matrix.NumberOfDims;
|
||
}
|
||
|
||
size_t GetNumberOfElements(const Matrix_t& matrix){
|
||
return matrix.DataSize;
|
||
}
|
||
|
||
Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
|
||
#ifdef debug_OutputFunctions
|
||
printf( "==> mexFunction - Start\n");
|
||
#endif
|
||
|
||
#ifdef debug_OutputFormat_German
|
||
setlocale(LC_NUMERIC, "de_DE"); // German Format , instead . for numbers
|
||
#endif
|
||
|
||
size_t AScan_Nx, AScan_Mx,
|
||
pix_vect_Nx, pix_vect_Mx,
|
||
receiver_index_Nx, receiver_index_Mx,
|
||
emitter_index_Nx, emitter_index_Mx,
|
||
receiver_list_Nx, receiver_list_Mx,
|
||
emitter_list_Nx, emitter_list_Mx,
|
||
|
||
SAFT_mode_Nx, SAFT_mode_Mx,
|
||
SAFT_variant_Nx, SAFT_variant_Mx,
|
||
|
||
speed_Nx, speed_Mx,
|
||
SOSGrid_Xx, SOSGrid_Yx, SOSGrid_Zx,
|
||
sos_startPoint_Nx, sos_startPoint_Mx,
|
||
sos_res_Nx, sos_res_Mx,
|
||
|
||
attVolume_Nx,attVolume_Mx,
|
||
ATTGrid_Xx, ATTGrid_Yx, ATTGrid_Zx,
|
||
|
||
res_Nx, res_Mx,
|
||
timeint_Nx, timeint_Mx,
|
||
IMAGE_XYZ_Nx, IMAGE_XYZ_Mx,
|
||
IMAGE_SUM_Xx, IMAGE_SUM_Yx, IMAGE_SUM_Zx,
|
||
BlockDim_XYZ_Nx, BlockDim_XYZ_Mx,
|
||
|
||
GPUs_Nx, GPUs_Mx,
|
||
dbgMode_Nx,dbgMode_Mx
|
||
;
|
||
|
||
int aScanCount;
|
||
int aScanLength;
|
||
float *aScan_ptr;
|
||
int3 IMAGE_SIZE_XYZ;
|
||
int3 BlockDim_XYZ;
|
||
bool SOSMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> SOSGrid_XYZ, SOS_RESOLUTION, sosOffset not neccessary
|
||
bool ATTMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> ATTGrid_XYZ not neccessary
|
||
int SAFT_MODE;
|
||
int *SAFT_VARIANT; // Variances of SAFT
|
||
int SAFT_VARIANT_Size; // Size of Varainces
|
||
int3 SOSGrid_XYZ; // Size of SOSGrid
|
||
int3 ATTGrid_XYZ; // Size of ATTGrid
|
||
float3 regionOfInterestOffset; // Startpoint
|
||
float3 sosOffset; // Startpoint SoS
|
||
float IMAGE_RESOLUTION; // Aufloesung
|
||
float SOS_RESOLUTION; // Aufloesung
|
||
float sampleRate; // Samplerate für AScans
|
||
int selectedNumberGPUs; // Anzahl der genutzten GPUs durch uebergebene Groesse der Ausgewaehlten GPUs
|
||
float debugMode;
|
||
float debugModeParameter;
|
||
|
||
// check number of arguments... // Anzahl der Argumente überprüfen
|
||
#ifdef debug_OutputParameter
|
||
printf( "Number of Arguments: Input(nrhs) = %i Output(nlhs) = %i\n", nrhs, nlhs);
|
||
#endif
|
||
|
||
if (params.size() != 19)
|
||
{
|
||
printf( " \n");
|
||
printf( " Inputparameter \n");
|
||
printf( " In[n] Meaning [Row N x Col M] Type\n");
|
||
printf( " =============================================================================\n");
|
||
printf( " 1-prhs[0] AScan-Data [3000?xnAscans] single\n");
|
||
printf( " 2-prhs[1] IMAGE_STARTPOINT_S [1x3] single\n");
|
||
|
||
printf( " 3-prhs[2] receiver_index [1xnAscans] uint16\n");
|
||
printf( " 4-prhs[3] emitter_index [1xnAscans] uint16\n");
|
||
printf( " 5-prhs[4] receiver_list [3xnReceiver] single\n");
|
||
printf( " 6-prhs[5] emitter_list [3xnEmitter] single\n");
|
||
|
||
printf( " 7-prhs[6] SAFT_mode [1x1] uint32\n");
|
||
printf( " 8-prhs[7] SAFT_variant [1x6] uint32\n");
|
||
|
||
printf( " Standard: [1 1 1 1 0 0] \n");
|
||
printf( " -> Ascan Preintegration\n");
|
||
printf( " -> Ascan Interpolation\n");
|
||
printf( " -> Preprocessing SOS&ATT 3D Volume Interpolation\n");
|
||
printf( " -> Reconstruction SOS&ATT 3D Volume Interpolation\n");
|
||
printf( " -> not yet\n");
|
||
printf( " -> not yet\n");
|
||
|
||
printf( " 9-prhs[8] SOSVolume [SOS_XxYxZ] single in m/s\n");
|
||
printf( " 10-prhs[9] SOS_STARTPOINT_S [1x3] single\n");
|
||
printf( " 11-prhs[10] SOS_RESOLUTION_S [1x1] single\n");
|
||
|
||
printf( " 12-prhs[11] ATTVolume [ATT_XxYxZ] single in dB/cm\n");
|
||
|
||
printf( " 13-prhs[12] IMAGE_RESOLUTION_S [1x1] single\n");
|
||
printf( " 14-prhs[13] TimeInterval_S [1x1] single\n");
|
||
printf( " 15-prhs[14] IMAGE_XYZ [1x3] uint32\n");
|
||
printf( " 16-prhs[15] IMAGE_SUM [Output_XxYxZ] double\n");
|
||
|
||
printf( " 17-prhs[16] BlockDim_XYZ (GPU) [1x3] uint32\n");
|
||
printf( " 18-prhs[17] GPUs (DeviceNr GPU) [1xn] uint32\n");
|
||
|
||
printf( " 19-prhs[18] dbgMode,dbgModeParam [1x2] single\n");
|
||
|
||
printf( " ==============================================================================\n");
|
||
printf( "\n");
|
||
printf( " Outputparameter \n");
|
||
printf( " Out[n] Meaning \n");
|
||
printf( " ================================================================================================= \n");
|
||
printf( " plhs[0] = Output_Voxels = mxCreateNumericArray ( [IMAGE_XYZ] , mxDOUBLE_CLASS, mxREAL); \n");
|
||
printf( " plhs[1] = Duration = mxCreateDoubleMatrix ( [nGPUs+1, 1] , mxREAL); \n");
|
||
printf( " plhs[2] = Output_Ascans = mxCreateNumericMatrix( [3000?,nAscans], mxSINGLE_CLASS, mxREAL); \n");
|
||
printf( " ================================================================================================= \n");
|
||
printf("Wrong number of input arguments. Should be 19.");
|
||
}
|
||
|
||
// assign input arguments... // Bestimme die Eingangswerte
|
||
const Matrix_t&AScan = params [0]; // AScan-Data
|
||
const Matrix_t&pix_vect = params [1]; // Image Startpoint (IMAGE_STARTPOINT_S)
|
||
|
||
const Matrix_t&receiver_index = params [2]; // Index Data for Receiver-Position Data
|
||
const Matrix_t&emitter_index = params [3]; // Index Data for Emitter-Position Data
|
||
const Matrix_t&receiver_list = params [4]; // Assignment Index to Receiver-Position Data
|
||
const Matrix_t&emitter_list = params [5]; // Assignment Index to Emitter-Position Data
|
||
|
||
const Matrix_t&SAFT_mode = params [6]; // SOS?, ATT?
|
||
const Matrix_t&SAFT_variant = params [7]; // Differnt Mode-Parameter for Reconstruction
|
||
|
||
const Matrix_t&speed = params [8]; // Speed of Sound Data (Single, SoS-Grid)
|
||
const Matrix_t&sos_startPoint = params [9]; // Startpoint of Speed of Sound Grid
|
||
const Matrix_t&sos_res = params [10]; // SoS Grid Resolution
|
||
|
||
const Matrix_t&attVolume = params [11]; // Attenuation Data (Single, SoS-Grid)
|
||
|
||
const Matrix_t&res = params [12]; // Output Volume Resolution
|
||
const Matrix_t&timeint = params [13]; // 1/Sample-Rate
|
||
const Matrix_t&IMAGE_XYZ = params [14]; // Output Volume Size XYZ
|
||
const Matrix_t&IMAGE_SUM = params [15]; // Volume from previous Call
|
||
|
||
const Matrix_t&BlockDim = params [16]; // Block Dimension to use for GPU
|
||
const Matrix_t&GPUs = params [17]; // Welche GPUs sollen genutzt werden?
|
||
|
||
const Matrix_t&dbgMode = params [18]; // DebugMode and DebugMode-Parameter
|
||
|
||
|
||
// check data types and assign variables... // Ueberpruefe die Datentypen und lege Variablen fest
|
||
|
||
//================================================================================================================ Check Dimensions of input parameters
|
||
#ifdef debug_OutputParameter
|
||
printf( "\n");
|
||
printf( "In[n] Meaning [Row N x Col M] \n");
|
||
printf( "=================================================================================================\n");
|
||
#endif
|
||
|
||
|
||
//====================================================================== 1.Input Parameter - Check AScan
|
||
AScan_Nx = GetDimensions(AScan)[0]; // Reihen N ermitteln
|
||
AScan_Mx = GetDimensions(AScan)[1]; // Spalten M ermitteln
|
||
|
||
aScanCount = AScan_Mx;
|
||
aScanLength = AScan_Nx;
|
||
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[0] Ascan-Data [%ix%i]\n", AScan_Nx , AScan_Mx);
|
||
#endif
|
||
|
||
//printf( "mxGetNumberOfDimensions(AScan)=%i\n", mxGetNumberOfDimensions(AScan));
|
||
if ((aScanCount > 65535)) // new 2019: increasing the limit of the A-Scan block size. however this is limited by the datatype of unsigned short which is used for a pointer.
|
||
{
|
||
printf( " -> AScanBlock size = %i\n", aScanCount);
|
||
printf( "AScanBlock size might be too large (=> 2^16)!!!");
|
||
}
|
||
if ((aScanCount > 1))
|
||
#ifdef debug_OutputParameter
|
||
printf( " -> Blockmode with [%i x %i]\n", AScan_Nx, aScanCount);
|
||
#endif
|
||
// if(!(mxIsSingle(AScan)))
|
||
// printf("AScans must be Single");
|
||
|
||
aScan_ptr = (float*)GetPr(AScan);
|
||
|
||
#ifdef debug_OutputVariables
|
||
// Ausgabe einzelner AScan-Samples der übergabewerte
|
||
//int aScanSampleCountPerReceiver=3000;
|
||
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",0 , *(aScan_ptr+0), *(aScan_ptr+1), *(aScan_ptr+2));
|
||
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",0 , *(aScan_ptr+2997), *(aScan_ptr+2998), *(aScan_ptr+2999));
|
||
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",1 , aScan_ptr[3000], aScan_ptr[3001], aScan_ptr[3002]);
|
||
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",1 , aScan_ptr[5997], aScan_ptr[5998], aScan_ptr[5999]);
|
||
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",156 , aScan_ptr[0+(156*aScanSampleCountPerReceiver)], aScan_ptr[1+(156*aScanSampleCountPerReceiver)], aScan_ptr[2+(156*aScanSampleCountPerReceiver)]);
|
||
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",156 , aScan_ptr[2997+(156*aScanSampleCountPerReceiver)], aScan_ptr[2998+(156*aScanSampleCountPerReceiver)], aScan_ptr[2999+(156*aScanSampleCountPerReceiver)]);
|
||
#endif
|
||
|
||
//====================================================================== 2.Input Parameter - Check IMAGE_STARTPOINT_S / pix_vect
|
||
pix_vect_Nx = GetDimensions(pix_vect)[0]; // Reihen N ermitteln
|
||
pix_vect_Mx = GetDimensions(pix_vect)[1]; // Spalten M ermitteln
|
||
|
||
regionOfInterestOffset.x = *((float*)GetPr(pix_vect));
|
||
regionOfInterestOffset.y = *((float*)GetPr(pix_vect)+1);
|
||
regionOfInterestOffset.z = *((float*)GetPr(pix_vect)+2);
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[1] IMAGE_STARTPOINT_S [%ix%i] = [%f x %f x %f]\n", pix_vect_Nx , pix_vect_Mx, regionOfInterestOffset.x, regionOfInterestOffset.y, regionOfInterestOffset.z);
|
||
#endif
|
||
|
||
if (!(pix_vect_Nx == 1)||!(pix_vect_Mx == 3))
|
||
printf(" -> Dimension of IMAGE_STARTPOINT_S must be [1 x 3]");
|
||
if ((pix_vect_Nx > 1))
|
||
printf( " -> No Blockmode [%i x 3] allowed for IMAGE_STARTPOINT_S\n", pix_vect_Nx);
|
||
// if(!(mxIsSingle(pix_vect)))
|
||
// printf(" -> IMAGE_STARTPOINT_S must be Single");
|
||
|
||
//====================================================================== 3.Input Parameter - Check Receiver Index
|
||
receiver_index_Nx = GetDimensions(receiver_index)[0]; // Reihen N ermitteln
|
||
receiver_index_Mx = GetDimensions(receiver_index)[1]; // Spalten M ermitteln
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[2] receiver_index [%ix%i]\n", receiver_index_Nx , receiver_index_Mx);
|
||
#endif
|
||
|
||
if (!(receiver_index_Nx == 1))
|
||
printf(" -> Dimension of receiver_index must be [1 x M]");
|
||
if (!(receiver_index_Mx == aScanCount)){
|
||
printf (" -> aScanCount(%i)!= M(%i)\n", aScanCount, receiver_index_Mx);
|
||
printf(" -> Dimension of receiver_index has different size as Ascan-Data\n");
|
||
}
|
||
// if (!(receiver_index_Mx == 1))
|
||
#ifdef debug_OutputParameter
|
||
printf( " -> Blockmode with [1 x %i]\n", receiver_index_Mx);
|
||
#endif
|
||
// if(!(mxIsUint16(receiver_index)))
|
||
// printf(" -> receiver_index must be Uint16");
|
||
|
||
|
||
|
||
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
|
||
unsigned short *receiver_index_ptr;
|
||
receiver_index_ptr = (unsigned short*)GetPr(receiver_index);
|
||
#ifdef debug_OutputVariables
|
||
|
||
if ((receiver_index_Mx > 1))
|
||
{
|
||
printf( " -> receiver_index: %i = [%i]\n",1 , *(receiver_index_ptr+0));
|
||
printf( " -> receiver_index: %i = [%i]\n",2 , *(receiver_index_ptr+1));
|
||
printf( " -> receiver_index: %i = [%i]\n",3 , *(receiver_index_ptr+2));
|
||
}
|
||
else
|
||
printf( " -> receiver_index: %i = [%i]\n",1 , *(receiver_index_ptr+0));
|
||
#endif
|
||
|
||
//====================================================================== 4.Input Parameter - Check Emitter Index
|
||
emitter_index_Nx = GetDimensions(emitter_index)[0]; // Reihen N ermitteln
|
||
emitter_index_Mx = GetDimensions(emitter_index)[1]; // Spalten M ermitteln
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[3] emitter_index [%ix%i]\n", emitter_index_Nx , emitter_index_Mx);
|
||
#endif
|
||
|
||
if (!(emitter_index_Nx == 1))
|
||
printf(" -> Dimension of emitter_index must be [1 x M]");
|
||
if (!(emitter_index_Mx == aScanCount)){
|
||
printf (" -> aScanCount(%i)!= M(%i)\n", aScanCount, emitter_index_Mx);
|
||
printf(" -> Dimension of emitter_index has different size as Ascan-Data\n");
|
||
}
|
||
// if (!(emitter_index_Mx == 1))
|
||
#ifdef debug_OutputParameter
|
||
printf( " -> Blockmode with [1 x %i]\n", emitter_index_Mx);
|
||
#endif
|
||
// if(!(mxIsUint16(emitter_index)))
|
||
// printf(" -> emitter_index must be Uint16");
|
||
|
||
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
|
||
unsigned short * emitter_index_ptr = (unsigned short*)GetPr(emitter_index);
|
||
|
||
#ifdef debug_OutputVariables
|
||
|
||
if ((receiver_index_Mx > 1))
|
||
{
|
||
printf( " -> emitter_index: %i = [%i]\n",1 , *(emitter_index_ptr+0));
|
||
printf( " -> emitter_index: %i = [%i]\n",2 , *(emitter_index_ptr+1));
|
||
printf( " -> emitter_index: %i = [%i]\n",3 , *(emitter_index_ptr+2));
|
||
}
|
||
else
|
||
printf( " -> emitter_index: %i = [%i]\n",1 , *(emitter_index_ptr+0));
|
||
#endif
|
||
|
||
|
||
|
||
//====================================================================== 5.Input Parameter - Check receiver_list
|
||
receiver_list_Nx = GetDimensions(receiver_list)[0]; // Reihen N ermitteln
|
||
receiver_list_Mx = GetDimensions(receiver_list)[1]; // Spalten M ermitteln
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[4] receiver_list [%ix%i]\n", receiver_list_Nx , receiver_list_Mx);
|
||
#endif
|
||
|
||
if (!(receiver_list_Nx == 3))
|
||
printf(" -> Dimension of receiver_list must be [3 x M]");
|
||
// if(!(mxIsSingle(receiver_list)))
|
||
// printf(" -> receiver_list must be Single");
|
||
|
||
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
|
||
float *receiver_list_ptr;
|
||
receiver_list_ptr = (float*)GetPr(receiver_list);
|
||
|
||
#ifdef debug_OutputVariables
|
||
|
||
if ((receiver_list_Mx > 1))
|
||
{
|
||
printf( " -> receiver_list-Position: %i = [%f %f %f]\n",1 , *(receiver_list_ptr+0), *(receiver_list_ptr+1), *(receiver_list_ptr+2));
|
||
printf( " -> receiver_list-Position: %i = [%f %f %f]\n",2 , *(receiver_list_ptr+3), *(receiver_list_ptr+4), *(receiver_list_ptr+5));
|
||
}
|
||
else
|
||
printf( " -> receiver_list-Position: %i = [%f %f %f]\n",1 , *(receiver_list_ptr+0), *(receiver_list_ptr+1), *(receiver_list_ptr+2));
|
||
|
||
#endif
|
||
|
||
|
||
|
||
//====================================================================== 6.Input Parameter - Check emitter_list
|
||
emitter_list_Nx = GetDimensions(emitter_list)[0]; // Reihen N ermitteln
|
||
emitter_list_Mx = GetDimensions(emitter_list)[1]; // Spalten M ermitteln // emitter_list gibt die maximale Anzahl an Emittern die in diesem Block vorkommen können wieder!
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[5] emitter_list [%ix%i]\n", emitter_list_Nx , emitter_list_Mx);
|
||
#endif
|
||
|
||
if (!(emitter_list_Nx == 3))
|
||
printf(" -> Dimension of emitter_list must be [3 x M]");
|
||
// if(!(mxIsSingle(receiver_list)))
|
||
// printf(" -> emitter_list must be Single");
|
||
|
||
// Ausgabe einzelner Geometriedaten der übergabewerte mit verschiedenen Varianten
|
||
float *emitter_list_ptr;
|
||
emitter_list_ptr = (float*)GetPr(emitter_list);
|
||
|
||
#ifdef debug_OutputVariables
|
||
|
||
|
||
if ((emitter_list_Mx > 1))
|
||
{
|
||
printf( " -> emitter_list-Position: %i = [%f %f %f]\n",1 , *(emitter_list_ptr+0), *(emitter_list_ptr+1), *(emitter_list_ptr+2));
|
||
printf( " -> emitter_list-Position: %i = [%f %f %f]\n",2 , *(emitter_list_ptr+3), *(emitter_list_ptr+4), *(emitter_list_ptr+5));
|
||
}
|
||
else
|
||
printf( " -> emitter_list-Position: %i = [%f %f %f]\n",1 , *(emitter_list_ptr+0), *(emitter_list_ptr+1), *(emitter_list_ptr+2));
|
||
|
||
#endif
|
||
|
||
|
||
|
||
//====================================================================== 7.Input Parameter - Check SAFT_mode
|
||
SAFT_mode_Nx = GetDimensions(SAFT_mode)[0]; // Reihen N ermitteln
|
||
SAFT_mode_Mx = GetDimensions(SAFT_mode)[1]; // Spalten M ermitteln
|
||
|
||
SAFT_MODE = *((int*)GetPr(SAFT_mode));
|
||
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[6] SAFT_MODE [%ix%i] = [%i]\n", SAFT_mode_Nx , SAFT_mode_Mx, SAFT_MODE);
|
||
#endif
|
||
|
||
if (!(SAFT_mode_Nx == 1))
|
||
printf(" -> Dimension of SAFT_MODE must be [1 x 1]");
|
||
// if(!(mxIsUint32(SAFT_mode)))
|
||
// printf(" -> SAFT_MODE must be Uint32");
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf ( "\e[7;37m ======================================================================================== \e[0m\n");
|
||
printf ( "\e[7;37m Used SAFT (GPU-Version):\e[0m");
|
||
#endif
|
||
switch (SAFT_MODE)
|
||
{
|
||
case 0:
|
||
SOSMode_3DVolume = false; ATTMode_3DVolume = false;
|
||
//printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
printf (" -> AscanIndexVersion only make sense with SOS or SOS and ATT Volume => exit");
|
||
break;
|
||
case 1:
|
||
SOSMode_3DVolume = true; ATTMode_3DVolume = false;
|
||
//printf ( "\e[7;37m + Speed of sound correction - Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
break;
|
||
case 2:
|
||
SOSMode_3DVolume = true; ATTMode_3DVolume = true;
|
||
//printf ( "\e[7;37m + Speed of sound correction + Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
break;
|
||
case 3:
|
||
//SOSMode_3DVolume = true; ATTMode_3DVolume = true;
|
||
//printf ( "\e[7;37m SAFT_MODE = 3 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
printf(" -> not implemented => exit");
|
||
break;
|
||
case 4:
|
||
//SOSMode_3DVolume = false; ATTMode_3DVolume = false;
|
||
//printf ( "\e[7;37m SAFT_MODE = 4 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
printf(" -> not implemented => exit");
|
||
break;
|
||
default: SOSMode_3DVolume = false; ATTMode_3DVolume = false;
|
||
//printf ( " -> SAFT_MODE %i is out of range [0..3] => use Standard SAFT\n", SAFT_MODE);
|
||
//printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
break;
|
||
}
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf ( "\n\e[7;37m ======================================================================================== \e[0m\n");
|
||
#else
|
||
//printf ( "\n");
|
||
#endif
|
||
|
||
|
||
//====================================================================== 8.Input Parameter - Check SAFT_variant
|
||
SAFT_variant_Nx = GetDimensions(SAFT_variant)[0]; // Reihen N ermitteln
|
||
SAFT_variant_Mx = GetDimensions(SAFT_variant)[1]; // Spalten M ermitteln
|
||
|
||
SAFT_VARIANT = (int*)GetPr(SAFT_variant);
|
||
SAFT_VARIANT_Size = SAFT_variant_Mx;
|
||
|
||
#ifdef debug_OutputParameter
|
||
//printf( "prhs[7] SAFT_VARIANT [%ix%i] = [%i %i %i %i %i %i]\n", SAFT_variant_Nx , SAFT_variant_Mx, SAFT_VARIANT[0], SAFT_VARIANT[1], SAFT_VARIANT[2], SAFT_VARIANT[3], SAFT_VARIANT[4], SAFT_VARIANT[5]);
|
||
printf(" -> Ascan Preintegration = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration]);
|
||
printf(" -> Ascan Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation]);
|
||
printf(" -> Preprocessing SOS&ATT 3D Volume Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing]);
|
||
printf(" -> Reconstruction SOS&ATT 3D Volume Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]);
|
||
printf(" -> Standard deviation (STD) not yet = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_CalcStandardDeviation]);
|
||
printf(" -> Sum up over boarder Indices not yet = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_SumUpOverBoarderIndices]);
|
||
#endif
|
||
|
||
// if(!(mxIsUint32(SAFT_variant)))
|
||
// printf(" -> SAFT_VARIANT must be Uint32");
|
||
if (!(SAFT_variant_Nx == 1)||!(SAFT_variant_Mx == 6))
|
||
printf(" -> Dimension of SAFT_VARIANT must be [1 x 6]");
|
||
|
||
|
||
|
||
|
||
|
||
|
||
//====================================================================== 9.Input Parameter - Check for SOS volume
|
||
speed_Nx = GetDimensions(speed)[0]; // Reihen N ermitteln
|
||
speed_Mx = GetDimensions(speed)[1]; // Spalten M ermitteln
|
||
float Sos = *((float*)GetPr(speed));
|
||
|
||
float *speed_vec_ptr;
|
||
speed_vec_ptr = (float*)GetPr(speed); // Pointer für SoSDaten ermitteln
|
||
|
||
// if(!(mxIsSingle(speed)))
|
||
// printf(" -> SOSVolume must be Single");
|
||
|
||
|
||
if (SOSMode_3DVolume == true) // SOS correction need 3D Volume
|
||
{
|
||
if (GetNumberOfDimensions(speed) == 3){
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[8] SOSVolume [%ix%ix%i]\n", mxGetDimensions(speed)[0] , mxGetDimensions(speed)[1], mxGetDimensions(speed)[2]);
|
||
printf( " -> use SoS-Mode with SoS-Correction per Path with SOS 3D Volume\n");
|
||
#endif
|
||
|
||
if (!( (speed_Nx > 1)&&(speed_Mx > 1) )){
|
||
printf( " -> SOSGrid_XYZ.x and SOSGrid_XYZ.y must be > 1 for SOS Correction with 3D Volume!!!");
|
||
}
|
||
}
|
||
else if (GetNumberOfDimensions(speed) == 2){
|
||
printf ( "prhs[8] SOSVolume [%ix%i]\n",(int)GetDimensions(speed)[0] , (int)GetDimensions(speed)[1]);
|
||
printf( " -> SOSVolume is not a 3D Volume as expected!");
|
||
}
|
||
|
||
SOSGrid_Xx = GetDimensions(speed)[0]; // SOSGrid_X ermitteln
|
||
SOSGrid_Yx = GetDimensions(speed)[1]; // SOSGrid_Y ermitteln
|
||
SOSGrid_Zx = GetDimensions(speed)[2]; // SOSGrid_Z ermitteln
|
||
|
||
SOSGrid_XYZ.x = SOSGrid_Xx;
|
||
SOSGrid_XYZ.y = SOSGrid_Yx;
|
||
SOSGrid_XYZ.z = SOSGrid_Zx;
|
||
|
||
if ((SOSGrid_XYZ.x > 128)||(SOSGrid_XYZ.y > 128)|| (SOSGrid_XYZ.z > 128)){
|
||
printf ( " -> SOSGrid_XYZ [%i x %i x %i]\n", (int)SOSGrid_Xx, (int)SOSGrid_Yx, (int)SOSGrid_Zx);
|
||
printf ( " Warning -> SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n");
|
||
}
|
||
|
||
#ifdef debug_OutputVariables
|
||
printf(" -> SoS-Data[1-3] = [%f %f %f]\n",speed_vec_ptr[0], speed_vec_ptr[1], speed_vec_ptr[2]);
|
||
//int speedOfSoundFieldIndex = (currentVoxel.z * SOSGrid_XYZ.y + currentVoxel.y) * SOSGrid_XYZ.x + currentVoxel.x;
|
||
int speedOfSoundFieldIndex = (0 * SOSGrid_XYZ.y + 1) * SOSGrid_XYZ.x + 9;
|
||
printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",speedOfSoundFieldIndex, (speedOfSoundFieldIndex+2), speed_vec_ptr[speedOfSoundFieldIndex + 0], speed_vec_ptr[speedOfSoundFieldIndex + 1], speed_vec_ptr[speedOfSoundFieldIndex + 2]);
|
||
speedOfSoundFieldIndex = (4 * SOSGrid_XYZ.y + 37) * SOSGrid_XYZ.x + 9;
|
||
printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",speedOfSoundFieldIndex, (speedOfSoundFieldIndex+2), speed_vec_ptr[speedOfSoundFieldIndex + 0], speed_vec_ptr[speedOfSoundFieldIndex + 1], speed_vec_ptr[speedOfSoundFieldIndex + 2]);
|
||
|
||
int SOSGrid_END = (SOSGrid_XYZ.x * SOSGrid_XYZ.y * SOSGrid_XYZ.z);
|
||
printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",(SOSGrid_END-3), (SOSGrid_END-1), speed_vec_ptr[SOSGrid_END-3], speed_vec_ptr[SOSGrid_END-2], speed_vec_ptr[SOSGrid_END-1]);
|
||
#endif
|
||
|
||
}
|
||
|
||
|
||
|
||
|
||
//====================================================================== 10.Input Parameter - Check SoS Startpoint
|
||
sos_startPoint_Nx = GetDimensions(sos_startPoint)[0]; // Reihen N ermitteln
|
||
sos_startPoint_Mx = GetDimensions(sos_startPoint)[1]; // Spalten M ermitteln
|
||
|
||
sosOffset.x = *((float*)GetPr(sos_startPoint));
|
||
sosOffset.y = *((float*)GetPr(sos_startPoint)+1);
|
||
sosOffset.z = *((float*)GetPr(sos_startPoint)+2);
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[9] SOS_STARTPOINT_S [%ix%i] = [%f x %f x %f]\n", sos_startPoint_Nx , sos_startPoint_Mx, sosOffset.x, sosOffset.y, sosOffset.z);
|
||
#endif
|
||
|
||
|
||
if (!(sos_startPoint_Nx == 1)||!(sos_startPoint_Mx == 3))
|
||
printf(" -> Dimension of SOS_STARTPOINT_S must be [1 x 3]");
|
||
if ((sos_startPoint_Nx > 1))
|
||
printf( " -> No Blockmode [%i x 3] allowed for SOS_STARTPOINT_S\n", sos_startPoint_Nx);
|
||
// if(!(mxIsSingle(sos_startPoint)))
|
||
// printf(" -> SOS_STARTPOINT_S must be Single");
|
||
|
||
//====================================================================== 11.Input Parameter - Check SoS_RESOLUTION / sos_res
|
||
if (SOSMode_3DVolume == true){
|
||
sos_res_Nx = GetDimensions(sos_res)[0]; // Reihen N ermitteln
|
||
sos_res_Mx = GetDimensions(sos_res)[1]; // Spalten M ermitteln
|
||
|
||
SOS_RESOLUTION = *((float*)GetPr(sos_res));
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[10] SOS_RESOLUTION_S [%ix%i] = [%f]\n", sos_res_Nx , sos_res_Mx, SOS_RESOLUTION);
|
||
#endif
|
||
|
||
|
||
if (!(sos_res_Nx == 1))
|
||
printf(" -> Dimension of SOS_RESOLUTION_S must be [1 x 1]");
|
||
if ((sos_res_Mx > 1))
|
||
printf( " -> No Blockmode allowed for SOS_RESOLUTION_S! [1 x %i]\n", sos_res_Mx);
|
||
// if(!(mxIsSingle(sos_res)))
|
||
// printf(" -> SOS_RESOLUTION_S must be Single");
|
||
}
|
||
|
||
//====================================================================== 12.Input Parameter - Check for ATTVolume / Attenuation-Data
|
||
|
||
attVolume_Nx = GetDimensions(attVolume)[0]; // Reihen N ermitteln
|
||
attVolume_Mx = GetDimensions(attVolume)[1]; // Spalten M ermitteln
|
||
|
||
float *att_vec_ptr;
|
||
att_vec_ptr = (float*)GetPr(attVolume); // Pointer für ATT-Daten ermitteln
|
||
|
||
// if(!(mxIsSingle(attVolume)))
|
||
// printf(" -> attVolume must be Single");
|
||
|
||
|
||
if (GetNumberOfDimensions(attVolume) == 3){
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[11] attVolume [%ix%ix%i]\n", mxGetDimensions(attVolume)[0] , mxGetDimensions(attVolume)[1], mxGetDimensions(attVolume)[2]);
|
||
printf( " -> use ATT-Mode with ATT-Correction per Path with ATT 3D Volume\n");
|
||
#endif
|
||
|
||
if (!( (attVolume_Nx > 1)&&(attVolume_Mx > 1) )){
|
||
printf( " -> ATTGrid_XYZ.x and ATTGrid_XYZ.y must be > 1 for ATT Correction with 3D Volume!!!");
|
||
}
|
||
}
|
||
else if (GetNumberOfDimensions(attVolume) == 2){
|
||
#ifdef debug_OutputParameter
|
||
printf ( "prhs[11] attVolume [%ix%i]\n", mxGetDimensions(attVolume)[0] , mxGetDimensions(attVolume)[1]);
|
||
printf( " -> attVolume is not a 3D Volume as expected!");
|
||
#endif
|
||
}
|
||
|
||
if ((SOSMode_3DVolume == true)&&(ATTMode_3DVolume == true)){ // 3D Volume muss bei SOS und ATT angegeben sein damit ATT Korrektur durchgefuehrt werden kann
|
||
ATTGrid_Xx = GetDimensions(attVolume)[0]; // ATTGrid_X ermitteln
|
||
ATTGrid_Yx = GetDimensions(attVolume)[1]; // ATTGrid_Y ermitteln
|
||
ATTGrid_Zx = GetDimensions(attVolume)[2]; // ATTGrid_Z ermitteln
|
||
|
||
ATTGrid_XYZ.x = ATTGrid_Xx;
|
||
ATTGrid_XYZ.y = ATTGrid_Yx;
|
||
ATTGrid_XYZ.z = ATTGrid_Zx;
|
||
|
||
if ((ATTGrid_XYZ.x > 128) || (ATTGrid_XYZ.y > 128) || (ATTGrid_XYZ.z > 128)){
|
||
printf ( " -> ATTGrid_XYZ [%i x %i x %i]\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z);
|
||
printf ( " Warning -> ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n");
|
||
}
|
||
|
||
if ((ATTGrid_XYZ.x != SOSGrid_XYZ.x) || (ATTGrid_XYZ.y != SOSGrid_XYZ.y) || (ATTGrid_XYZ.z != SOSGrid_XYZ.z)){ // Restriction: Volume parameter of ATT & SOS must be the same
|
||
printf( " -> ATTGrid[%i %i %i] != SOSGrid[%i %i %i]!\n", ATTGrid_XYZ.x , ATTGrid_XYZ.y, ATTGrid_XYZ.z, SOSGrid_XYZ.x , SOSGrid_XYZ.y, SOSGrid_XYZ.z);
|
||
printf(" -> ATTGrid must have the same size as SOSGrid \n");
|
||
}
|
||
|
||
#ifdef debug_OutputVariables
|
||
printf(" -> ATT-Data[1-3] = [%f %f %f]\n", att_vec_ptr[0], att_vec_ptr[1], att_vec_ptr[2]);
|
||
int ATTFieldIndex = (14 * ATTGrid_XYZ.y + 30 ) * ATTGrid_XYZ.x + 16;
|
||
printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", ATTFieldIndex, (ATTFieldIndex+2), att_vec_ptr[ATTFieldIndex + 0], att_vec_ptr[ATTFieldIndex + 1], att_vec_ptr[ATTFieldIndex + 2]);
|
||
ATTFieldIndex = (15 * ATTGrid_XYZ.y + 32 ) * ATTGrid_XYZ.x + 32;
|
||
printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", ATTFieldIndex, (ATTFieldIndex+2), att_vec_ptr[ATTFieldIndex + 0], att_vec_ptr[ATTFieldIndex + 1], att_vec_ptr[ATTFieldIndex + 2]);
|
||
|
||
int ATTGrid_END = (ATTGrid_XYZ.x * ATTGrid_XYZ.y * ATTGrid_XYZ.z);
|
||
printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", (ATTGrid_END-3), (ATTGrid_END-1), att_vec_ptr[ATTGrid_END-3], att_vec_ptr[ATTGrid_END-2], att_vec_ptr[ATTGrid_END-1]);
|
||
#endif
|
||
|
||
}
|
||
else{
|
||
//#ifdef debug_OutputVariables
|
||
printf( " -> ATTMode_3DVolume == false => skip ATTGrid\n");
|
||
//#endif
|
||
|
||
ATTGrid_Xx = 0; // ATTGrid_X ermitteln
|
||
ATTGrid_Yx = 0; // ATTGrid_Y ermitteln
|
||
ATTGrid_Zx = 0; // ATTGrid_Z ermitteln
|
||
|
||
ATTGrid_XYZ.x = ATTGrid_Xx;
|
||
ATTGrid_XYZ.y = ATTGrid_Yx;
|
||
ATTGrid_XYZ.z = ATTGrid_Zx;
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
//====================================================================== 13.Input Parameter - Check IMAGE_RESOLUTION_S / res
|
||
res_Nx = GetDimensions(res)[0]; // Reihen N ermitteln
|
||
res_Mx = GetDimensions(res)[1]; // Spalten M ermitteln
|
||
|
||
IMAGE_RESOLUTION = *((float*)GetPr(res));
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[12] IMAGE_RESOLUTION_S [%ix%i] = [%f]\n", res_Nx , res_Mx, IMAGE_RESOLUTION);
|
||
#endif
|
||
|
||
if (!(res_Nx == 1))
|
||
printf(" -> Dimension of IMAGE_RESOLUTION must be [1 x 1]");
|
||
if ((res_Mx > 1))
|
||
printf( " -> No Blockmode allowed for IMAGE_RESOLUTION! [1 x %i]\n", res_Mx);
|
||
// if(!(mxIsSingle(res)))
|
||
// printf(" -> IMAGE_RESOLUTION must be Single");
|
||
|
||
if (SOSMode_3DVolume == true){
|
||
if(IMAGE_RESOLUTION > SOS_RESOLUTION){
|
||
printf( " -> IMAGE_RESOLUTION (%f) > SOS_RESOLUTION (%f)\n", IMAGE_RESOLUTION, SOS_RESOLUTION);
|
||
printf(" -> IMAGE_RESOLUTION must not > SOS_RESOLUTION !!!");
|
||
}
|
||
}
|
||
|
||
|
||
//====================================================================== 14.Input Parameter - Check TimeInterval_S / Timeint
|
||
timeint_Nx = GetDimensions(timeint)[0]; // Reihen N ermitteln
|
||
timeint_Mx = GetDimensions(timeint)[1]; // Spalten M ermitteln
|
||
|
||
sampleRate = *((float*)GetPr(timeint));
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[13] TimeInterval_S [%ix%i] = [%e]\n", timeint_Nx , timeint_Mx, sampleRate);
|
||
#endif
|
||
|
||
if (!(timeint_Nx == 1))
|
||
printf(" -> Dimension of TimeInterval_S must be [1 x 1]");
|
||
if ((timeint_Mx > 1))
|
||
printf( " -> No Blockmode allowed for TimeInterval_S! [1 x %i]\n", timeint_Mx);
|
||
// if(!(mxIsSingle(timeint)))
|
||
// printf(" -> TimeInterval_S must be Single");
|
||
|
||
//====================================================================== 15.Input Parameter - Check IMAGE_XYZ_UI32 / IMAGE_XYZ
|
||
IMAGE_XYZ_Nx = GetDimensions(IMAGE_XYZ)[0]; // Reihen N ermitteln
|
||
IMAGE_XYZ_Mx = GetDimensions(IMAGE_XYZ)[1]; // Spalten M ermitteln
|
||
|
||
IMAGE_SIZE_XYZ.x = *((int*)GetPr(IMAGE_XYZ));
|
||
IMAGE_SIZE_XYZ.y = *((int*)GetPr(IMAGE_XYZ)+1);
|
||
IMAGE_SIZE_XYZ.z = *((int*)GetPr(IMAGE_XYZ)+2);
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[14] IMAGE_XYZ [%ix%i] = [%ix%ix%i]\n", IMAGE_XYZ_Nx , IMAGE_XYZ_Mx,IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z);
|
||
#endif
|
||
|
||
if (!(IMAGE_XYZ_Nx == 1)||!(IMAGE_XYZ_Mx == 3))
|
||
printf(" -> Dimension of IMAGE_XYZ must be [1 x 3]");
|
||
if ((IMAGE_XYZ_Nx > 1))
|
||
printf( " -> No Blockmode allowed for IMAGE_XYZ! [%i x 3]\n", IMAGE_XYZ_Nx);
|
||
// if(!(mxIsUint32(IMAGE_XYZ)))
|
||
// printf(" -> IMAGE_XYZ must be UINT32");
|
||
|
||
if ((IMAGE_SIZE_XYZ.x > 8192)||(IMAGE_SIZE_XYZ.y > 8192)) // Aufteilung in BlockDim 512,1,1 passt für 5632x5632. Es würde etwas weiter gehen aber dann muss Y kleiner sein.
|
||
printf(" -> IMAGE_XYZ must not > [8192 x 8192 x N]!!!");
|
||
|
||
//====================================================================== 16.Input Parameter - Check Env / IMAGE_SUM
|
||
IMAGE_SUM_Xx = GetDimensions(IMAGE_SUM)[0]; // Spalten M ermitteln X
|
||
IMAGE_SUM_Yx = GetDimensions(IMAGE_SUM)[1]; // Reihen N ermitteln Y
|
||
|
||
if (GetNumberOfDimensions(IMAGE_SUM) > 2)
|
||
IMAGE_SUM_Zx = GetDimensions(IMAGE_SUM)[2]; // Z-Schichten ermitteln Z
|
||
else if (GetNumberOfDimensions(IMAGE_SUM) == 2)
|
||
IMAGE_SUM_Zx = 1; // Z-Schichten = 1
|
||
else
|
||
{
|
||
printf( " -> mxGetNumberOfDimensions of IMAGE_SUM = %i\n", (int)GetNumberOfDimensions(IMAGE_SUM));
|
||
printf(" -> Dimension of IMAGE_SUM must be 3: [X x Y x Z]");
|
||
}
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[15] IMAGE_SUM [%ix%ix%i]\n", IMAGE_SUM_Xx , IMAGE_SUM_Yx, IMAGE_SUM_Zx);
|
||
#endif
|
||
|
||
// if(!(mxIsDouble(IMAGE_SUM)))
|
||
// printf(" -> IMAGE_SUM must be Double");
|
||
// if(!(mxGetNumberOfElements(IMAGE_SUM) == ((size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z)))
|
||
// {
|
||
// printf( " -> IMAGE_SUM and the Number of Voxels don't match: %lld = [%lldx%lldx%lld]\n",(int)GetNumberOfElements(IMAGE_SUM), IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z);
|
||
// printf(" -> Make sure that they have the same size");
|
||
// }
|
||
|
||
uint64_t IMAGE_SUM_Count = GetNumberOfElements(IMAGE_SUM);
|
||
double *IMAGE_SUM_vec_ptr = (double*)GetPr(IMAGE_SUM);
|
||
|
||
#ifdef debug_OutputVariables
|
||
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",0 , IMAGE_SUM_vec_ptr[0], IMAGE_SUM_vec_ptr[1], IMAGE_SUM_vec_ptr[2]);
|
||
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",1 , IMAGE_SUM_vec_ptr[3], IMAGE_SUM_vec_ptr[4], IMAGE_SUM_vec_ptr[5]);
|
||
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",2 , IMAGE_SUM_vec_ptr[6], IMAGE_SUM_vec_ptr[7], IMAGE_SUM_vec_ptr[8]);
|
||
printf( " -> IMAGE_SUM: %lld = [%f %f %f]\n",(IMAGE_SUM_Count-3) , IMAGE_SUM_vec_ptr[IMAGE_SUM_Count - 3], IMAGE_SUM_vec_ptr[IMAGE_SUM_Count - 2], IMAGE_SUM_vec_ptr[IMAGE_SUM_Count -1]);
|
||
#endif
|
||
|
||
//====================================================================== 17.Input Parameter - Check BlockDimension for GPU
|
||
|
||
BlockDim_XYZ_Nx = GetDimensions(BlockDim)[0]; // Reihen N ermitteln
|
||
BlockDim_XYZ_Mx = GetDimensions(BlockDim)[1]; // Spalten M ermitteln
|
||
|
||
BlockDim_XYZ.x = *((int*)GetPr(BlockDim));
|
||
BlockDim_XYZ.y = *((int*)GetPr(BlockDim)+1);
|
||
BlockDim_XYZ.z = *((int*)GetPr(BlockDim)+2);
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[16] BlockDim_XYZ (GPU) [%ix%i] = [%ix%ix%i]\n", BlockDim_XYZ_Nx , BlockDim_XYZ_Mx, BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z);
|
||
#endif
|
||
|
||
if (!(BlockDim_XYZ_Nx == 1)||!(BlockDim_XYZ_Mx == 3))
|
||
printf(" -> Dimension of BlockDim_XYZ must be [1 x 3]");
|
||
if ((BlockDim_XYZ_Nx > 1))
|
||
printf( " -> No Blockmode! [%i x 3]\n", (int)BlockDim_XYZ_Nx);
|
||
// if(!(mxIsUint32(BlockDim)))
|
||
// printf(" -> BlockDim_XYZ must be UINT32");
|
||
|
||
if ((BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z) > 1024){ // BlockSize limited to 1024. Perhaps newer GPUs will support more Threads per Block
|
||
printf( " -> BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z must not > 1024!!!");
|
||
|
||
// Here Adaption for BlockSize can be done.
|
||
BlockDim_XYZ.x = 1024; // If Blockdimensions are not specified than standard Blockdimensions will be used.
|
||
BlockDim_XYZ.x = 1;
|
||
BlockDim_XYZ.x = 1;
|
||
|
||
printf( " -> Standard Size for BlockDim_XYZ is used [%ix%ix%i]\n", BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z);
|
||
}
|
||
|
||
|
||
//====================================================================== 18.Input Parameter - Check GPUs
|
||
|
||
int *enableGPUs_ptr;
|
||
|
||
GPUs_Nx = GetDimensions(GPUs)[0]; // Reihen N ermitteln
|
||
GPUs_Mx = GetDimensions(GPUs)[1]; // Spalten M ermitteln
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[17] GPUs [%ix%i] \n", GPUs_Nx , GPUs_Mx);
|
||
#endif
|
||
|
||
enableGPUs_ptr = (int*)GetPr(GPUs);
|
||
selectedNumberGPUs = GPUs_Mx;
|
||
|
||
|
||
//Determine Number of GPU-Devices and check if there are so many available
|
||
int num_devices = 0;
|
||
//printf( " -> cudaGetDeviceCount: %i\n", num_devices);
|
||
CUDA_CHECK(cudaGetDeviceCount(&num_devices));
|
||
|
||
#ifdef debug_OutputInfo
|
||
printf( " -> GPU-devices available: %i\n", num_devices);
|
||
#endif
|
||
|
||
|
||
if (selectedNumberGPUs <= num_devices)
|
||
{
|
||
#ifdef debug_OutputParameter
|
||
printf( " (selectedNumberGPUs(%i) <= num_devices(%i)) -> OK -> selectedNumberGPUs = %i!\n", selectedNumberGPUs, num_devices, selectedNumberGPUs);
|
||
#endif
|
||
}
|
||
else
|
||
{
|
||
printf( " !!! !!! selectedNumberGPUs(%i) > num_devices(%i) !!!! !!!! -> selectedNumberGPUs = num_devices = %i!\n", selectedNumberGPUs, num_devices, num_devices);
|
||
selectedNumberGPUs = num_devices; // Reduce number of selected to number of GPUs in PC system!
|
||
}
|
||
|
||
|
||
// Check passed GPU-ID-Numbers and amount of GPUs
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[17] GPUs [%ix%i] = [", GPUs_Nx , selectedNumberGPUs);
|
||
#endif
|
||
int gpuNr = 0;
|
||
int gpuNrCheck = 0;
|
||
for (gpuNr = 0; gpuNr < selectedNumberGPUs; ++gpuNr){
|
||
#ifdef debug_OutputParameter
|
||
printf( " %i ", enableGPUs_ptr[gpuNr]);
|
||
#endif
|
||
|
||
if (enableGPUs_ptr[gpuNr] > (num_devices-1)){ // Check if more GPUs are selected then available in System
|
||
printf( "\n enableGPUs_ptr[gpuNr=%i] = %i !!!\n", gpuNr, enableGPUs_ptr[gpuNr]);
|
||
printf(" -> selected number of GPU > available Devices is not allowed!");
|
||
}
|
||
|
||
for (gpuNrCheck = 0; gpuNrCheck < gpuNr; ++gpuNrCheck){
|
||
#ifdef debug_OutputParameter
|
||
printf( "\n enableGPUs_ptr[gpuNrCheck %i] = %i, enableGPUs_ptr[gpuNr %i] = %i\n",gpuNrCheck, enableGPUs_ptr[gpuNrCheck], gpuNr, enableGPUs_ptr[gpuNr]);
|
||
#endif
|
||
if (enableGPUs_ptr[gpuNrCheck] == enableGPUs_ptr[gpuNr])
|
||
printf(" -> GPU Device can only be used once!!!");
|
||
}
|
||
|
||
}
|
||
#ifdef debug_OutputParameter
|
||
printf( "]; Number of GPUs to use = %i\n", selectedNumberGPUs);
|
||
#endif
|
||
|
||
if (!(GPUs_Nx == 1)||!(GPUs_Mx < 10))
|
||
printf(" -> Dimension of GPUs must be [1 x <10]");
|
||
if ((pix_vect_Nx > 1))
|
||
printf( " -> No Blockmode [%i x n] allowed for GPUs\n", GPUs_Nx);
|
||
// if(!(mxIsUint32(GPUs)))
|
||
// printf(" -> GPUs must be UINT32");
|
||
|
||
|
||
|
||
//====================================================================== 19.Input Parameter - debugMode, debugModeParameter
|
||
dbgMode_Nx = GetDimensions(dbgMode)[0]; // Reihen N ermitteln
|
||
dbgMode_Mx = GetDimensions(dbgMode)[1]; // Spalten M ermitteln
|
||
|
||
debugMode = *((float*)GetPr(dbgMode) );
|
||
debugModeParameter = *((float*)GetPr(dbgMode)+1);
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "prhs[18] debugMode [%ix%i] = [%f %f]\n", dbgMode_Nx , dbgMode_Mx, debugMode, debugModeParameter);
|
||
#endif
|
||
|
||
if ((dbgMode_Nx != 1)||(dbgMode_Mx != 2))
|
||
printf(" -> Dimension of debugMode must be [1 x 2]\n");
|
||
// if(!(mxIsSingle(dbgMode)))
|
||
// printf(" -> debugMode must be single");
|
||
if(debugMode != 0.0)
|
||
printf (" -> debugMode = [%f], debugModeParameter = [%f]\n", debugMode, debugModeParameter);
|
||
|
||
|
||
|
||
|
||
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "=================================================================================================\n"); // End of Inputparameter
|
||
#endif
|
||
|
||
// // Enable colored Output // http://linuxgazette.net/issue65/padala.html
|
||
// printf ("\e[1;34mThis is a blue text.\e[0m"); // farbige Ausgabe mit
|
||
// printf ("\e[1;34m %-6s \e[m", "This is text"); // Farbige Text-Ausgabe
|
||
// printf ("\n");
|
||
|
||
|
||
|
||
|
||
//================================================================================================================ Create Output Memory / Parameter
|
||
#ifdef debug_OutputParameter
|
||
printf( "\n");
|
||
printf( "Create Outputparameter\n");
|
||
printf( "Out[n] Meaning \n");
|
||
printf( "=================================================================================================\n");
|
||
#endif
|
||
|
||
// ~~~~ Create 3D-Matrix for the Output-Values
|
||
// Output-Dimension is {IMAGE_XYZ_X, IMAGE_XYZ_Y, IMAGE_XYZ_Z}
|
||
const int dims[]={IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z};
|
||
int ndim = 3;
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "plhs[0] = Output_Voxels = mxCreateNumericArray( ndim(%i), dims{%i %i %i}, mxDOUBLE_CLASS, mxREAL);\n", ndim, dims[0], dims[1], dims[2]);
|
||
#endif
|
||
|
||
Matrix_t Output_Voxels;
|
||
Output_Voxels.NumberOfDims = ndim;
|
||
Output_Voxels.Dims[0] = dims[0];
|
||
Output_Voxels.Dims[1] = dims[1];
|
||
Output_Voxels.Dims[2] = dims[2];
|
||
Output_Voxels.Data = new double[dims[0]*dims[1]*(dims[2]?dims[2]:1)];
|
||
|
||
double *Output_Voxels_ptr = (double*)GetPr(Output_Voxels);
|
||
|
||
|
||
// ~~~~ Create Pointer to return value from Duration of Kernel
|
||
// Erstelle Array mit folgender Formatierung
|
||
// 0: Total Durationtime all GPUs
|
||
// 1: Total Durationtime GPU 1
|
||
// 2: Total Durationtime GPU 2
|
||
// n: Total Durationtime GPU n
|
||
|
||
int
|
||
m = (1 + selectedNumberGPUs),
|
||
n = 1;
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "plhs[1] = Duration = mxCreateDoubleMatrix( m(%i), n(%i), mxREAL);\n", m, n);
|
||
#endif
|
||
|
||
double *Duration_ptr = new double[m*n];
|
||
|
||
// ~~~~ Create Pointer to return Error/Abortvalue of each multithread
|
||
//int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int));
|
||
int *Abort_ptr = (int*) malloc(selectedNumberGPUs*sizeof(int));
|
||
|
||
|
||
// ~~~~ Erstelle Array fuer Testrueckgabe der integrierten Ascans
|
||
//AScan_Nx = mxGetDimensions(AScan)[0]; // Reihen N ermitteln
|
||
//AScan_Mx = mxGetDimensions(AScan)[1]; // Spalten M ermitteln
|
||
//int
|
||
m = aScanCount; // 1
|
||
n = AScan_Nx; //z.B. 3000
|
||
#ifdef debug_OutputParameter
|
||
printf( "plhs[2] = Ascans = mxCreateNumericMatrix( m(%i), n(%i), mxREAL);\n", m, n);
|
||
#endif
|
||
|
||
float *AscansOut_ptr = new float[m*n];
|
||
|
||
|
||
#ifdef debug_OutputParameter
|
||
printf( "=================================================================================================\n\n"); // End of Outputparameter
|
||
#endif
|
||
|
||
|
||
//================================================================================================================ Preintegrate Ascans
|
||
#ifdef preAscanIntegrationToMatchSamplerateToResolution // Preintegrate Ascans for matching of Samplerate and Resolution
|
||
|
||
|
||
#ifdef debug_OutputHostStepsPerformance
|
||
double diff_time;
|
||
|
||
struct timeval startPreintegrateAscans, stopPreintegrateAscans;
|
||
gettimeofday(&startPreintegrateAscans, NULL);
|
||
#endif
|
||
if (SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration] == 1){
|
||
//printf( "(SAFT_VARIANT[0] == 1) => perform preintegrateAscans\n\n");
|
||
|
||
speed_vec_ptr = (float*)GetPr(speed);
|
||
// printf( " speed_vec_ptr[%3i] = %12.10f\n",0,speed_vec_ptr[0]);
|
||
if (speed_vec_ptr[0] == 0){
|
||
printf("First value in SOS Volume = 0 --> preintegrateAscans can't be performed!!! --> Exit");
|
||
}
|
||
|
||
//================================================================================================================
|
||
preintegrateAscans(aScan_ptr, AscansOut_ptr, speed_vec_ptr, aScanCount, aScanLength, IMAGE_RESOLUTION, sampleRate, debugMode, debugModeParameter);
|
||
//================================================================================================================
|
||
}
|
||
else{
|
||
|
||
//printf( "(SAFT_VARIANT[0] == 0) => skip preintegrateAscans\n\n");
|
||
//Daten trotzdem in Outputspeicher fuer Matlab transferieren
|
||
for (int j = 0; j<aScanCount; j++){ // ueber alle A-scans gehen.
|
||
for (int i = 0; i < aScanLength; i++){
|
||
//printf( " i (%4i) = %6.3f ",i, aScan_ptr[j*aScanLength+i]);
|
||
AscansOut_ptr[j*aScanLength+i] = aScan_ptr[j*aScanLength+i]; // nach Matlab zurueckgeben
|
||
//printf( " i (%4i) = %6.3f \n",i, AscansOut_ptr[j*aScanLength+i]);
|
||
}
|
||
}
|
||
}
|
||
|
||
#ifdef debug_OutputHostStepsPerformance
|
||
gettimeofday(&stopPreintegrateAscans, NULL);
|
||
|
||
diff_time = (double)((stopPreintegrateAscans.tv_sec * 1000000.0 + stopPreintegrateAscans.tv_usec) - (startPreintegrateAscans.tv_sec * 1000000.0 + startPreintegrateAscans.tv_usec));
|
||
printf ("########################################################################\n");
|
||
//printf ("### aScanCount = %i %f %f\n", aScanCount,double(aScanCount),(double)(aScanCount));
|
||
printf ("### HOST ### PreintegrateAscans = %8.0f µs = %8.2f MAscans/s\n", diff_time, (double)aScanCount/diff_time);
|
||
#endif
|
||
|
||
#endif
|
||
|
||
//================================================================================================================ Start Reconstruction
|
||
//================================================================================================================
|
||
|
||
#ifdef debug_OutputHostStepsPerformance
|
||
struct timeval startMultithreadProcessing, stopMultithreadProcessing;
|
||
gettimeofday(&startMultithreadProcessing, NULL);
|
||
#endif
|
||
|
||
multithreaded_processing( aScan_ptr,
|
||
Output_Voxels_ptr,
|
||
receiver_index_ptr,
|
||
emitter_index_ptr,
|
||
receiver_list_ptr,
|
||
receiver_list_Mx,
|
||
emitter_list_ptr,
|
||
emitter_list_Mx,
|
||
speed_vec_ptr,
|
||
SOSGrid_XYZ,
|
||
sosOffset,
|
||
SOS_RESOLUTION,
|
||
att_vec_ptr,
|
||
AScan_Mx,
|
||
AScan_Nx,
|
||
regionOfInterestOffset,
|
||
IMAGE_SIZE_XYZ,
|
||
IMAGE_RESOLUTION,
|
||
sampleRate,
|
||
BlockDim_XYZ,
|
||
Duration_ptr,
|
||
selectedNumberGPUs,
|
||
enableGPUs_ptr,
|
||
debugMode,
|
||
debugModeParameter,
|
||
SOSMode_3DVolume,
|
||
ATTMode_3DVolume,
|
||
SAFT_MODE,
|
||
SAFT_VARIANT,
|
||
SAFT_VARIANT_Size,
|
||
Abort_ptr
|
||
);
|
||
|
||
#ifdef debug_OutputHostStepsPerformance
|
||
gettimeofday(&stopMultithreadProcessing, NULL);
|
||
diff_time = (double)((stopMultithreadProcessing.tv_sec * 1000000.0 + stopMultithreadProcessing.tv_usec) - (startMultithreadProcessing.tv_sec * 1000000.0 + startMultithreadProcessing.tv_usec));
|
||
printf ("########################################################################\n");
|
||
printf ("### HOST ### MultithreadProcessing = %8.0 us = %8.2f GVA/s\n", diff_time, double(IMAGE_SUM_Count)*double(aScanCount)/(diff_time*1000));
|
||
#endif
|
||
|
||
// Check if errors occurred
|
||
bool AbortedThreads = false;
|
||
for (int i=0; i < selectedNumberGPUs; i++ ){
|
||
if (Abort_ptr[i] > 0)
|
||
{
|
||
printf( "!!!!!!!!!!!!!!!!!!! Aborted Thread for GPU[%i] = %i\n", i, Abort_ptr[i]);
|
||
AbortedThreads = true;
|
||
}
|
||
}
|
||
free(Abort_ptr);
|
||
if (AbortedThreads)
|
||
printf(" Aborted Thread occurred -> see output history");
|
||
|
||
//================================================================================================================
|
||
//================================================================================================================
|
||
|
||
|
||
//================================================================================================================ Build Sum of IMAGE_SUM and current reconstructed Volume
|
||
|
||
// Daten des uebergebenen Outputvolumens zum rekonstruierten Volumen addieren
|
||
#ifdef debug_OutputInfo
|
||
printf( "Build Sum of reconstructed Volume and given IMAGE_SUM \n");
|
||
#endif
|
||
|
||
#ifdef debug_OutputHostStepsPerformance
|
||
struct timeval startSumIMAGE_SUM, stopSumIMAGE_SUM;
|
||
gettimeofday(&startSumIMAGE_SUM, NULL);
|
||
#endif
|
||
|
||
for(uint64_t i=0; i < IMAGE_SUM_Count; i++)
|
||
{
|
||
Output_Voxels_ptr[i] += IMAGE_SUM_vec_ptr[i];
|
||
}
|
||
|
||
#ifdef debug_OutputHostStepsPerformance
|
||
gettimeofday(&stopSumIMAGE_SUM, NULL);
|
||
diff_time = (double)((stopSumIMAGE_SUM.tv_sec * 1000000.0 + stopSumIMAGE_SUM.tv_usec) - (startSumIMAGE_SUM.tv_sec * 1000000.0 + startSumIMAGE_SUM.tv_usec));
|
||
printf ("### HOST ### Sum up (IMAGE_SUM + reconstr. Volume) = %8.0f µs = %8.2f MVoxel/s\n", diff_time, double(IMAGE_SUM_Count)/diff_time);
|
||
printf ("########################################################################\n");
|
||
#endif
|
||
|
||
|
||
|
||
|
||
//================================================================================================================ Show returned Output-Values
|
||
//
|
||
#ifdef debug_OutputVariables
|
||
// Testoutput Value of duration
|
||
printf( "Duration_ptr[0] = %f\n", Duration_ptr[0]);
|
||
|
||
// Testoutput Sum of IMAGE_SUM and reconstructed Volume
|
||
printf( "Output_Voxels: %i = [%f %f %f]\n",0 , Output_Voxels_ptr[0], Output_Voxels_ptr[1], Output_Voxels_ptr[2]);
|
||
printf( "Output_Voxels: %i = [%f %f %f]\n",1 , Output_Voxels_ptr[3], Output_Voxels_ptr[4], Output_Voxels_ptr[5]);
|
||
printf( "Output_Voxels: %i = [%f %f %f]\n",2 , Output_Voxels_ptr[6], Output_Voxels_ptr[7], Output_Voxels_ptr[8]);
|
||
printf( "Output_Voxels: %i = [%f %f %f]\n",(IMAGE_SUM_Count-3) , Output_Voxels_ptr[IMAGE_SUM_Count - 3], Output_Voxels_ptr[IMAGE_SUM_Count - 2], Output_Voxels_ptr[IMAGE_SUM_Count -1]);
|
||
#endif
|
||
|
||
#ifdef debug_OutputFunctions
|
||
printf( "<== mexFunction - End\n");
|
||
#endif
|
||
delete [] AscansOut_ptr;
|
||
delete [] Duration_ptr;
|
||
return Output_Voxels;
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|