Files
URDepends/SAFT_TOFI/src/SAFT_TOFI.cpp
2023-06-09 11:19:30 +08:00

1948 lines
84 KiB
C++
Raw Blame History

#include "SAFT_TOFI.h"
#include "saft.hpp"
#include <cstddef>
#include <math.h>
#include <locale.h> // For German printf output format: Float with , instead of .
#include <iostream>
#include <map>
#include <string>
#include <stdint.h>
#include <future>
#include <array>
#include <chrono>
// TODO: Blockgroesse (z > 1) fuehrt zu Kernelabbruechen
//pthread handle
typedef struct thread_handle_t {
//pthread_t pthread;
int deviceId;
int deviceIndex;
float *aScan_ptr;
double *output_ptr;
double *Duration_ptr;
unsigned short *receiver_index_ptr;
unsigned short *emitter_index_ptr;
float *receiver_list_ptr;
int receiver_list_Size;
float *emitter_list_ptr;
int emitter_list_Size;
float *speed_vec_ptr;
int3 SOSGrid_XYZ;
float3 sosOffset;
float SOS_RESOLUTION;
float *att_vec_ptr;
int aScanCount;
int aScanLength;
float inc;
int3 res;
float sampleRate;
float3 volposition;
int num_threads;
dim3 fixedBlockDimensions;
float debugMode;
float debugModeParameter;
bool SOSMode_3DVolume;
bool ATTMode_3DVolume;
int SAFT_MODE;
int *SAFT_VARIANT;
int SAFT_VARIANT_Size;
int *Abort_ptr;
} thread_handle;
//Convenient typedefs for GPU-DeviceProperties container
typedef std::vector<cudaDeviceProp> DeviceProperties;
/**
Load CUDA devices and write them to a container.
*/
void loadDevices(
DeviceProperties & output ///< This argument is written to. Container in which the device data are stored.
)
{
#ifdef debug_OutputFunctions
printf( "==> loadDevices - Start\n");
#endif
int deviceCount;
CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
#ifdef debug_OutputInfo
printf( "There are %i devices present:\n", deviceCount);
#endif
output.reserve(static_cast<std::size_t>(deviceCount));
for(int i = 0; i < deviceCount; i++)
{
cudaDeviceProp & device = output[i];
CUDA_CHECK(cudaGetDeviceProperties(&device, i));
#ifdef debug_OutputInfo
std::cout << " " << (i + 1) << ". " << device.name << std::endl;
std::cout << " " << "Byte Total Global Mem: " << device.totalGlobalMem <<std::endl;
std::cout << " " << "Compute Capability: " << device.major << "." << device.minor <<std::endl;
#endif
output.push_back(device);
}
#ifdef debug_OutputFunctions
std::cout << "<== loadDevices - End" << std::endl;
#endif
}
//pthread call function
void thread_function (void *arg) {
thread_handle *pthread_handle = (thread_handle*)arg;
#ifdef debug_OutputSAFTHandlerThreadPerformance
struct timeval startSAFTHandler, stopSAFTHandler;
gettimeofday(&startSAFTHandler, NULL);
#endif
// Create Instance of SAFT-Handler and call constructor
SAFTHandler saft(
pthread_handle->deviceId, // deviceId
pthread_handle->deviceIndex, // deviceIndex
pthread_handle->aScan_ptr, // aScan_ptr,
pthread_handle->output_ptr, // output_ptr,
pthread_handle->Duration_ptr, // Duration_ptr,
pthread_handle->receiver_index_ptr, // receiver_index_ptr ///<
pthread_handle->emitter_index_ptr, // emitter_index_ptr ///<
pthread_handle->receiver_list_ptr, // receiver_list_ptr ///<
pthread_handle->receiver_list_Size, // receiver_list_Size ///<
pthread_handle->emitter_list_ptr, // emitter_list_ptr ///<
pthread_handle->emitter_list_Size, // emitter_list_Size ///<
pthread_handle->speed_vec_ptr, // speed_vec_ptr,
pthread_handle->SOSGrid_XYZ, // SOSGrid_XYZ,
pthread_handle->sosOffset, // sosOffset, ///< Startpoint of SoSGrid
pthread_handle->SOS_RESOLUTION, // SOS_RESOLUTION, ///< Resolution of SoSGrid
pthread_handle->att_vec_ptr, // att_vec_ptr
pthread_handle->aScanCount, // aScanCount,
pthread_handle->aScanLength, // aScanLength
pthread_handle->res, // resolution => IMAGE_SIZE_XYZ,
pthread_handle->sampleRate, // sampleRate
pthread_handle->volposition, // => regionOfInterestOffset,
pthread_handle->inc, // IMAGE_RESOLUTION,
pthread_handle->fixedBlockDimensions, // fixedBlockDimensions
pthread_handle->debugMode, // debugMode
pthread_handle->debugModeParameter, // Parameter for DebugMode
pthread_handle->SOSMode_3DVolume,
pthread_handle->ATTMode_3DVolume,
pthread_handle->SAFT_MODE,
pthread_handle->SAFT_VARIANT,
pthread_handle->SAFT_VARIANT_Size,
pthread_handle->Abort_ptr // If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
);
saft.performReconstruction();
#ifdef debug_OutputSAFTHandlerThreadPerformance
CUDA_CHECK(cudaDeviceSynchronize());
gettimeofday(&stopSAFTHandler, NULL);
double diff_time = (double)((stopSAFTHandler.tv_sec * 1000000.0 + stopSAFTHandler.tv_usec) - (startSAFTHandler.tv_sec * 1000000.0 + startSAFTHandler.tv_usec));
printf ("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
printf ("{ ~~~> } Device (%i) - SAFTHandler-Thread : %8.0f µs\n", pthread_handle->deviceId, diff_time);
double performance_Thread = (((double)pthread_handle->aScanCount * (double)pthread_handle->res.x * (double)pthread_handle->res.y * (double)pthread_handle->res.z) / diff_time )/ 1000.0;
printf ("{ ~~~> } Device (%i) - SAFTHandler-Thread Performance: %.6lf A-Scan * GVoxel/s\n", pthread_handle->deviceId, performance_Thread);
printf ("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
#endif
//pthread_exit(NULL);
}
/**
Check amount of GPUs and divide Volume ins parts with same size
Abfrage der Anzahl an GPU-Devices und Einteilung des Volumens in möglichst gleichgroße Volumen in Z-Richtung (3D-Volumen) oder Y-Richtung (2D-Volumen) auf.
*/
void multithreaded_processing(
float *aScan_ptr, ///< AScan-Daten
double *output_ptr, ///< OutputDaten der Voxel
unsigned short *receiver_index_ptr, ///< Index Receiver per Ascan
unsigned short *emitter_index_ptr, ///< Index Emitter per Ascan
float *receiver_list_ptr, ///< Positionskoordinaten Receiver
int receiver_list_Size, ///< Menge an Receiver
float *emitter_list_ptr, ///< Positionskoordinaten Emitter
int emitter_list_Size, ///< Menge an Emitter
float *speed_vec_ptr, ///< SoS Daten im Blockmode oder als SoSGrid
int3 SOSGrid_XYZ, ///< Size of SoSGrid
float3 sosOffset, ///< Startpoint of SoSGrid
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
float *att_vec_ptr, ///< Attenuation Daten als ATTGrid
int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen
int aScanLength, ///< Laenge der AscanDaten (normal 3000)
float3 regionOfInterestOffset,
int3 IMAGE_SIZE_XYZ, ///< Groesse des Bildbereichs in Voxel
float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs
float sampleRate, ///< Samplerate für AScans
int3 BlockDim_XYZ, ///< BlockDimension für GPU
double *Duration_ptr, ///< Rückgabepointer an Matlab für Laufzeit des SAFT-Kernels
int selectedNumberGPUs, ///< Anzahl der ausgewählten GPUs bzw. auf maximale Anzhal vorhandener begrenzt
int *enableGPUs_ptr, ///< Gibt an welche GPUs genutzt werden und welche nicht
float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden
float debugModeParameter, ///< Parameter der mit fuer Debugmode uebermittelt werden kann
bool SOSMode_3DVolume, ///< Wird 3D Volumen für SOS-Korrektur genutzt?
bool ATTMode_3DVolume, ///< Wird 3D Volumen für ATT-Korrektur genutzt?
int SAFT_MODE, ///< Modus für SAFT-Rekonstruktion
int *SAFT_VARIANT, ///< Verschiedene Parameter der Rekonstruktion
int SAFT_VARIANT_Size, ///< Menge der verschiedenen Parameter für Rekonstruktion
int *Abort_ptr ///< FehlerArray
)
{
#ifdef debug_OutputFunctions
printf( "==> multithreaded_processing - Start\n");
#endif
dim3 fixedBlockDimensions( // convert int3 to dim3
BlockDim_XYZ.x,
BlockDim_XYZ.y,
BlockDim_XYZ.z
);
// Divide workload and show Information ------------------------------------------------------------------------------------------------------------------
// Divide workload in pieces with the same size for all available GPUs.
// If the workload can not be divided in pieces with the same size, the last piece will be the one with little less workload.
// Testfall simuliert die mehrfache Anzahl an GPUs
int num_devices_factor = 1; // Vielfache an GPUs simulieren bzw. kleinere Pakete erzeugen
int num_workingPackages = selectedNumberGPUs*num_devices_factor;
float3 *position = (float3*)malloc(num_workingPackages * sizeof(float3));
int3 *resolution = (int3*) malloc(num_workingPackages * sizeof(int3));
int3 *volumeStartpoint = (int3*) malloc(num_workingPackages * sizeof(int3));
size_t *volumePtr = (size_t*)malloc(num_workingPackages * sizeof(size_t));
//int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int));
float3 volposition = regionOfInterestOffset; // Uebergabe der Parameter
float inc = IMAGE_RESOLUTION; // koennte auch direkt umbenannt werden
#ifdef debug_OutputMultiGpu
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
printf("~ Dividing workload between %i devices ~\n", num_devices );
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
#endif
size_t volume_size = (size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z * (size_t)sizeof(double); // = Groesse des Outputvolumens in Byte
if (( IMAGE_SIZE_XYZ.y == 1 ) && ( IMAGE_SIZE_XYZ.z == 1 )){
selectedNumberGPUs = 1;
num_workingPackages = 1;
#ifdef debug_OutputMultiGpu
printf( "IMAGE_SIZE_XYZ.y = IMAGE_SIZE_XYZ.z = 1 ==> Use only one GPU\n", num_devices);
#endif
}
volumeStartpoint[0].x = 0;
volumeStartpoint[0].y = 0;
volumeStartpoint[0].z = 0;
position[0].x = volposition.x; // Startposition
position[0].y = volposition.y;
position[0].z = volposition.z;
std::vector<int> resolutionZs(num_workingPackages, IMAGE_SIZE_XYZ.z / num_workingPackages);
for (size_t i = 0; i < IMAGE_SIZE_XYZ.z % num_workingPackages; i++)
resolutionZs[i]++;
int i,j,k;
for (i=0; i < num_workingPackages; i++ ) {
#ifdef debug_OutputMultiGpu
printf("Working Package [%i]\n", i);
#endif
if ( IMAGE_SIZE_XYZ.z > 1 ) { // Divide in Z-Direction
#ifdef debug_OutputMultiGpu
printf( "( IMAGE_SIZE_XYZ.z > 1 ) => Divide through %i WP in Z-Direction for %i GPUs\n", num_workingPackages, selectedNumberGPUs);
#endif
resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization
resolution[i].y = IMAGE_SIZE_XYZ.y;
resolution[i].z = resolutionZs[i];
if (i>0)
{
volumeStartpoint[i].x = 0; // Koordinaten als Startpunkt für Layer in einzelnen GPUs
volumeStartpoint[i].y = 0;
volumeStartpoint[i].z = volumeStartpoint[i-1].z + resolution[i-1].z;
}
volumePtr[i] = (size_t)((size_t)resolution[0].x * (size_t)resolution[0].y * (size_t)volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen
#ifdef debug_OutputMultiGpu
printf(" - volumeStartpoint[%i] = [%d %d %d]\n", i, volumeStartpoint[i].x, volumeStartpoint[i].y, volumeStartpoint[i].z);
printf(" => volume_size[%i] = [%i %i %i]*double = %lld kB\n", i, resolution[i].x, resolution[i].y, resolution[i].z, ((uint64_t)resolution[i].x * (uint64_t)resolution[i].y * (uint64_t)resolution[i].z * (uint64_t)sizeof(double)) / ((uint64_t)1024));
printf(" => volumePtr[%i] = [%lld]\n", i, (uint64_t)volumePtr[i]);
#endif
}
else { // Divide in Y-Direction
#ifdef debug_OutputMultiGpu
printf( "( IMAGE_SIZE_XYZ.z = 1 ) => Divide through %i in Y-Direction for %i GPUs\n", num_workingPackages, selectedNumberGPUs);
#endif
resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization
resolution[i].z = IMAGE_SIZE_XYZ.z;
if ( IMAGE_SIZE_XYZ.y % num_workingPackages == 0) { // Volume is divisible
resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages;
}
else { // if not divisible,
if (i != (num_workingPackages - 1)){ // increment each GPU slice by one
resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages + 1;
}
else { // except the last one, which get the remaining Layers
resolution[i].y = IMAGE_SIZE_XYZ.y % resolution[0].y;
}
}
}
#ifdef debug_OutputMultiGpu
printf("WP[%i] on deviceId[%i]=deviceIndex[%i]\n", i, enableGPUs_ptr[(i % selectedNumberGPUs)], (i % selectedNumberGPUs));
printf(" - resolution[%i].x = %i\n", i, resolution[i].x );
printf(" - resolution[%i].y = %i\n", i, resolution[i].y );
printf(" - resolution[%i].z = %i\n", i, resolution[i].z );
#endif
position[i].x = volposition.x; // Startposition
position[i].y = volposition.y;
position[i].z = volposition.z;
if ( IMAGE_SIZE_XYZ.z > 1 ) position[i].z += i*inc*resolution[0].z; // Calculate Startpositions for the workload-pieces
else position[i].y += i*inc*resolution[0].y;
#ifdef debug_OutputMultiGpu
printf(" - position[%i].x = %f\n", i, position[i].x );
printf(" - position[%i].y = %f\n", i, position[i].y );
printf(" - position[%i].z = %f\n", i, position[i].z );
#endif
}
// Create one thread per GPU ------------------------------------------------------------------------------------------------------------------------
thread_handle *pthread_handle = (thread_handle*)malloc(selectedNumberGPUs * sizeof(thread_handle));
#ifdef debug_OutputMultiGpu
printf("-> pthread_handles for %i workingPackages created!\n", num_workingPackages);
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
#endif
for ( i = 0; i < num_workingPackages; i++ ) {
//printf("num_workingPackages = %i !\n", num_workingPackages);
//printf("num_devices = %i\n", num_devices);
//printf("(i %% num_devices) = %i !\n", (i % num_devices));
//printf("deviceId = enableGPUs_ptr[%i] = %i !\n", (i % num_devices), enableGPUs_ptr[(i % num_devices)]);
//printf("deviceIndex = (i %% num_devices) = %i !\n", (i % num_devices));
// initialize control block
pthread_handle[i].deviceId = enableGPUs_ptr[(i % selectedNumberGPUs)]; // Hier DeviceID der GPU setzen
pthread_handle[i].deviceIndex = (i % selectedNumberGPUs);
pthread_handle[i].aScan_ptr = aScan_ptr;
if ( IMAGE_SIZE_XYZ.z > 1 )
pthread_handle[i].output_ptr = &output_ptr[volumePtr[i]]; // Startpoint for Outputvolume.
//volumePtr[i] = (size_t)(resolution[0].x * resolution[0].y * volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen
else
//pthread_handle[i].output_ptr = &output_ptr[ i * resolution[0].x * resolution[0].y * resolution[0].z ]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Gr<47><72>e hat.
pthread_handle[i].output_ptr = &output_ptr[ (size_t)resolution[0].x * (size_t)i * (size_t)resolution[0].y]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Groesse hat. Z spielt hier keine Rolle
pthread_handle[i].Duration_ptr = Duration_ptr;
pthread_handle[i].receiver_index_ptr = receiver_index_ptr;
pthread_handle[i].emitter_index_ptr = emitter_index_ptr;
pthread_handle[i].receiver_list_ptr = receiver_list_ptr;
pthread_handle[i].receiver_list_Size = receiver_list_Size;
pthread_handle[i].emitter_list_ptr = emitter_list_ptr;
pthread_handle[i].emitter_list_Size = emitter_list_Size;
pthread_handle[i].speed_vec_ptr = speed_vec_ptr;
pthread_handle[i].SOSGrid_XYZ = SOSGrid_XYZ;
pthread_handle[i].sosOffset = sosOffset;
pthread_handle[i].SOS_RESOLUTION = SOS_RESOLUTION;
pthread_handle[i].att_vec_ptr = att_vec_ptr;
pthread_handle[i].aScanCount = aScanCount;
pthread_handle[i].aScanLength = aScanLength,
pthread_handle[i].inc = IMAGE_RESOLUTION;
pthread_handle[i].res = resolution[i];
pthread_handle[i].sampleRate = sampleRate;
pthread_handle[i].volposition = position[i]; //regionOfInterestOffset
pthread_handle[i].num_threads = num_workingPackages;
pthread_handle[i].fixedBlockDimensions = fixedBlockDimensions; //
pthread_handle[i].debugMode = debugMode;
pthread_handle[i].debugModeParameter = debugModeParameter;
pthread_handle[i].SOSMode_3DVolume = SOSMode_3DVolume;
pthread_handle[i].ATTMode_3DVolume = ATTMode_3DVolume;
pthread_handle[i].SAFT_MODE = SAFT_MODE;
pthread_handle[i].SAFT_VARIANT = SAFT_VARIANT;
pthread_handle[i].SAFT_VARIANT_Size = SAFT_VARIANT_Size;
Abort_ptr[i] = 0; // Initialisieren mit kein Fehler
pthread_handle[i].Abort_ptr = &Abort_ptr[i];
}
auto startAllThreads = std::chrono::steady_clock::now();
double diff_time = 0.0;
#ifdef debug_OutputMultiGpu
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
printf("pthread_handles for %i devices initialized!\n", selectedNumberGPUs)-1;
#endif
// Zeitmessung ueber alle Threads
//gettimeofday(&startAllThreads, NULL);
#ifdef debug_OutputMultiGpu
printf("-> pthread_handles for %i workingPackages initialized!\n", num_workingPackages);
printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
#endif
std::vector<std::future<void>> futures;
futures.resize(selectedNumberGPUs);
for ( j = 0; j < num_devices_factor; j++ ) {
for ( k = 0; k < selectedNumberGPUs; k++ ) {
//for ( i = 0; i < num_devices; i++ ) {
#ifdef debug_OutputMultiGpu
printf("]~~~~> create thread (%i) [WP (%i) on Device (%i)] and execute thread_function!\n", (j*selectedNumberGPUs+k), (j*selectedNumberGPUs+k), enableGPUs_ptr[k]);
#endif
//create thread and start execution by calling thread_function
//===========================================================================================
//pthread_create(&pthread_handle[(j*selectedNumberGPUs+k)].pthread, NULL, &thread_function, &pthread_handle[(j*selectedNumberGPUs+k)] );
//===========================================================================================
//new async threads
futures[k] = std::async(std::forward<std::function<void (void*)>>(thread_function), std::forward<void*>((void*)&pthread_handle[(j * selectedNumberGPUs + k)])); //forward used for perfect forwarding
}
//Synchronization and termination -------------------------------------------------------------------------------------------------------------------
for ( k = 0; k < selectedNumberGPUs; k++ ) {
//wait for threads to finish processing
//pthread_join(pthread_handle[(j*selectedNumberGPUs+k)].pthread, NULL);
#ifdef debug_OutputMultiGpu
printf("<~~~~[ joined thread (%i) [WP (%i) on Device (%i)]!\n", (j*selectedNumberGPUs+k), (j*selectedNumberGPUs+k), enableGPUs_ptr[k]);
#endif
//new async threads
futures[k].wait(); //advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task)
}
}
//gettimeofday(&stopAllThreads, NULL);
auto stopAllThreads = std::chrono::steady_clock::now();
diff_time = std::chrono::duration_cast<std::chrono::microseconds>(stopAllThreads - startAllThreads).count(); // total duration in µs
#ifdef debug_OutputPerformance
printf("\n# NUM_VOXEL = %i * %i * %i = %i\n", IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z, IMAGE_SIZE_XYZ.x * IMAGE_SIZE_XYZ.y * IMAGE_SIZE_XYZ.z );
printf( "# aScanCount = %i\n", aScanCount);
//printf("volposition.x = %f, .y=%f, .z=%f\n", volposition.x, volposition.y, volposition.z );
double performance_all = (((double)aScanCount * (double)IMAGE_SIZE_XYZ.x * (double)IMAGE_SIZE_XYZ.y * (double)IMAGE_SIZE_XYZ.z) / diff_time )/ 1000.0;
printf("# Duration of main Processing (all GPUs): %.3f us\n", diff_time);
printf("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
printf("{~~~~~~~~~} Performance: %.6lf A-Scan * GVoxel/s {~~~~~~~~~}\n", performance_all);
printf("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n");
#endif
Duration_ptr[0] = diff_time; // Return total duration in µs
// Speicher wieder freigeben
free (position);
free (resolution);
free (volumeStartpoint);
free (volumePtr);
//free (Abort_ptr);
free (pthread_handle);
#ifdef debug_OutputFunctions
printf( "<== multithreaded_processing - End\n");
#endif
}
/**
preintegrateAscans
Determine maximal SampleWidth, matching to the resolution to be used for reconstruction, and integrate A-scan over an window of this SampleWidth
*/
void preintegrateAscans(
float *aScan_ptr, ///< AScan-Daten
float *AscansOut_ptr, ///< AScan-OutputDaten fuer Testrueckgabe
float *speed_vec_ptr, ///< SoS Daten im Blockmode
int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen
int aScanLength, ///< Laenge der AscanDaten (normal 3000)
float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs
float sampleRate, ///< Samplerate fuer AScans
float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden
float debugModeParameter ///< Parameter der mit fuer Debugmode uebermittelt werden kann
)
{
#ifdef debug_OutputFunctions
printf( "==> preintegrateAscans - Start\n");
#endif
float* AscanBuffer;
AscanBuffer = (float*)malloc(aScanLength * sizeof(float)); // Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
// //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer
// for (int i = 0; i<aScanLength; i++){
// printf( " -> i (%3i) \n",i);
// //AscansOut_ptr[i] = *(aScan_ptr+i);
// AscanBuffer[i] = aScan_ptr[i];
// AscansOut_ptr[i] = AscanBuffer[i];
// }
#ifdef preAscanIntegrationVersion1Michael // direkt übernommene Version von Michael Zapf
int windowWidth = 0;
/////////////////////// Integration über Anzahl "sampl" voxel ///////////////////////
unsigned int i,j,sampl = 0;
double i_buffer=0.0;
/////xsum ueber diagonale der voxellänge
sampl = (unsigned int)(ceil((float)1.7*(( IMAGE_RESOLUTION / speed_vec_ptr[0])/ (sampleRate)) /2)); //halbe Breite bestimmen
windowWidth = (int)2*IMAGE_RESOLUTION/sampleRate/speed_vec_ptr[0];
#ifdef debug_preAscanIntegration
printf( "~~~~~~~~~~~~~~~~~ V1 Michael ~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
printf( " aScanCount = %5i\n",aScanCount);
printf( " aScanLength = %5i\n",aScanLength);
printf( " IMAGE_RESOLUTION = %12.10f\n",IMAGE_RESOLUTION);
printf( " speed_vec_ptr[%3i] = %12.10f\n",i,speed_vec_ptr[i]);
printf( " sampleRate = %12.10f\n\n",sampleRate);
printf( " => windowWidth = %5i (Ganze Breite) \n",windowWidth);
printf( " => sampl = %5i (Halbe Breite) \n",sampl);
printf( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
#endif
printf( " => windowWidth = %5i (Ganze Breite) \n",windowWidth);
printf( " => sampl = %5i (Halbe Breite) \n",sampl);
// Zugriff über
// sec_buffer, das mit INTERP_RATIO gestreckt ist
// bufferz ist der vorher schon angelegte Buffer der gleichen Größe wie sec_buffer
// sampl = ist die Breite des Additionsfensters für die Addition
if ((debugMode == 0.0) && (debugModeParameter == 2.0)){
printf( "~~~~~~~~~~~~~~~~~~~~~ !!!Use Abs before Preintegrate Ascans!!! ~~~~~~~~~~~~~~~~~~~~~\n");
for (int j = 0; j<aScanCount; j++){
for (int i = 0; i<aScanLength; i++){
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
//AscansOut_ptr[i] = *(aScan_ptr+i);
//AscanBuffer[i] = aScan_ptr[i];
aScan_ptr[j*aScanLength+i] = fabs(aScan_ptr[j*aScanLength+i]); // In AscanSpeicher schreiben
//AscansOut_ptr[j*aScanLength+i] = fabs(aScan_ptr[j*aScanLength+i]); // Ebenso nach Matlab zurueckgeben
}
}
}
else if ((debugMode == 0.0) && (debugModeParameter == 3.0)){
printf( "~~~~~~~~~~~~~~~~~~~~~ !!!Use sqrt before Preintegrate Ascans!!! ~~~~~~~~~~~~~~~~~~~~~\n");
for (int j = 0; j<aScanCount; j++){
for (int i = 0; i<aScanLength; i++){
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
//AscansOut_ptr[i] = *(aScan_ptr+i);
//AscanBuffer[i] = aScan_ptr[i];
aScan_ptr[j*aScanLength+i] = pow(aScan_ptr[j*aScanLength+i],2); // In AscanSpeicher schreiben
//AscansOut_ptr[j*aScanLength+i] = fabs(aScan_ptr[j*aScanLength+i]); // Ebenso nach Matlab zurueckgeben
}
}
}
for (int j = 0; j<aScanCount; j++){
#ifdef debug_preAscanIntegration
printf( "\n -> Ascan (j=%3i) ==> j*aScanLength+0 = %i \n\n",j, j*aScanLength);
#endif
i_buffer = 0;
for (i=0;i<sampl;i++)
{ i_buffer = i_buffer + aScan_ptr[j*aScanLength+i]/(2*sampl);
#ifdef debug_preAscanIntegration
printf( " i_buffer = %6.3f ",i_buffer);
#endif
}
for (i=0;i<sampl;i++)
{ if (i+sampl<aScanLength){
i_buffer = i_buffer +aScan_ptr[j*aScanLength+i+sampl]/(2*sampl);
AscanBuffer[i] = i_buffer;}
//printf( " i_buffer = %6.3f ",i_buffer);
}
for (i=sampl;i<(aScanLength)-sampl;i++)
{ if (i+sampl<aScanLength){
i_buffer = i_buffer + aScan_ptr[j*aScanLength+i+sampl]/(2*sampl) - aScan_ptr[j*aScanLength+i-sampl]/(2*sampl);
AscanBuffer[i] = i_buffer;}
}
for (i=aScanLength-sampl;i<aScanLength;i++)
{
if (i-sampl>=0){
i_buffer = i_buffer - aScan_ptr[j*aScanLength+i-sampl]/(2*sampl);
//AscanBuffer[i] = i_buffer / (sampl-(aScanLength)-i); // Michaels alter Code
AscanBuffer[i] = i_buffer ; // passt so besser zu Anfangsstrategie
}
}
//Alle Daten uebertragen von AscanBuffer auf AscansOut_ptr[0..aScanLength]
for (int i = 0; i<aScanLength; i++){
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
//AscansOut_ptr[i] = *(aScan_ptr+i);
//AscanBuffer[i] = aScan_ptr[i];
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // In AscanSpeicher schreiben
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Ebenso nach Matlab zurueckgeben
}
}
#endif
#ifdef preAscanIntegrationVersion2Ernst
int i, i_start, i_end = 0;
float nSample = 0.0f;
float windowWidth = 0.0f;
float windowWidthHalf = 0.0f;
float windowWidthHalf_minus1 = 0.0f;
float windowSum = 0.0f;
// maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615
// width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen
windowWidth = (float)3.464101615 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0];
windowWidthHalf = (float)1.732050808 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0]; //halbe Fenster Breite
#ifdef debug_preAscanIntegration
printf( "~~~~~~~~~~~~~~~~~ V2 Ernst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
printf( " aScanCount = %5i\n",aScanCount);
printf( " aScanLength = %5i\n",aScanLength);
printf( " IMAGE_RESOLUTION = %12.10f\n",IMAGE_RESOLUTION);
printf( " speed_vec_ptr[%3i] = %12.10f\n",i,speed_vec_ptr[i]);
printf( " sampleRate = %12.10f\n\n",sampleRate);
printf( " => windowWidth = %5.2f (Ganze Breite) \n",windowWidth);
printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
printf( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
printf ("\n 2. Testvariante mit Start und Endpkt-berechnung --> Neue Implementierung mit Zetrum in der Mitte \n#######################################################################################\n");
#endif
//printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth));
//printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
for (int j = 0; j<aScanCount; j++){ // über alle A-scans gehen.
if ((int)ceil(windowWidth)%2 == 1){ // Uneven / Ungerade
// // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden
windowWidthHalf_minus1 = floor(( ceil(windowWidth)-1)/2);
#ifdef debug_preAscanIntegration
printf( " ==> Ungerade ==> widthHalf_minus1 = %i \n",(int)windowWidthHalf_minus1);
#endif
for (int i = 0; i < aScanLength; i++){ // über gesamte Breite des A-scans gehen.
i_start = i - (int)windowWidthHalf_minus1;
i_end = i + (int)windowWidthHalf_minus1;
// Grenzen einhalten
if (i_start < 0) i_start=0;
if (i_end > aScanLength-1) i_end =aScanLength-1;
// Anzahl Sample bestimmen
nSample = i_end-i_start+1; // +1 da erstes Element auch dazugehört.
windowSum = 0;
for (int k = i_start; k <= i_end; k++){
#ifdef debug_preAscanIntegration
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
printf("Index %3i: windowSum (%3i:%3i) += aScan_ptr(k=%i) %07.5f \n", i, i_start, i_end, k, windowSum);
}
#endif
windowSum += aScan_ptr[j*aScanLength+k];
}
// geteilt durch nur die genutzten Samples
//AscanBuffer[i] = windowSum/ceil(nSample);
//Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem
//Rand nicht vorhanden/bzw = 0 sind. --> Abflachung am Rand
AscanBuffer[i] = windowSum/ceil(windowWidth);
#ifdef debug_preAscanIntegration
//if ((i >= 1720) && (i<=1730)){
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
printf("Index %3i: Summe über %5i Samples (%3i:%3i) = %07.5f \n", i, (int)nSample, i_start, i_end, AscanBuffer[i]);
}
#endif
}
}
else if ((int)ceil(windowWidth)%2 == 0){ // Even / Gerade
// Bei geraden Breiten symmetrisch mit den beiden äußeren Samplewerten zu je 1/2 gewichten.
windowWidthHalf_minus1 = floor(( ceil(windowWidth)-1)/2);
#ifdef debug_preAscanIntegration
printf( " ==> Gerade ==> widthHalf_minus1 = %i \n", (int)windowWidthHalf_minus1);
#endif
for (int i = 0; i < aScanLength; i++){ // über gesamte Breite des A-scans gehen.
i_start = i - (int)windowWidthHalf_minus1;
i_end = i + (int)windowWidthHalf_minus1;
// Grenzen einhalten
if (i_start < 0) i_start=0;
if (i_end > aScanLength-1) i_end =aScanLength-1;
// Anzahl Sample bestimmen
nSample = i_end-i_start+1; // +1 da erstes Element auch dazugehört.
windowSum = 0;
for (int k = i_start; k <= i_end; k++){
#ifdef debug_preAscanIntegration
//if ((i >= 1720) && (i<=1730)){
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
printf("Index %3i: windowSum (%3i:%3i) += aScan_ptr(k=%i) %07.5f \n", i, i_start, i_end, k, windowSum);
}
#endif
windowSum += aScan_ptr[j*aScanLength+k];
}
// geteilt wird durch nur die genutzten Samples
//AscanBuffer[i] = windowSum/ceil(nSample);
//Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem
//Rand nicht vorhanden/bzw = 0 sind.--> Abflachung am Rand
windowSum = windowSum/ceil(windowWidth);
// Halbe Samplewerte an Grenzen miteinberechen aber absolute Grenzen einhalten
if (i_start >= 0){
windowSum = windowSum + aScan_ptr[j*aScanLength+(i_start-1)]/(2*ceil(windowWidth)); // Linken Nachbarn zu 1/2 mit dazunehmen
nSample = nSample + 0.5;
}
if (i_end < aScanLength-1){
windowSum = windowSum + aScan_ptr[j*aScanLength+(i_end+1)]/(2*ceil(windowWidth)); // Rechten Nachbarn zu 1/2 mit dazunehmen
nSample = nSample + 0.5;
}
AscanBuffer[i] = windowSum;
#ifdef debug_preAscanIntegration
//if ((i >= 1720) && (i<=1730)){
if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){
printf("Index %3i: Summe über %5.1f Samples (%3i:%3i) = %07.5f \n", i, nSample, i_start, i_end, AscanBuffer[i]);
}
#endif
}
}
// Transfer Data from Buffer to Memory regions
for (int i = 0; i < aScanLength; i++){
//printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]);
//AscansOut_ptr[i] = *(aScan_ptr+i);
//AscanBuffer[i] = aScan_ptr[i];
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab
}
}
#ifdef debug_preAscanIntegration
printf ("#######################################################################################\n\n");
#endif
#endif
#ifdef debug_preAscanIntegration
printf( " \n\n\n\n ");
#ifdef debug_OutputVariables
// Ausgabe einzelner AScan-Samples der <20>bergabewerte
//float *aScan_ptr;
//aScan_ptr = (float*)mxGetPr(AScan);
//int aScanSampleCountPerReceiver=3000;
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",0 , *(aScan_ptr+0), *(aScan_ptr+1), *(aScan_ptr+2));
printf( " -> AScan (%3i) [1726:1729] = [%f %f %f %f]\n",0 , *(aScan_ptr+1726),*(aScan_ptr+1726+1), *(aScan_ptr+1726+2), *(aScan_ptr+1726+3));
// Ale drei Moeglichkeiten koennen angewandt werden
//*(AscansOut_ptr+1726) = *(aScan_ptr+1726);
//*(AscansOut_ptr+1726+1) = 0;
//AscansOut_ptr[1726+2] = (float)*(aScan_ptr+1726);
printf( " -> AScan (%3i) [1726:1729] = [%f %f %f %f]\n",0 , *(aScan_ptr+1726),*(aScan_ptr+1726+1), *(aScan_ptr+1726+2), *(aScan_ptr+1726+3));
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",0 , *(aScan_ptr+2997), *(aScan_ptr+2998), *(aScan_ptr+2999));
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",1 , aScan_ptr[3000], aScan_ptr[3001], aScan_ptr[3002]);
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",1 , aScan_ptr[5997], aScan_ptr[5998], aScan_ptr[5999]);
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",156 , aScan_ptr[0+(156*aScanSampleCountPerReceiver)], aScan_ptr[1+(156*aScanSampleCountPerReceiver)], aScan_ptr[2+(156*aScanSampleCountPerReceiver)]);
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",156 , aScan_ptr[2997+(156*aScanSampleCountPerReceiver)], aScan_ptr[2998+(156*aScanSampleCountPerReceiver)], aScan_ptr[2999+(156*aScanSampleCountPerReceiver)]);
#endif
#endif
free(AscanBuffer);
#ifdef debug_OutputFunctions
printf( "<== preintegrateAscans - End\n");
#endif
}
const size_t* GetDimensions(const Matrix_t& matrix)
{
return matrix.Dims;
}
const void* GetPr(const Matrix_t& matrix){
return matrix.Data;
}
size_t GetNumberOfDimensions(const Matrix_t& matrix){
return matrix.NumberOfDims;
}
size_t GetNumberOfElements(const Matrix_t& matrix){
return matrix.DataSize;
}
Matrix_t SAFT_TOFI(std::vector<Matrix_t>& params){
#ifdef debug_OutputFunctions
printf( "==> mexFunction - Start\n");
#endif
#ifdef debug_OutputFormat_German
setlocale(LC_NUMERIC, "de_DE"); // German Format , instead . for numbers
#endif
size_t AScan_Nx, AScan_Mx,
pix_vect_Nx, pix_vect_Mx,
receiver_index_Nx, receiver_index_Mx,
emitter_index_Nx, emitter_index_Mx,
receiver_list_Nx, receiver_list_Mx,
emitter_list_Nx, emitter_list_Mx,
SAFT_mode_Nx, SAFT_mode_Mx,
SAFT_variant_Nx, SAFT_variant_Mx,
speed_Nx, speed_Mx,
SOSGrid_Xx, SOSGrid_Yx, SOSGrid_Zx,
sos_startPoint_Nx, sos_startPoint_Mx,
sos_res_Nx, sos_res_Mx,
attVolume_Nx,attVolume_Mx,
ATTGrid_Xx, ATTGrid_Yx, ATTGrid_Zx,
res_Nx, res_Mx,
timeint_Nx, timeint_Mx,
IMAGE_XYZ_Nx, IMAGE_XYZ_Mx,
IMAGE_SUM_Xx, IMAGE_SUM_Yx, IMAGE_SUM_Zx,
BlockDim_XYZ_Nx, BlockDim_XYZ_Mx,
GPUs_Nx, GPUs_Mx,
dbgMode_Nx,dbgMode_Mx
;
int aScanCount;
int aScanLength;
float *aScan_ptr;
int3 IMAGE_SIZE_XYZ;
int3 BlockDim_XYZ;
bool SOSMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> SOSGrid_XYZ, SOS_RESOLUTION, sosOffset not neccessary
bool ATTMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> ATTGrid_XYZ not neccessary
int SAFT_MODE;
int *SAFT_VARIANT; // Variances of SAFT
int SAFT_VARIANT_Size; // Size of Varainces
int3 SOSGrid_XYZ; // Size of SOSGrid
int3 ATTGrid_XYZ; // Size of ATTGrid
float3 regionOfInterestOffset; // Startpoint
float3 sosOffset; // Startpoint SoS
float IMAGE_RESOLUTION; // Aufloesung
float SOS_RESOLUTION; // Aufloesung
float sampleRate; // Samplerate für AScans
int selectedNumberGPUs; // Anzahl der genutzten GPUs durch uebergebene Groesse der Ausgewaehlten GPUs
float debugMode;
float debugModeParameter;
// check number of arguments... // Anzahl der Argumente überprüfen
#ifdef debug_OutputParameter
printf( "Number of Arguments: Input(nrhs) = %i Output(nlhs) = %i\n", nrhs, nlhs);
#endif
if (params.size() != 19)
{
printf( " \n");
printf( " Inputparameter \n");
printf( " In[n] Meaning [Row N x Col M] Type\n");
printf( " =============================================================================\n");
printf( " 1-prhs[0] AScan-Data [3000?xnAscans] single\n");
printf( " 2-prhs[1] IMAGE_STARTPOINT_S [1x3] single\n");
printf( " 3-prhs[2] receiver_index [1xnAscans] uint16\n");
printf( " 4-prhs[3] emitter_index [1xnAscans] uint16\n");
printf( " 5-prhs[4] receiver_list [3xnReceiver] single\n");
printf( " 6-prhs[5] emitter_list [3xnEmitter] single\n");
printf( " 7-prhs[6] SAFT_mode [1x1] uint32\n");
printf( " 8-prhs[7] SAFT_variant [1x6] uint32\n");
printf( " Standard: [1 1 1 1 0 0] \n");
printf( " -> Ascan Preintegration\n");
printf( " -> Ascan Interpolation\n");
printf( " -> Preprocessing SOS&ATT 3D Volume Interpolation\n");
printf( " -> Reconstruction SOS&ATT 3D Volume Interpolation\n");
printf( " -> not yet\n");
printf( " -> not yet\n");
printf( " 9-prhs[8] SOSVolume [SOS_XxYxZ] single in m/s\n");
printf( " 10-prhs[9] SOS_STARTPOINT_S [1x3] single\n");
printf( " 11-prhs[10] SOS_RESOLUTION_S [1x1] single\n");
printf( " 12-prhs[11] ATTVolume [ATT_XxYxZ] single in dB/cm\n");
printf( " 13-prhs[12] IMAGE_RESOLUTION_S [1x1] single\n");
printf( " 14-prhs[13] TimeInterval_S [1x1] single\n");
printf( " 15-prhs[14] IMAGE_XYZ [1x3] uint32\n");
printf( " 16-prhs[15] IMAGE_SUM [Output_XxYxZ] double\n");
printf( " 17-prhs[16] BlockDim_XYZ (GPU) [1x3] uint32\n");
printf( " 18-prhs[17] GPUs (DeviceNr GPU) [1xn] uint32\n");
printf( " 19-prhs[18] dbgMode,dbgModeParam [1x2] single\n");
printf( " ==============================================================================\n");
printf( "\n");
printf( " Outputparameter \n");
printf( " Out[n] Meaning \n");
printf( " ================================================================================================= \n");
printf( " plhs[0] = Output_Voxels = mxCreateNumericArray ( [IMAGE_XYZ] , mxDOUBLE_CLASS, mxREAL); \n");
printf( " plhs[1] = Duration = mxCreateDoubleMatrix ( [nGPUs+1, 1] , mxREAL); \n");
printf( " plhs[2] = Output_Ascans = mxCreateNumericMatrix( [3000?,nAscans], mxSINGLE_CLASS, mxREAL); \n");
printf( " ================================================================================================= \n");
printf("Wrong number of input arguments. Should be 19.");
}
// assign input arguments... // Bestimme die Eingangswerte
const Matrix_t&AScan = params [0]; // AScan-Data
const Matrix_t&pix_vect = params [1]; // Image Startpoint (IMAGE_STARTPOINT_S)
const Matrix_t&receiver_index = params [2]; // Index Data for Receiver-Position Data
const Matrix_t&emitter_index = params [3]; // Index Data for Emitter-Position Data
const Matrix_t&receiver_list = params [4]; // Assignment Index to Receiver-Position Data
const Matrix_t&emitter_list = params [5]; // Assignment Index to Emitter-Position Data
const Matrix_t&SAFT_mode = params [6]; // SOS?, ATT?
const Matrix_t&SAFT_variant = params [7]; // Differnt Mode-Parameter for Reconstruction
const Matrix_t&speed = params [8]; // Speed of Sound Data (Single, SoS-Grid)
const Matrix_t&sos_startPoint = params [9]; // Startpoint of Speed of Sound Grid
const Matrix_t&sos_res = params [10]; // SoS Grid Resolution
const Matrix_t&attVolume = params [11]; // Attenuation Data (Single, SoS-Grid)
const Matrix_t&res = params [12]; // Output Volume Resolution
const Matrix_t&timeint = params [13]; // 1/Sample-Rate
const Matrix_t&IMAGE_XYZ = params [14]; // Output Volume Size XYZ
const Matrix_t&IMAGE_SUM = params [15]; // Volume from previous Call
const Matrix_t&BlockDim = params [16]; // Block Dimension to use for GPU
const Matrix_t&GPUs = params [17]; // Welche GPUs sollen genutzt werden?
const Matrix_t&dbgMode = params [18]; // DebugMode and DebugMode-Parameter
// check data types and assign variables... // Ueberpruefe die Datentypen und lege Variablen fest
//================================================================================================================ Check Dimensions of input parameters
#ifdef debug_OutputParameter
printf( "\n");
printf( "In[n] Meaning [Row N x Col M] \n");
printf( "=================================================================================================\n");
#endif
//====================================================================== 1.Input Parameter - Check AScan
AScan_Nx = GetDimensions(AScan)[0]; // Reihen N ermitteln
AScan_Mx = GetDimensions(AScan)[1]; // Spalten M ermitteln
aScanCount = AScan_Mx;
aScanLength = AScan_Nx;
#ifdef debug_OutputParameter
printf( "prhs[0] Ascan-Data [%ix%i]\n", AScan_Nx , AScan_Mx);
#endif
//printf( "mxGetNumberOfDimensions(AScan)=%i\n", mxGetNumberOfDimensions(AScan));
if ((aScanCount > 65535)) // new 2019: increasing the limit of the A-Scan block size. however this is limited by the datatype of unsigned short which is used for a pointer.
{
printf( " -> AScanBlock size = %i\n", aScanCount);
printf( "AScanBlock size might be too large (=> 2^16)!!!");
}
if ((aScanCount > 1))
#ifdef debug_OutputParameter
printf( " -> Blockmode with [%i x %i]\n", AScan_Nx, aScanCount);
#endif
// if(!(mxIsSingle(AScan)))
// printf("AScans must be Single");
aScan_ptr = (float*)GetPr(AScan);
#ifdef debug_OutputVariables
// Ausgabe einzelner AScan-Samples der übergabewerte
//int aScanSampleCountPerReceiver=3000;
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",0 , *(aScan_ptr+0), *(aScan_ptr+1), *(aScan_ptr+2));
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",0 , *(aScan_ptr+2997), *(aScan_ptr+2998), *(aScan_ptr+2999));
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",1 , aScan_ptr[3000], aScan_ptr[3001], aScan_ptr[3002]);
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",1 , aScan_ptr[5997], aScan_ptr[5998], aScan_ptr[5999]);
//printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",156 , aScan_ptr[0+(156*aScanSampleCountPerReceiver)], aScan_ptr[1+(156*aScanSampleCountPerReceiver)], aScan_ptr[2+(156*aScanSampleCountPerReceiver)]);
//printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",156 , aScan_ptr[2997+(156*aScanSampleCountPerReceiver)], aScan_ptr[2998+(156*aScanSampleCountPerReceiver)], aScan_ptr[2999+(156*aScanSampleCountPerReceiver)]);
#endif
//====================================================================== 2.Input Parameter - Check IMAGE_STARTPOINT_S / pix_vect
pix_vect_Nx = GetDimensions(pix_vect)[0]; // Reihen N ermitteln
pix_vect_Mx = GetDimensions(pix_vect)[1]; // Spalten M ermitteln
regionOfInterestOffset.x = *((float*)GetPr(pix_vect));
regionOfInterestOffset.y = *((float*)GetPr(pix_vect)+1);
regionOfInterestOffset.z = *((float*)GetPr(pix_vect)+2);
#ifdef debug_OutputParameter
printf( "prhs[1] IMAGE_STARTPOINT_S [%ix%i] = [%f x %f x %f]\n", pix_vect_Nx , pix_vect_Mx, regionOfInterestOffset.x, regionOfInterestOffset.y, regionOfInterestOffset.z);
#endif
if (!(pix_vect_Nx == 1)||!(pix_vect_Mx == 3))
printf(" -> Dimension of IMAGE_STARTPOINT_S must be [1 x 3]");
if ((pix_vect_Nx > 1))
printf( " -> No Blockmode [%i x 3] allowed for IMAGE_STARTPOINT_S\n", pix_vect_Nx);
// if(!(mxIsSingle(pix_vect)))
// printf(" -> IMAGE_STARTPOINT_S must be Single");
//====================================================================== 3.Input Parameter - Check Receiver Index
receiver_index_Nx = GetDimensions(receiver_index)[0]; // Reihen N ermitteln
receiver_index_Mx = GetDimensions(receiver_index)[1]; // Spalten M ermitteln
#ifdef debug_OutputParameter
printf( "prhs[2] receiver_index [%ix%i]\n", receiver_index_Nx , receiver_index_Mx);
#endif
if (!(receiver_index_Nx == 1))
printf(" -> Dimension of receiver_index must be [1 x M]");
if (!(receiver_index_Mx == aScanCount)){
printf (" -> aScanCount(%i)!= M(%i)\n", aScanCount, receiver_index_Mx);
printf(" -> Dimension of receiver_index has different size as Ascan-Data\n");
}
// if (!(receiver_index_Mx == 1))
#ifdef debug_OutputParameter
printf( " -> Blockmode with [1 x %i]\n", receiver_index_Mx);
#endif
// if(!(mxIsUint16(receiver_index)))
// printf(" -> receiver_index must be Uint16");
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
unsigned short *receiver_index_ptr;
receiver_index_ptr = (unsigned short*)GetPr(receiver_index);
#ifdef debug_OutputVariables
if ((receiver_index_Mx > 1))
{
printf( " -> receiver_index: %i = [%i]\n",1 , *(receiver_index_ptr+0));
printf( " -> receiver_index: %i = [%i]\n",2 , *(receiver_index_ptr+1));
printf( " -> receiver_index: %i = [%i]\n",3 , *(receiver_index_ptr+2));
}
else
printf( " -> receiver_index: %i = [%i]\n",1 , *(receiver_index_ptr+0));
#endif
//====================================================================== 4.Input Parameter - Check Emitter Index
emitter_index_Nx = GetDimensions(emitter_index)[0]; // Reihen N ermitteln
emitter_index_Mx = GetDimensions(emitter_index)[1]; // Spalten M ermitteln
#ifdef debug_OutputParameter
printf( "prhs[3] emitter_index [%ix%i]\n", emitter_index_Nx , emitter_index_Mx);
#endif
if (!(emitter_index_Nx == 1))
printf(" -> Dimension of emitter_index must be [1 x M]");
if (!(emitter_index_Mx == aScanCount)){
printf (" -> aScanCount(%i)!= M(%i)\n", aScanCount, emitter_index_Mx);
printf(" -> Dimension of emitter_index has different size as Ascan-Data\n");
}
// if (!(emitter_index_Mx == 1))
#ifdef debug_OutputParameter
printf( " -> Blockmode with [1 x %i]\n", emitter_index_Mx);
#endif
// if(!(mxIsUint16(emitter_index)))
// printf(" -> emitter_index must be Uint16");
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
unsigned short * emitter_index_ptr = (unsigned short*)GetPr(emitter_index);
#ifdef debug_OutputVariables
if ((receiver_index_Mx > 1))
{
printf( " -> emitter_index: %i = [%i]\n",1 , *(emitter_index_ptr+0));
printf( " -> emitter_index: %i = [%i]\n",2 , *(emitter_index_ptr+1));
printf( " -> emitter_index: %i = [%i]\n",3 , *(emitter_index_ptr+2));
}
else
printf( " -> emitter_index: %i = [%i]\n",1 , *(emitter_index_ptr+0));
#endif
//====================================================================== 5.Input Parameter - Check receiver_list
receiver_list_Nx = GetDimensions(receiver_list)[0]; // Reihen N ermitteln
receiver_list_Mx = GetDimensions(receiver_list)[1]; // Spalten M ermitteln
#ifdef debug_OutputParameter
printf( "prhs[4] receiver_list [%ix%i]\n", receiver_list_Nx , receiver_list_Mx);
#endif
if (!(receiver_list_Nx == 3))
printf(" -> Dimension of receiver_list must be [3 x M]");
// if(!(mxIsSingle(receiver_list)))
// printf(" -> receiver_list must be Single");
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
float *receiver_list_ptr;
receiver_list_ptr = (float*)GetPr(receiver_list);
#ifdef debug_OutputVariables
if ((receiver_list_Mx > 1))
{
printf( " -> receiver_list-Position: %i = [%f %f %f]\n",1 , *(receiver_list_ptr+0), *(receiver_list_ptr+1), *(receiver_list_ptr+2));
printf( " -> receiver_list-Position: %i = [%f %f %f]\n",2 , *(receiver_list_ptr+3), *(receiver_list_ptr+4), *(receiver_list_ptr+5));
}
else
printf( " -> receiver_list-Position: %i = [%f %f %f]\n",1 , *(receiver_list_ptr+0), *(receiver_list_ptr+1), *(receiver_list_ptr+2));
#endif
//====================================================================== 6.Input Parameter - Check emitter_list
emitter_list_Nx = GetDimensions(emitter_list)[0]; // Reihen N ermitteln
emitter_list_Mx = GetDimensions(emitter_list)[1]; // Spalten M ermitteln // emitter_list gibt die maximale Anzahl an Emittern die in diesem Block vorkommen können wieder!
#ifdef debug_OutputParameter
printf( "prhs[5] emitter_list [%ix%i]\n", emitter_list_Nx , emitter_list_Mx);
#endif
if (!(emitter_list_Nx == 3))
printf(" -> Dimension of emitter_list must be [3 x M]");
// if(!(mxIsSingle(receiver_list)))
// printf(" -> emitter_list must be Single");
// Ausgabe einzelner Geometriedaten der übergabewerte mit verschiedenen Varianten
float *emitter_list_ptr;
emitter_list_ptr = (float*)GetPr(emitter_list);
#ifdef debug_OutputVariables
if ((emitter_list_Mx > 1))
{
printf( " -> emitter_list-Position: %i = [%f %f %f]\n",1 , *(emitter_list_ptr+0), *(emitter_list_ptr+1), *(emitter_list_ptr+2));
printf( " -> emitter_list-Position: %i = [%f %f %f]\n",2 , *(emitter_list_ptr+3), *(emitter_list_ptr+4), *(emitter_list_ptr+5));
}
else
printf( " -> emitter_list-Position: %i = [%f %f %f]\n",1 , *(emitter_list_ptr+0), *(emitter_list_ptr+1), *(emitter_list_ptr+2));
#endif
//====================================================================== 7.Input Parameter - Check SAFT_mode
SAFT_mode_Nx = GetDimensions(SAFT_mode)[0]; // Reihen N ermitteln
SAFT_mode_Mx = GetDimensions(SAFT_mode)[1]; // Spalten M ermitteln
SAFT_MODE = *((int*)GetPr(SAFT_mode));
#ifdef debug_OutputParameter
printf( "prhs[6] SAFT_MODE [%ix%i] = [%i]\n", SAFT_mode_Nx , SAFT_mode_Mx, SAFT_MODE);
#endif
if (!(SAFT_mode_Nx == 1))
printf(" -> Dimension of SAFT_MODE must be [1 x 1]");
// if(!(mxIsUint32(SAFT_mode)))
// printf(" -> SAFT_MODE must be Uint32");
#ifdef debug_OutputParameter
printf ( "\e[7;37m ======================================================================================== \e[0m\n");
printf ( "\e[7;37m Used SAFT (GPU-Version):\e[0m");
#endif
switch (SAFT_MODE)
{
case 0:
SOSMode_3DVolume = false; ATTMode_3DVolume = false;
//printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
printf (" -> AscanIndexVersion only make sense with SOS or SOS and ATT Volume => exit");
break;
case 1:
SOSMode_3DVolume = true; ATTMode_3DVolume = false;
//printf ( "\e[7;37m + Speed of sound correction - Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
break;
case 2:
SOSMode_3DVolume = true; ATTMode_3DVolume = true;
//printf ( "\e[7;37m + Speed of sound correction + Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
break;
case 3:
//SOSMode_3DVolume = true; ATTMode_3DVolume = true;
//printf ( "\e[7;37m SAFT_MODE = 3 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
printf(" -> not implemented => exit");
break;
case 4:
//SOSMode_3DVolume = false; ATTMode_3DVolume = false;
//printf ( "\e[7;37m SAFT_MODE = 4 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
printf(" -> not implemented => exit");
break;
default: SOSMode_3DVolume = false; ATTMode_3DVolume = false;
//printf ( " -> SAFT_MODE %i is out of range [0..3] => use Standard SAFT\n", SAFT_MODE);
//printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
break;
}
#ifdef debug_OutputParameter
printf ( "\n\e[7;37m ======================================================================================== \e[0m\n");
#else
//printf ( "\n");
#endif
//====================================================================== 8.Input Parameter - Check SAFT_variant
SAFT_variant_Nx = GetDimensions(SAFT_variant)[0]; // Reihen N ermitteln
SAFT_variant_Mx = GetDimensions(SAFT_variant)[1]; // Spalten M ermitteln
SAFT_VARIANT = (int*)GetPr(SAFT_variant);
SAFT_VARIANT_Size = SAFT_variant_Mx;
#ifdef debug_OutputParameter
//printf( "prhs[7] SAFT_VARIANT [%ix%i] = [%i %i %i %i %i %i]\n", SAFT_variant_Nx , SAFT_variant_Mx, SAFT_VARIANT[0], SAFT_VARIANT[1], SAFT_VARIANT[2], SAFT_VARIANT[3], SAFT_VARIANT[4], SAFT_VARIANT[5]);
printf(" -> Ascan Preintegration = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration]);
printf(" -> Ascan Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation]);
printf(" -> Preprocessing SOS&ATT 3D Volume Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing]);
printf(" -> Reconstruction SOS&ATT 3D Volume Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]);
printf(" -> Standard deviation (STD) not yet = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_CalcStandardDeviation]);
printf(" -> Sum up over boarder Indices not yet = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_SumUpOverBoarderIndices]);
#endif
// if(!(mxIsUint32(SAFT_variant)))
// printf(" -> SAFT_VARIANT must be Uint32");
if (!(SAFT_variant_Nx == 1)||!(SAFT_variant_Mx == 6))
printf(" -> Dimension of SAFT_VARIANT must be [1 x 6]");
//====================================================================== 9.Input Parameter - Check for SOS volume
speed_Nx = GetDimensions(speed)[0]; // Reihen N ermitteln
speed_Mx = GetDimensions(speed)[1]; // Spalten M ermitteln
float Sos = *((float*)GetPr(speed));
float *speed_vec_ptr;
speed_vec_ptr = (float*)GetPr(speed); // Pointer für SoSDaten ermitteln
// if(!(mxIsSingle(speed)))
// printf(" -> SOSVolume must be Single");
if (SOSMode_3DVolume == true) // SOS correction need 3D Volume
{
if (GetNumberOfDimensions(speed) == 3){
#ifdef debug_OutputParameter
printf( "prhs[8] SOSVolume [%ix%ix%i]\n", mxGetDimensions(speed)[0] , mxGetDimensions(speed)[1], mxGetDimensions(speed)[2]);
printf( " -> use SoS-Mode with SoS-Correction per Path with SOS 3D Volume\n");
#endif
if (!( (speed_Nx > 1)&&(speed_Mx > 1) )){
printf( " -> SOSGrid_XYZ.x and SOSGrid_XYZ.y must be > 1 for SOS Correction with 3D Volume!!!");
}
}
else if (GetNumberOfDimensions(speed) == 2){
printf ( "prhs[8] SOSVolume [%ix%i]\n",(int)GetDimensions(speed)[0] , (int)GetDimensions(speed)[1]);
printf( " -> SOSVolume is not a 3D Volume as expected!");
}
SOSGrid_Xx = GetDimensions(speed)[0]; // SOSGrid_X ermitteln
SOSGrid_Yx = GetDimensions(speed)[1]; // SOSGrid_Y ermitteln
SOSGrid_Zx = GetDimensions(speed)[2]; // SOSGrid_Z ermitteln
SOSGrid_XYZ.x = SOSGrid_Xx;
SOSGrid_XYZ.y = SOSGrid_Yx;
SOSGrid_XYZ.z = SOSGrid_Zx;
if ((SOSGrid_XYZ.x > 128)||(SOSGrid_XYZ.y > 128)|| (SOSGrid_XYZ.z > 128)){
printf ( " -> SOSGrid_XYZ [%i x %i x %i]\n", (int)SOSGrid_Xx, (int)SOSGrid_Yx, (int)SOSGrid_Zx);
printf ( " Warning -> SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n");
}
#ifdef debug_OutputVariables
printf(" -> SoS-Data[1-3] = [%f %f %f]\n",speed_vec_ptr[0], speed_vec_ptr[1], speed_vec_ptr[2]);
//int speedOfSoundFieldIndex = (currentVoxel.z * SOSGrid_XYZ.y + currentVoxel.y) * SOSGrid_XYZ.x + currentVoxel.x;
int speedOfSoundFieldIndex = (0 * SOSGrid_XYZ.y + 1) * SOSGrid_XYZ.x + 9;
printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",speedOfSoundFieldIndex, (speedOfSoundFieldIndex+2), speed_vec_ptr[speedOfSoundFieldIndex + 0], speed_vec_ptr[speedOfSoundFieldIndex + 1], speed_vec_ptr[speedOfSoundFieldIndex + 2]);
speedOfSoundFieldIndex = (4 * SOSGrid_XYZ.y + 37) * SOSGrid_XYZ.x + 9;
printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",speedOfSoundFieldIndex, (speedOfSoundFieldIndex+2), speed_vec_ptr[speedOfSoundFieldIndex + 0], speed_vec_ptr[speedOfSoundFieldIndex + 1], speed_vec_ptr[speedOfSoundFieldIndex + 2]);
int SOSGrid_END = (SOSGrid_XYZ.x * SOSGrid_XYZ.y * SOSGrid_XYZ.z);
printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",(SOSGrid_END-3), (SOSGrid_END-1), speed_vec_ptr[SOSGrid_END-3], speed_vec_ptr[SOSGrid_END-2], speed_vec_ptr[SOSGrid_END-1]);
#endif
}
//====================================================================== 10.Input Parameter - Check SoS Startpoint
sos_startPoint_Nx = GetDimensions(sos_startPoint)[0]; // Reihen N ermitteln
sos_startPoint_Mx = GetDimensions(sos_startPoint)[1]; // Spalten M ermitteln
sosOffset.x = *((float*)GetPr(sos_startPoint));
sosOffset.y = *((float*)GetPr(sos_startPoint)+1);
sosOffset.z = *((float*)GetPr(sos_startPoint)+2);
#ifdef debug_OutputParameter
printf( "prhs[9] SOS_STARTPOINT_S [%ix%i] = [%f x %f x %f]\n", sos_startPoint_Nx , sos_startPoint_Mx, sosOffset.x, sosOffset.y, sosOffset.z);
#endif
if (!(sos_startPoint_Nx == 1)||!(sos_startPoint_Mx == 3))
printf(" -> Dimension of SOS_STARTPOINT_S must be [1 x 3]");
if ((sos_startPoint_Nx > 1))
printf( " -> No Blockmode [%i x 3] allowed for SOS_STARTPOINT_S\n", sos_startPoint_Nx);
// if(!(mxIsSingle(sos_startPoint)))
// printf(" -> SOS_STARTPOINT_S must be Single");
//====================================================================== 11.Input Parameter - Check SoS_RESOLUTION / sos_res
if (SOSMode_3DVolume == true){
sos_res_Nx = GetDimensions(sos_res)[0]; // Reihen N ermitteln
sos_res_Mx = GetDimensions(sos_res)[1]; // Spalten M ermitteln
SOS_RESOLUTION = *((float*)GetPr(sos_res));
#ifdef debug_OutputParameter
printf( "prhs[10] SOS_RESOLUTION_S [%ix%i] = [%f]\n", sos_res_Nx , sos_res_Mx, SOS_RESOLUTION);
#endif
if (!(sos_res_Nx == 1))
printf(" -> Dimension of SOS_RESOLUTION_S must be [1 x 1]");
if ((sos_res_Mx > 1))
printf( " -> No Blockmode allowed for SOS_RESOLUTION_S! [1 x %i]\n", sos_res_Mx);
// if(!(mxIsSingle(sos_res)))
// printf(" -> SOS_RESOLUTION_S must be Single");
}
//====================================================================== 12.Input Parameter - Check for ATTVolume / Attenuation-Data
attVolume_Nx = GetDimensions(attVolume)[0]; // Reihen N ermitteln
attVolume_Mx = GetDimensions(attVolume)[1]; // Spalten M ermitteln
float *att_vec_ptr;
att_vec_ptr = (float*)GetPr(attVolume); // Pointer für ATT-Daten ermitteln
// if(!(mxIsSingle(attVolume)))
// printf(" -> attVolume must be Single");
if (GetNumberOfDimensions(attVolume) == 3){
#ifdef debug_OutputParameter
printf( "prhs[11] attVolume [%ix%ix%i]\n", mxGetDimensions(attVolume)[0] , mxGetDimensions(attVolume)[1], mxGetDimensions(attVolume)[2]);
printf( " -> use ATT-Mode with ATT-Correction per Path with ATT 3D Volume\n");
#endif
if (!( (attVolume_Nx > 1)&&(attVolume_Mx > 1) )){
printf( " -> ATTGrid_XYZ.x and ATTGrid_XYZ.y must be > 1 for ATT Correction with 3D Volume!!!");
}
}
else if (GetNumberOfDimensions(attVolume) == 2){
#ifdef debug_OutputParameter
printf ( "prhs[11] attVolume [%ix%i]\n", mxGetDimensions(attVolume)[0] , mxGetDimensions(attVolume)[1]);
printf( " -> attVolume is not a 3D Volume as expected!");
#endif
}
if ((SOSMode_3DVolume == true)&&(ATTMode_3DVolume == true)){ // 3D Volume muss bei SOS und ATT angegeben sein damit ATT Korrektur durchgefuehrt werden kann
ATTGrid_Xx = GetDimensions(attVolume)[0]; // ATTGrid_X ermitteln
ATTGrid_Yx = GetDimensions(attVolume)[1]; // ATTGrid_Y ermitteln
ATTGrid_Zx = GetDimensions(attVolume)[2]; // ATTGrid_Z ermitteln
ATTGrid_XYZ.x = ATTGrid_Xx;
ATTGrid_XYZ.y = ATTGrid_Yx;
ATTGrid_XYZ.z = ATTGrid_Zx;
if ((ATTGrid_XYZ.x > 128) || (ATTGrid_XYZ.y > 128) || (ATTGrid_XYZ.z > 128)){
printf ( " -> ATTGrid_XYZ [%i x %i x %i]\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z);
printf ( " Warning -> ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n");
}
if ((ATTGrid_XYZ.x != SOSGrid_XYZ.x) || (ATTGrid_XYZ.y != SOSGrid_XYZ.y) || (ATTGrid_XYZ.z != SOSGrid_XYZ.z)){ // Restriction: Volume parameter of ATT & SOS must be the same
printf( " -> ATTGrid[%i %i %i] != SOSGrid[%i %i %i]!\n", ATTGrid_XYZ.x , ATTGrid_XYZ.y, ATTGrid_XYZ.z, SOSGrid_XYZ.x , SOSGrid_XYZ.y, SOSGrid_XYZ.z);
printf(" -> ATTGrid must have the same size as SOSGrid \n");
}
#ifdef debug_OutputVariables
printf(" -> ATT-Data[1-3] = [%f %f %f]\n", att_vec_ptr[0], att_vec_ptr[1], att_vec_ptr[2]);
int ATTFieldIndex = (14 * ATTGrid_XYZ.y + 30 ) * ATTGrid_XYZ.x + 16;
printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", ATTFieldIndex, (ATTFieldIndex+2), att_vec_ptr[ATTFieldIndex + 0], att_vec_ptr[ATTFieldIndex + 1], att_vec_ptr[ATTFieldIndex + 2]);
ATTFieldIndex = (15 * ATTGrid_XYZ.y + 32 ) * ATTGrid_XYZ.x + 32;
printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", ATTFieldIndex, (ATTFieldIndex+2), att_vec_ptr[ATTFieldIndex + 0], att_vec_ptr[ATTFieldIndex + 1], att_vec_ptr[ATTFieldIndex + 2]);
int ATTGrid_END = (ATTGrid_XYZ.x * ATTGrid_XYZ.y * ATTGrid_XYZ.z);
printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", (ATTGrid_END-3), (ATTGrid_END-1), att_vec_ptr[ATTGrid_END-3], att_vec_ptr[ATTGrid_END-2], att_vec_ptr[ATTGrid_END-1]);
#endif
}
else{
//#ifdef debug_OutputVariables
printf( " -> ATTMode_3DVolume == false => skip ATTGrid\n");
//#endif
ATTGrid_Xx = 0; // ATTGrid_X ermitteln
ATTGrid_Yx = 0; // ATTGrid_Y ermitteln
ATTGrid_Zx = 0; // ATTGrid_Z ermitteln
ATTGrid_XYZ.x = ATTGrid_Xx;
ATTGrid_XYZ.y = ATTGrid_Yx;
ATTGrid_XYZ.z = ATTGrid_Zx;
}
//====================================================================== 13.Input Parameter - Check IMAGE_RESOLUTION_S / res
res_Nx = GetDimensions(res)[0]; // Reihen N ermitteln
res_Mx = GetDimensions(res)[1]; // Spalten M ermitteln
IMAGE_RESOLUTION = *((float*)GetPr(res));
#ifdef debug_OutputParameter
printf( "prhs[12] IMAGE_RESOLUTION_S [%ix%i] = [%f]\n", res_Nx , res_Mx, IMAGE_RESOLUTION);
#endif
if (!(res_Nx == 1))
printf(" -> Dimension of IMAGE_RESOLUTION must be [1 x 1]");
if ((res_Mx > 1))
printf( " -> No Blockmode allowed for IMAGE_RESOLUTION! [1 x %i]\n", res_Mx);
// if(!(mxIsSingle(res)))
// printf(" -> IMAGE_RESOLUTION must be Single");
if (SOSMode_3DVolume == true){
if(IMAGE_RESOLUTION > SOS_RESOLUTION){
printf( " -> IMAGE_RESOLUTION (%f) > SOS_RESOLUTION (%f)\n", IMAGE_RESOLUTION, SOS_RESOLUTION);
printf(" -> IMAGE_RESOLUTION must not > SOS_RESOLUTION !!!");
}
}
//====================================================================== 14.Input Parameter - Check TimeInterval_S / Timeint
timeint_Nx = GetDimensions(timeint)[0]; // Reihen N ermitteln
timeint_Mx = GetDimensions(timeint)[1]; // Spalten M ermitteln
sampleRate = *((float*)GetPr(timeint));
#ifdef debug_OutputParameter
printf( "prhs[13] TimeInterval_S [%ix%i] = [%e]\n", timeint_Nx , timeint_Mx, sampleRate);
#endif
if (!(timeint_Nx == 1))
printf(" -> Dimension of TimeInterval_S must be [1 x 1]");
if ((timeint_Mx > 1))
printf( " -> No Blockmode allowed for TimeInterval_S! [1 x %i]\n", timeint_Mx);
// if(!(mxIsSingle(timeint)))
// printf(" -> TimeInterval_S must be Single");
//====================================================================== 15.Input Parameter - Check IMAGE_XYZ_UI32 / IMAGE_XYZ
IMAGE_XYZ_Nx = GetDimensions(IMAGE_XYZ)[0]; // Reihen N ermitteln
IMAGE_XYZ_Mx = GetDimensions(IMAGE_XYZ)[1]; // Spalten M ermitteln
IMAGE_SIZE_XYZ.x = *((int*)GetPr(IMAGE_XYZ));
IMAGE_SIZE_XYZ.y = *((int*)GetPr(IMAGE_XYZ)+1);
IMAGE_SIZE_XYZ.z = *((int*)GetPr(IMAGE_XYZ)+2);
#ifdef debug_OutputParameter
printf( "prhs[14] IMAGE_XYZ [%ix%i] = [%ix%ix%i]\n", IMAGE_XYZ_Nx , IMAGE_XYZ_Mx,IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z);
#endif
if (!(IMAGE_XYZ_Nx == 1)||!(IMAGE_XYZ_Mx == 3))
printf(" -> Dimension of IMAGE_XYZ must be [1 x 3]");
if ((IMAGE_XYZ_Nx > 1))
printf( " -> No Blockmode allowed for IMAGE_XYZ! [%i x 3]\n", IMAGE_XYZ_Nx);
// if(!(mxIsUint32(IMAGE_XYZ)))
// printf(" -> IMAGE_XYZ must be UINT32");
if ((IMAGE_SIZE_XYZ.x > 8192)||(IMAGE_SIZE_XYZ.y > 8192)) // Aufteilung in BlockDim 512,1,1 passt für 5632x5632. Es würde etwas weiter gehen aber dann muss Y kleiner sein.
printf(" -> IMAGE_XYZ must not > [8192 x 8192 x N]!!!");
//====================================================================== 16.Input Parameter - Check Env / IMAGE_SUM
IMAGE_SUM_Xx = GetDimensions(IMAGE_SUM)[0]; // Spalten M ermitteln X
IMAGE_SUM_Yx = GetDimensions(IMAGE_SUM)[1]; // Reihen N ermitteln Y
if (GetNumberOfDimensions(IMAGE_SUM) > 2)
IMAGE_SUM_Zx = GetDimensions(IMAGE_SUM)[2]; // Z-Schichten ermitteln Z
else if (GetNumberOfDimensions(IMAGE_SUM) == 2)
IMAGE_SUM_Zx = 1; // Z-Schichten = 1
else
{
printf( " -> mxGetNumberOfDimensions of IMAGE_SUM = %i\n", (int)GetNumberOfDimensions(IMAGE_SUM));
printf(" -> Dimension of IMAGE_SUM must be 3: [X x Y x Z]");
}
#ifdef debug_OutputParameter
printf( "prhs[15] IMAGE_SUM [%ix%ix%i]\n", IMAGE_SUM_Xx , IMAGE_SUM_Yx, IMAGE_SUM_Zx);
#endif
// if(!(mxIsDouble(IMAGE_SUM)))
// printf(" -> IMAGE_SUM must be Double");
// if(!(mxGetNumberOfElements(IMAGE_SUM) == ((size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z)))
// {
// printf( " -> IMAGE_SUM and the Number of Voxels don't match: %lld = [%lldx%lldx%lld]\n",(int)GetNumberOfElements(IMAGE_SUM), IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z);
// printf(" -> Make sure that they have the same size");
// }
uint64_t IMAGE_SUM_Count = GetNumberOfElements(IMAGE_SUM);
double *IMAGE_SUM_vec_ptr = (double*)GetPr(IMAGE_SUM);
#ifdef debug_OutputVariables
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",0 , IMAGE_SUM_vec_ptr[0], IMAGE_SUM_vec_ptr[1], IMAGE_SUM_vec_ptr[2]);
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",1 , IMAGE_SUM_vec_ptr[3], IMAGE_SUM_vec_ptr[4], IMAGE_SUM_vec_ptr[5]);
printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",2 , IMAGE_SUM_vec_ptr[6], IMAGE_SUM_vec_ptr[7], IMAGE_SUM_vec_ptr[8]);
printf( " -> IMAGE_SUM: %lld = [%f %f %f]\n",(IMAGE_SUM_Count-3) , IMAGE_SUM_vec_ptr[IMAGE_SUM_Count - 3], IMAGE_SUM_vec_ptr[IMAGE_SUM_Count - 2], IMAGE_SUM_vec_ptr[IMAGE_SUM_Count -1]);
#endif
//====================================================================== 17.Input Parameter - Check BlockDimension for GPU
BlockDim_XYZ_Nx = GetDimensions(BlockDim)[0]; // Reihen N ermitteln
BlockDim_XYZ_Mx = GetDimensions(BlockDim)[1]; // Spalten M ermitteln
BlockDim_XYZ.x = *((int*)GetPr(BlockDim));
BlockDim_XYZ.y = *((int*)GetPr(BlockDim)+1);
BlockDim_XYZ.z = *((int*)GetPr(BlockDim)+2);
#ifdef debug_OutputParameter
printf( "prhs[16] BlockDim_XYZ (GPU) [%ix%i] = [%ix%ix%i]\n", BlockDim_XYZ_Nx , BlockDim_XYZ_Mx, BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z);
#endif
if (!(BlockDim_XYZ_Nx == 1)||!(BlockDim_XYZ_Mx == 3))
printf(" -> Dimension of BlockDim_XYZ must be [1 x 3]");
if ((BlockDim_XYZ_Nx > 1))
printf( " -> No Blockmode! [%i x 3]\n", (int)BlockDim_XYZ_Nx);
// if(!(mxIsUint32(BlockDim)))
// printf(" -> BlockDim_XYZ must be UINT32");
if ((BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z) > 1024){ // BlockSize limited to 1024. Perhaps newer GPUs will support more Threads per Block
printf( " -> BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z must not > 1024!!!");
// Here Adaption for BlockSize can be done.
BlockDim_XYZ.x = 1024; // If Blockdimensions are not specified than standard Blockdimensions will be used.
BlockDim_XYZ.x = 1;
BlockDim_XYZ.x = 1;
printf( " -> Standard Size for BlockDim_XYZ is used [%ix%ix%i]\n", BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z);
}
//====================================================================== 18.Input Parameter - Check GPUs
int *enableGPUs_ptr;
GPUs_Nx = GetDimensions(GPUs)[0]; // Reihen N ermitteln
GPUs_Mx = GetDimensions(GPUs)[1]; // Spalten M ermitteln
#ifdef debug_OutputParameter
printf( "prhs[17] GPUs [%ix%i] \n", GPUs_Nx , GPUs_Mx);
#endif
enableGPUs_ptr = (int*)GetPr(GPUs);
selectedNumberGPUs = GPUs_Mx;
//Determine Number of GPU-Devices and check if there are so many available
int num_devices = 0;
//printf( " -> cudaGetDeviceCount: %i\n", num_devices);
CUDA_CHECK(cudaGetDeviceCount(&num_devices));
#ifdef debug_OutputInfo
printf( " -> GPU-devices available: %i\n", num_devices);
#endif
if (selectedNumberGPUs <= num_devices)
{
#ifdef debug_OutputParameter
printf( " (selectedNumberGPUs(%i) <= num_devices(%i)) -> OK -> selectedNumberGPUs = %i!\n", selectedNumberGPUs, num_devices, selectedNumberGPUs);
#endif
}
else
{
printf( " !!! !!! selectedNumberGPUs(%i) > num_devices(%i) !!!! !!!! -> selectedNumberGPUs = num_devices = %i!\n", selectedNumberGPUs, num_devices, num_devices);
selectedNumberGPUs = num_devices; // Reduce number of selected to number of GPUs in PC system!
}
// Check passed GPU-ID-Numbers and amount of GPUs
#ifdef debug_OutputParameter
printf( "prhs[17] GPUs [%ix%i] = [", GPUs_Nx , selectedNumberGPUs);
#endif
int gpuNr = 0;
int gpuNrCheck = 0;
for (gpuNr = 0; gpuNr < selectedNumberGPUs; ++gpuNr){
#ifdef debug_OutputParameter
printf( " %i ", enableGPUs_ptr[gpuNr]);
#endif
if (enableGPUs_ptr[gpuNr] > (num_devices-1)){ // Check if more GPUs are selected then available in System
printf( "\n enableGPUs_ptr[gpuNr=%i] = %i !!!\n", gpuNr, enableGPUs_ptr[gpuNr]);
printf(" -> selected number of GPU > available Devices is not allowed!");
}
for (gpuNrCheck = 0; gpuNrCheck < gpuNr; ++gpuNrCheck){
#ifdef debug_OutputParameter
printf( "\n enableGPUs_ptr[gpuNrCheck %i] = %i, enableGPUs_ptr[gpuNr %i] = %i\n",gpuNrCheck, enableGPUs_ptr[gpuNrCheck], gpuNr, enableGPUs_ptr[gpuNr]);
#endif
if (enableGPUs_ptr[gpuNrCheck] == enableGPUs_ptr[gpuNr])
printf(" -> GPU Device can only be used once!!!");
}
}
#ifdef debug_OutputParameter
printf( "]; Number of GPUs to use = %i\n", selectedNumberGPUs);
#endif
if (!(GPUs_Nx == 1)||!(GPUs_Mx < 10))
printf(" -> Dimension of GPUs must be [1 x <10]");
if ((pix_vect_Nx > 1))
printf( " -> No Blockmode [%i x n] allowed for GPUs\n", GPUs_Nx);
// if(!(mxIsUint32(GPUs)))
// printf(" -> GPUs must be UINT32");
//====================================================================== 19.Input Parameter - debugMode, debugModeParameter
dbgMode_Nx = GetDimensions(dbgMode)[0]; // Reihen N ermitteln
dbgMode_Mx = GetDimensions(dbgMode)[1]; // Spalten M ermitteln
debugMode = *((float*)GetPr(dbgMode) );
debugModeParameter = *((float*)GetPr(dbgMode)+1);
#ifdef debug_OutputParameter
printf( "prhs[18] debugMode [%ix%i] = [%f %f]\n", dbgMode_Nx , dbgMode_Mx, debugMode, debugModeParameter);
#endif
if ((dbgMode_Nx != 1)||(dbgMode_Mx != 2))
printf(" -> Dimension of debugMode must be [1 x 2]\n");
// if(!(mxIsSingle(dbgMode)))
// printf(" -> debugMode must be single");
if(debugMode != 0.0)
printf (" -> debugMode = [%f], debugModeParameter = [%f]\n", debugMode, debugModeParameter);
#ifdef debug_OutputParameter
printf( "=================================================================================================\n"); // End of Inputparameter
#endif
// // Enable colored Output // http://linuxgazette.net/issue65/padala.html
// printf ("\e[1;34mThis is a blue text.\e[0m"); // farbige Ausgabe mit
// printf ("\e[1;34m %-6s \e[m", "This is text"); // Farbige Text-Ausgabe
// printf ("\n");
//================================================================================================================ Create Output Memory / Parameter
#ifdef debug_OutputParameter
printf( "\n");
printf( "Create Outputparameter\n");
printf( "Out[n] Meaning \n");
printf( "=================================================================================================\n");
#endif
// ~~~~ Create 3D-Matrix for the Output-Values
// Output-Dimension is {IMAGE_XYZ_X, IMAGE_XYZ_Y, IMAGE_XYZ_Z}
const int dims[]={IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z};
int ndim = 3;
#ifdef debug_OutputParameter
printf( "plhs[0] = Output_Voxels = mxCreateNumericArray( ndim(%i), dims{%i %i %i}, mxDOUBLE_CLASS, mxREAL);\n", ndim, dims[0], dims[1], dims[2]);
#endif
Matrix_t Output_Voxels;
Output_Voxels.NumberOfDims = ndim;
Output_Voxels.Dims[0] = dims[0];
Output_Voxels.Dims[1] = dims[1];
Output_Voxels.Dims[2] = dims[2];
Output_Voxels.Data = new double[dims[0]*dims[1]*(dims[2]?dims[2]:1)];
double *Output_Voxels_ptr = (double*)GetPr(Output_Voxels);
// ~~~~ Create Pointer to return value from Duration of Kernel
// Erstelle Array mit folgender Formatierung
// 0: Total Durationtime all GPUs
// 1: Total Durationtime GPU 1
// 2: Total Durationtime GPU 2
// n: Total Durationtime GPU n
int
m = (1 + selectedNumberGPUs),
n = 1;
#ifdef debug_OutputParameter
printf( "plhs[1] = Duration = mxCreateDoubleMatrix( m(%i), n(%i), mxREAL);\n", m, n);
#endif
double *Duration_ptr = new double[m*n];
// ~~~~ Create Pointer to return Error/Abortvalue of each multithread
//int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int));
int *Abort_ptr = (int*) malloc(selectedNumberGPUs*sizeof(int));
// ~~~~ Erstelle Array fuer Testrueckgabe der integrierten Ascans
//AScan_Nx = mxGetDimensions(AScan)[0]; // Reihen N ermitteln
//AScan_Mx = mxGetDimensions(AScan)[1]; // Spalten M ermitteln
//int
m = aScanCount; // 1
n = AScan_Nx; //z.B. 3000
#ifdef debug_OutputParameter
printf( "plhs[2] = Ascans = mxCreateNumericMatrix( m(%i), n(%i), mxREAL);\n", m, n);
#endif
float *AscansOut_ptr = new float[m*n];
#ifdef debug_OutputParameter
printf( "=================================================================================================\n\n"); // End of Outputparameter
#endif
//================================================================================================================ Preintegrate Ascans
#ifdef preAscanIntegrationToMatchSamplerateToResolution // Preintegrate Ascans for matching of Samplerate and Resolution
#ifdef debug_OutputHostStepsPerformance
double diff_time;
struct timeval startPreintegrateAscans, stopPreintegrateAscans;
gettimeofday(&startPreintegrateAscans, NULL);
#endif
if (SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration] == 1){
//printf( "(SAFT_VARIANT[0] == 1) => perform preintegrateAscans\n\n");
speed_vec_ptr = (float*)GetPr(speed);
// printf( " speed_vec_ptr[%3i] = %12.10f\n",0,speed_vec_ptr[0]);
if (speed_vec_ptr[0] == 0){
printf("First value in SOS Volume = 0 --> preintegrateAscans can't be performed!!! --> Exit");
}
//================================================================================================================
preintegrateAscans(aScan_ptr, AscansOut_ptr, speed_vec_ptr, aScanCount, aScanLength, IMAGE_RESOLUTION, sampleRate, debugMode, debugModeParameter);
//================================================================================================================
}
else{
//printf( "(SAFT_VARIANT[0] == 0) => skip preintegrateAscans\n\n");
//Daten trotzdem in Outputspeicher fuer Matlab transferieren
for (int j = 0; j<aScanCount; j++){ // ueber alle A-scans gehen.
for (int i = 0; i < aScanLength; i++){
//printf( " i (%4i) = %6.3f ",i, aScan_ptr[j*aScanLength+i]);
AscansOut_ptr[j*aScanLength+i] = aScan_ptr[j*aScanLength+i]; // nach Matlab zurueckgeben
//printf( " i (%4i) = %6.3f \n",i, AscansOut_ptr[j*aScanLength+i]);
}
}
}
#ifdef debug_OutputHostStepsPerformance
gettimeofday(&stopPreintegrateAscans, NULL);
diff_time = (double)((stopPreintegrateAscans.tv_sec * 1000000.0 + stopPreintegrateAscans.tv_usec) - (startPreintegrateAscans.tv_sec * 1000000.0 + startPreintegrateAscans.tv_usec));
printf ("########################################################################\n");
//printf ("### aScanCount = %i %f %f\n", aScanCount,double(aScanCount),(double)(aScanCount));
printf ("### HOST ### PreintegrateAscans = %8.0f µs = %8.2f MAscans/s\n", diff_time, (double)aScanCount/diff_time);
#endif
#endif
//================================================================================================================ Start Reconstruction
//================================================================================================================
#ifdef debug_OutputHostStepsPerformance
struct timeval startMultithreadProcessing, stopMultithreadProcessing;
gettimeofday(&startMultithreadProcessing, NULL);
#endif
multithreaded_processing( aScan_ptr,
Output_Voxels_ptr,
receiver_index_ptr,
emitter_index_ptr,
receiver_list_ptr,
receiver_list_Mx,
emitter_list_ptr,
emitter_list_Mx,
speed_vec_ptr,
SOSGrid_XYZ,
sosOffset,
SOS_RESOLUTION,
att_vec_ptr,
AScan_Mx,
AScan_Nx,
regionOfInterestOffset,
IMAGE_SIZE_XYZ,
IMAGE_RESOLUTION,
sampleRate,
BlockDim_XYZ,
Duration_ptr,
selectedNumberGPUs,
enableGPUs_ptr,
debugMode,
debugModeParameter,
SOSMode_3DVolume,
ATTMode_3DVolume,
SAFT_MODE,
SAFT_VARIANT,
SAFT_VARIANT_Size,
Abort_ptr
);
#ifdef debug_OutputHostStepsPerformance
gettimeofday(&stopMultithreadProcessing, NULL);
diff_time = (double)((stopMultithreadProcessing.tv_sec * 1000000.0 + stopMultithreadProcessing.tv_usec) - (startMultithreadProcessing.tv_sec * 1000000.0 + startMultithreadProcessing.tv_usec));
printf ("########################################################################\n");
printf ("### HOST ### MultithreadProcessing = %8.0 us = %8.2f GVA/s\n", diff_time, double(IMAGE_SUM_Count)*double(aScanCount)/(diff_time*1000));
#endif
// Check if errors occurred
bool AbortedThreads = false;
for (int i=0; i < selectedNumberGPUs; i++ ){
if (Abort_ptr[i] > 0)
{
printf( "!!!!!!!!!!!!!!!!!!! Aborted Thread for GPU[%i] = %i\n", i, Abort_ptr[i]);
AbortedThreads = true;
}
}
free(Abort_ptr);
if (AbortedThreads)
printf(" Aborted Thread occurred -> see output history");
//================================================================================================================
//================================================================================================================
//================================================================================================================ Build Sum of IMAGE_SUM and current reconstructed Volume
// Daten des uebergebenen Outputvolumens zum rekonstruierten Volumen addieren
#ifdef debug_OutputInfo
printf( "Build Sum of reconstructed Volume and given IMAGE_SUM \n");
#endif
#ifdef debug_OutputHostStepsPerformance
struct timeval startSumIMAGE_SUM, stopSumIMAGE_SUM;
gettimeofday(&startSumIMAGE_SUM, NULL);
#endif
for(uint64_t i=0; i < IMAGE_SUM_Count; i++)
{
Output_Voxels_ptr[i] += IMAGE_SUM_vec_ptr[i];
}
#ifdef debug_OutputHostStepsPerformance
gettimeofday(&stopSumIMAGE_SUM, NULL);
diff_time = (double)((stopSumIMAGE_SUM.tv_sec * 1000000.0 + stopSumIMAGE_SUM.tv_usec) - (startSumIMAGE_SUM.tv_sec * 1000000.0 + startSumIMAGE_SUM.tv_usec));
printf ("### HOST ### Sum up (IMAGE_SUM + reconstr. Volume) = %8.0f µs = %8.2f MVoxel/s\n", diff_time, double(IMAGE_SUM_Count)/diff_time);
printf ("########################################################################\n");
#endif
//================================================================================================================ Show returned Output-Values
//
#ifdef debug_OutputVariables
// Testoutput Value of duration
printf( "Duration_ptr[0] = %f\n", Duration_ptr[0]);
// Testoutput Sum of IMAGE_SUM and reconstructed Volume
printf( "Output_Voxels: %i = [%f %f %f]\n",0 , Output_Voxels_ptr[0], Output_Voxels_ptr[1], Output_Voxels_ptr[2]);
printf( "Output_Voxels: %i = [%f %f %f]\n",1 , Output_Voxels_ptr[3], Output_Voxels_ptr[4], Output_Voxels_ptr[5]);
printf( "Output_Voxels: %i = [%f %f %f]\n",2 , Output_Voxels_ptr[6], Output_Voxels_ptr[7], Output_Voxels_ptr[8]);
printf( "Output_Voxels: %i = [%f %f %f]\n",(IMAGE_SUM_Count-3) , Output_Voxels_ptr[IMAGE_SUM_Count - 3], Output_Voxels_ptr[IMAGE_SUM_Count - 2], Output_Voxels_ptr[IMAGE_SUM_Count -1]);
#endif
#ifdef debug_OutputFunctions
printf( "<== mexFunction - End\n");
#endif
delete [] AscansOut_ptr;
delete [] Duration_ptr;
return Output_Voxels;
}