1159 lines
54 KiB
C++
1159 lines
54 KiB
C++
#include "SAFT_TOFI.h"
|
||
|
||
#include <locale.h>
|
||
#include <math.h>
|
||
#include <omp.h>
|
||
|
||
#include <cstddef>
|
||
|
||
#include "saft.hpp"
|
||
|
||
|
||
// <locale.h> is needed for the German printf output format: floats are printed with ',' instead of '.'
|
||
|
||
#include <spdlog/spdlog.h>
|
||
#include <stdint.h>
|
||
|
||
#include <array>
|
||
#include <chrono>
|
||
#include <future>
|
||
#include <iostream>
|
||
#include <map>
|
||
#include <string>
|
||
|
||
#include "spdlog/sinks/stdout_color_sinks.h"
|
||
|
||
|
||
// TODO: a block size with z > 1 leads to kernel aborts
|
||
|
||
// pthread handle
|
||
typedef struct thread_handle_t
|
||
{
|
||
// pthread_t pthread;
|
||
|
||
int deviceId;
|
||
int deviceIndex;
|
||
float *aScan_ptr;
|
||
double *output_ptr;
|
||
double *Duration_ptr;
|
||
unsigned short *receiver_index_ptr;
|
||
unsigned short *emitter_index_ptr;
|
||
float *receiver_list_ptr;
|
||
int receiver_list_Size;
|
||
float *emitter_list_ptr;
|
||
int emitter_list_Size;
|
||
float *speed_vec_ptr;
|
||
int3 SOSGrid_XYZ;
|
||
float3 sosOffset;
|
||
float SOS_RESOLUTION;
|
||
|
||
float *att_vec_ptr;
|
||
|
||
int aScanCount;
|
||
int aScanLength;
|
||
|
||
float inc;
|
||
int3 res;
|
||
float sampleRate;
|
||
float3 volposition;
|
||
|
||
int num_threads;
|
||
dim3 fixedBlockDimensions;
|
||
|
||
float debugMode;
|
||
float debugModeParameter;
|
||
bool SOSMode_3DVolume;
|
||
bool ATTMode_3DVolume;
|
||
int SAFT_MODE;
|
||
int *SAFT_VARIANT;
|
||
int SAFT_VARIANT_Size;
|
||
int *Abort_ptr;
|
||
|
||
} thread_handle;
|
||
|
||
// Convenient typedefs for GPU-DeviceProperties container
|
||
typedef std::vector<cudaDeviceProp> DeviceProperties;
|
||
|
||
/**
|
||
Load CUDA devices and write them to a container.
|
||
*/
|
||
void loadDevices(DeviceProperties &output ///< This argument is written to. Container in which the device data are stored.
|
||
)
|
||
{
|
||
int deviceCount;
|
||
CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
|
||
|
||
output.reserve(static_cast<std::size_t>(deviceCount));
|
||
for (int i = 0; i < deviceCount; i++)
|
||
{
|
||
cudaDeviceProp &device = output[i];
|
||
CUDA_CHECK(cudaGetDeviceProperties(&device, i));
|
||
output.push_back(device);
|
||
}
|
||
}
|
||
|
||
// pthread call function
|
||
void thread_function(void *arg)
|
||
{
|
||
thread_handle *pthread_handle = (thread_handle *)arg;
|
||
// Create Instance of SAFT-Handler and call constructor
|
||
SAFTHandler saft(pthread_handle->deviceId, // deviceId
|
||
pthread_handle->deviceIndex, // deviceIndex
|
||
|
||
pthread_handle->aScan_ptr, // aScan_ptr,
|
||
pthread_handle->output_ptr, // output_ptr,
|
||
pthread_handle->Duration_ptr, // Duration_ptr,
|
||
|
||
pthread_handle->receiver_index_ptr, // receiver_index_ptr ///<
|
||
pthread_handle->emitter_index_ptr, // emitter_index_ptr ///<
|
||
pthread_handle->receiver_list_ptr, // receiver_list_ptr ///<
|
||
pthread_handle->receiver_list_Size, // receiver_list_Size ///<
|
||
pthread_handle->emitter_list_ptr, // emitter_list_ptr ///<
|
||
pthread_handle->emitter_list_Size, // emitter_list_Size ///<
|
||
|
||
pthread_handle->speed_vec_ptr, // speed_vec_ptr,
|
||
|
||
pthread_handle->SOSGrid_XYZ, // SOSGrid_XYZ,
|
||
pthread_handle->sosOffset, // sosOffset, ///< Startpoint of SoSGrid
|
||
pthread_handle->SOS_RESOLUTION, // SOS_RESOLUTION, ///< Resolution of SoSGrid
|
||
|
||
pthread_handle->att_vec_ptr, // att_vec_ptr
|
||
|
||
pthread_handle->aScanCount, // aScanCount,
|
||
pthread_handle->aScanLength, // aScanLength
|
||
pthread_handle->res, // resolution => IMAGE_SIZE_XYZ,
|
||
pthread_handle->sampleRate, // sampleRate
|
||
pthread_handle->volposition, // => regionOfInterestOffset,
|
||
pthread_handle->inc, // IMAGE_RESOLUTION,
|
||
|
||
pthread_handle->fixedBlockDimensions, // fixedBlockDimensions
|
||
pthread_handle->debugMode, // debugMode
|
||
pthread_handle->debugModeParameter, // Parameter for DebugMode
|
||
|
||
pthread_handle->SOSMode_3DVolume, pthread_handle->ATTMode_3DVolume,
|
||
|
||
pthread_handle->SAFT_MODE, pthread_handle->SAFT_VARIANT, pthread_handle->SAFT_VARIANT_Size,
|
||
pthread_handle->Abort_ptr // If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
|
||
|
||
);
|
||
|
||
saft.performReconstruction();
|
||
// pthread_exit(NULL);
|
||
}
|
||
|
||
/**
|
||
Check amount of GPUs and divide Volume ins parts with same size
|
||
Abfrage der Anzahl an GPU-Devices und Einteilung des Volumens in möglichst gleichgroße Volumen in Z-Richtung (3D-Volumen) oder Y-Richtung (2D-Volumen) auf.
|
||
*/
|
||
|
||
void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten
|
||
double *output_ptr, ///< OutputDaten der Voxel
|
||
|
||
unsigned short *receiver_index_ptr, ///< Index Receiver per Ascan
|
||
unsigned short *emitter_index_ptr, ///< Index Emitter per Ascan
|
||
float *receiver_list_ptr, ///< Positionskoordinaten Receiver
|
||
int receiver_list_Size, ///< Menge an Receiver
|
||
float *emitter_list_ptr, ///< Positionskoordinaten Emitter
|
||
int emitter_list_Size, ///< Menge an Emitter
|
||
|
||
float *speed_vec_ptr, ///< SoS Daten im Blockmode oder als SoSGrid
|
||
|
||
int3 SOSGrid_XYZ, ///< Size of SoSGrid
|
||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||
|
||
float *att_vec_ptr, ///< Attenuation Daten als ATTGrid
|
||
|
||
int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen
|
||
int aScanLength, ///< Laenge der AscanDaten (normal 3000)
|
||
float3 regionOfInterestOffset,
|
||
int3 IMAGE_SIZE_XYZ, ///< Groesse des Bildbereichs in Voxel
|
||
float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs
|
||
float sampleRate, ///< Samplerate für AScans
|
||
int3 BlockDim_XYZ, ///< BlockDimension für GPU
|
||
double *Duration_ptr, ///< Rückgabepointer an Matlab für Laufzeit des SAFT-Kernels
|
||
int selectedNumberGPUs, ///< Anzahl der ausgewählten GPUs bzw. auf maximale Anzhal vorhandener begrenzt
|
||
int *enableGPUs_ptr, ///< Gibt an welche GPUs genutzt werden und welche nicht
|
||
float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden
|
||
float debugModeParameter, ///< Parameter der mit fuer Debugmode uebermittelt werden kann
|
||
|
||
bool SOSMode_3DVolume, ///< Wird 3D Volumen für SOS-Korrektur genutzt?
|
||
bool ATTMode_3DVolume, ///< Wird 3D Volumen für ATT-Korrektur genutzt?
|
||
|
||
int SAFT_MODE, ///< Modus für SAFT-Rekonstruktion
|
||
int *SAFT_VARIANT, ///< Verschiedene Parameter der Rekonstruktion
|
||
int SAFT_VARIANT_Size, ///< Menge der verschiedenen Parameter für Rekonstruktion
|
||
int *Abort_ptr ///< FehlerArray
|
||
)
|
||
{
|
||
dim3 fixedBlockDimensions( // convert int3 to dim3
|
||
BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z);
|
||
|
||
// Divide workload and show Information ------------------------------------------------------------------------------------------------------------------
|
||
// Divide workload in pieces with the same size for all available GPUs.
|
||
// If the workload can not be divided in pieces with the same size, the last piece will be the one with little less workload.
|
||
|
||
// Testfall simuliert die mehrfache Anzahl an GPUs
|
||
int num_devices_factor = 1; // Vielfache an GPUs simulieren bzw. kleinere Pakete erzeugen
|
||
int num_workingPackages = selectedNumberGPUs * num_devices_factor;
|
||
|
||
float3 *position = (float3 *)malloc(num_workingPackages * sizeof(float3));
|
||
int3 *resolution = (int3 *)malloc(num_workingPackages * sizeof(int3));
|
||
int3 *volumeStartpoint = (int3 *)malloc(num_workingPackages * sizeof(int3));
|
||
size_t *volumePtr = (size_t *)malloc(num_workingPackages * sizeof(size_t));
|
||
// int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int));
|
||
|
||
float3 volposition = regionOfInterestOffset; // Uebergabe der Parameter
|
||
float inc = IMAGE_RESOLUTION; // koennte auch direkt umbenannt werden
|
||
|
||
size_t volume_size = (size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z * (size_t)sizeof(double); // = Groesse des Outputvolumens in Byte
|
||
|
||
if ((IMAGE_SIZE_XYZ.y == 1) && (IMAGE_SIZE_XYZ.z == 1))
|
||
{
|
||
selectedNumberGPUs = 1;
|
||
num_workingPackages = 1;
|
||
}
|
||
|
||
volumeStartpoint[0].x = 0;
|
||
volumeStartpoint[0].y = 0;
|
||
volumeStartpoint[0].z = 0;
|
||
|
||
position[0].x = volposition.x; // Startposition
|
||
position[0].y = volposition.y;
|
||
position[0].z = volposition.z;
|
||
|
||
std::vector<int> resolutionZs(num_workingPackages, IMAGE_SIZE_XYZ.z / num_workingPackages);
|
||
for (size_t i = 0; i < IMAGE_SIZE_XYZ.z % num_workingPackages; i++) resolutionZs[i]++;
|
||
|
||
int i, j, k;
|
||
|
||
for (i = 0; i < num_workingPackages; i++)
|
||
{
|
||
if (IMAGE_SIZE_XYZ.z > 1)
|
||
{ // Divide in Z-Direction
|
||
|
||
resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y;
|
||
resolution[i].z = resolutionZs[i];
|
||
|
||
if (i > 0)
|
||
{
|
||
volumeStartpoint[i].x = 0; // Koordinaten als Startpunkt für Layer in einzelnen GPUs
|
||
volumeStartpoint[i].y = 0;
|
||
volumeStartpoint[i].z = volumeStartpoint[i - 1].z + resolution[i - 1].z;
|
||
}
|
||
|
||
volumePtr[i] = (size_t)((size_t)resolution[0].x * (size_t)resolution[0].y * (size_t)volumeStartpoint[i].z); // Startpunkt der Speicherstellen fuer das Outputvolumen
|
||
}
|
||
else
|
||
{ // Divide in Y-Direction
|
||
|
||
resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization
|
||
resolution[i].z = IMAGE_SIZE_XYZ.z;
|
||
|
||
if (IMAGE_SIZE_XYZ.y % num_workingPackages == 0)
|
||
{ // Volume is divisible
|
||
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages;
|
||
}
|
||
else
|
||
{ // if not divisible,
|
||
|
||
if (i != (num_workingPackages - 1))
|
||
{ // increment each GPU slice by one
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages + 1;
|
||
}
|
||
else
|
||
{ // except the last one, which get the remaining Layers
|
||
resolution[i].y = IMAGE_SIZE_XYZ.y % resolution[0].y;
|
||
}
|
||
}
|
||
}
|
||
|
||
position[i].x = volposition.x; // Startposition
|
||
position[i].y = volposition.y;
|
||
position[i].z = volposition.z;
|
||
|
||
if (IMAGE_SIZE_XYZ.z > 1)
|
||
position[i].z += i * inc * resolution[0].z; // Calculate Startpositions for the workload-pieces
|
||
else
|
||
position[i].y += i * inc * resolution[0].y;
|
||
}
|
||
|
||
// Create one thread per GPU ------------------------------------------------------------------------------------------------------------------------
|
||
thread_handle *pthread_handle = (thread_handle *)malloc(selectedNumberGPUs * sizeof(thread_handle));
|
||
|
||
for (i = 0; i < num_workingPackages; i++)
|
||
{
|
||
// initialize control block
|
||
pthread_handle[i].deviceId = enableGPUs_ptr[(i % selectedNumberGPUs)]; // Hier DeviceID der GPU setzen
|
||
pthread_handle[i].deviceIndex = (i % selectedNumberGPUs);
|
||
|
||
pthread_handle[i].aScan_ptr = aScan_ptr;
|
||
if (IMAGE_SIZE_XYZ.z > 1)
|
||
pthread_handle[i].output_ptr = &output_ptr[volumePtr[i]]; // Startpoint for Outputvolume.
|
||
// volumePtr[i] = (size_t)(resolution[0].x * resolution[0].y * volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen
|
||
else
|
||
// pthread_handle[i].output_ptr = &output_ptr[ i * resolution[0].x * resolution[0].y * resolution[0].z ]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Gr<47><72>e hat.
|
||
pthread_handle[i].output_ptr =
|
||
&output_ptr[(size_t)resolution[0].x * (size_t)i * (size_t)resolution[0].y]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Groesse hat. Z spielt hier keine Rolle
|
||
|
||
pthread_handle[i].Duration_ptr = Duration_ptr;
|
||
pthread_handle[i].receiver_index_ptr = receiver_index_ptr;
|
||
pthread_handle[i].emitter_index_ptr = emitter_index_ptr;
|
||
pthread_handle[i].receiver_list_ptr = receiver_list_ptr;
|
||
pthread_handle[i].receiver_list_Size = receiver_list_Size;
|
||
pthread_handle[i].emitter_list_ptr = emitter_list_ptr;
|
||
pthread_handle[i].emitter_list_Size = emitter_list_Size;
|
||
pthread_handle[i].speed_vec_ptr = speed_vec_ptr;
|
||
pthread_handle[i].SOSGrid_XYZ = SOSGrid_XYZ;
|
||
pthread_handle[i].sosOffset = sosOffset;
|
||
pthread_handle[i].SOS_RESOLUTION = SOS_RESOLUTION;
|
||
pthread_handle[i].att_vec_ptr = att_vec_ptr;
|
||
|
||
pthread_handle[i].aScanCount = aScanCount;
|
||
pthread_handle[i].aScanLength = aScanLength, pthread_handle[i].inc = IMAGE_RESOLUTION;
|
||
pthread_handle[i].res = resolution[i];
|
||
pthread_handle[i].sampleRate = sampleRate;
|
||
pthread_handle[i].volposition = position[i]; // regionOfInterestOffset
|
||
|
||
pthread_handle[i].num_threads = num_workingPackages;
|
||
|
||
pthread_handle[i].fixedBlockDimensions = fixedBlockDimensions; //
|
||
pthread_handle[i].debugMode = debugMode;
|
||
pthread_handle[i].debugModeParameter = debugModeParameter;
|
||
pthread_handle[i].SOSMode_3DVolume = SOSMode_3DVolume;
|
||
pthread_handle[i].ATTMode_3DVolume = ATTMode_3DVolume;
|
||
|
||
pthread_handle[i].SAFT_MODE = SAFT_MODE;
|
||
pthread_handle[i].SAFT_VARIANT = SAFT_VARIANT;
|
||
pthread_handle[i].SAFT_VARIANT_Size = SAFT_VARIANT_Size;
|
||
|
||
Abort_ptr[i] = 0; // Initialisieren mit kein Fehler
|
||
pthread_handle[i].Abort_ptr = &Abort_ptr[i];
|
||
}
|
||
|
||
auto startAllThreads = std::chrono::steady_clock::now();
|
||
double diff_time = 0.0;
|
||
std::vector<std::future<void>> futures;
|
||
futures.resize(selectedNumberGPUs);
|
||
SPDLOG_INFO("Start GPU execute!");
|
||
|
||
for (j = 0; j < num_devices_factor; j++)
|
||
{
|
||
for (k = 0; k < selectedNumberGPUs; k++)
|
||
{
|
||
// new async threads
|
||
futures[k] = std::async(std::forward<std::function<void(void *)>>(thread_function),
|
||
std::forward<void *>((void *)&pthread_handle[(j * selectedNumberGPUs + k)])); // forward used for perfect forwarding
|
||
}
|
||
|
||
// Synchronization and termination -------------------------------------------------------------------------------------------------------------------
|
||
for (k = 0; k < selectedNumberGPUs; k++)
|
||
{
|
||
// new async threads
|
||
futures[k].wait(); // advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task)
|
||
}
|
||
}
|
||
|
||
// gettimeofday(&stopAllThreads, NULL);
|
||
SPDLOG_INFO("GPU execute finish!");
|
||
|
||
auto stopAllThreads = std::chrono::steady_clock::now();
|
||
diff_time = std::chrono::duration_cast<std::chrono::microseconds>(stopAllThreads - startAllThreads).count(); // total duration in µs
|
||
|
||
Duration_ptr[0] = diff_time; // Return total duration in µs
|
||
|
||
// Speicher wieder freigeben
|
||
free(position);
|
||
free(resolution);
|
||
free(volumeStartpoint);
|
||
free(volumePtr);
|
||
// free (Abort_ptr);
|
||
free(pthread_handle);
|
||
}
|
||
|
||
/**
 preintegrateAscans
 Determine the maximal sample width matching the resolution used for the reconstruction, and
 integrate every A-scan over a window of this sample width.

 Window width in samples: w = ceil(sqrt(3) * 2 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0]).
  - odd w : each output sample is the sum of the (w - 1) / 2 samples on either side plus the
            sample itself, divided by w.
  - even w: as above with floor((w - 1) / 2) samples per side; additionally the next sample on
            either side is added with weight 1 / (2 * w).
 Samples outside the A-scan count as 0, and the divisor is always the full width w, so the
 result flattens towards the borders.

 Every A-scan j occupies aScanLength consecutive floats starting at index j * aScanLength.
 The result overwrites aScan_ptr and is also copied to AscansOut_ptr.
*/
void preintegrateAscans(float *aScan_ptr,         ///< A-scan data (read and overwritten)
                        float *AscansOut_ptr,     ///< A-scan output data returned for testing
                        float *speed_vec_ptr,     ///< SoS data in block mode (only element 0 is read)
                        int aScanCount,           ///< Number of A-scans to be processed in block mode
                        int aScanLength,          ///< Length of the A-scan data (normally 3000)
                        float IMAGE_RESOLUTION,   ///< Resolution of the image region
                        float sampleRate,         ///< Sample rate of the A-scans
                        float debugMode,          ///< Debug mode (not used by this function)
                        float debugModeParameter  ///< Debug mode parameter (not used by this function)
)
{
    (void)debugMode;
    (void)debugModeParameter;

    if (aScanCount <= 0 || aScanLength <= 0)
    {
        return;  // nothing to integrate
    }

    // maximal step width across one voxel = sqrt(3) * 2 * IMAGE_RESOLUTION * fs / c;  sqrt(3) * 2 = 3.464101615
    const float windowWidth = (float)3.464101615 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0];
    const auto windowSamples = ceil(windowWidth);                   // full window width in samples
    const bool evenWindow = ((int)windowSamples % 2 == 0);
    const int halfSpan = (int)floor((windowSamples - 1) / 2);       // fully weighted samples on either side

#pragma omp parallel for num_threads(32)
    for (int j = 0; j < aScanCount; j++)
    {  // loop over all A-scans
        const size_t offset = (size_t)j * (size_t)aScanLength;
        float *const scan = aScan_ptr + offset;
        float *const scanOut = AscansOut_ptr + offset;

        // The window reads neighbouring input samples, so results are collected in a
        // separate buffer and written back only after the whole A-scan is done.
        std::vector<float> integrated((size_t)aScanLength);

        for (int i = 0; i < aScanLength; i++)
        {  // loop over the whole length of the A-scan
            int first = i - halfSpan;
            int last = i + halfSpan;

            // keep within the borders
            if (first < 0)
                first = 0;
            if (last > aScanLength - 1)
                last = aScanLength - 1;

            float windowSum = 0;
            for (int k = first; k <= last; k++)
            {
                windowSum += scan[k];
            }

            // Divide by the full window width even where samples are missing at the border.
            windowSum = windowSum / windowSamples;

            if (evenWindow)
            {
                // Add the outer neighbours with half weight, but only where they exist:
                // first == 0 has no left neighbour (index -1 would leave this A-scan).
                if (first > 0)
                {
                    windowSum = windowSum + scan[first - 1] / (2 * windowSamples);
                }
                if (last < aScanLength - 1)
                {
                    windowSum = windowSum + scan[last + 1] / (2 * windowSamples);
                }
            }

            integrated[(size_t)i] = windowSum;
        }

        // Transfer data from the buffer to the memory regions
        for (int i = 0; i < aScanLength; i++)
        {
            scan[i] = integrated[(size_t)i];     // write into the A-scan memory
            scanOut[i] = integrated[(size_t)i];  // also write back for Matlab
        }
    }
}
|
||
|
||
/// Returns the Dims member of the given matrix.
const size_t *GetDimensions(const Matrix_t &matrix)
{
    return matrix.Dims;
}
|
||
/// Returns the Data member of the given matrix as an untyped read-only pointer.
const void *GetPr(const Matrix_t &matrix)
{
    return matrix.Data;
}
|
||
|
||
/// Returns the NumberOfDims member of the given matrix.
size_t GetNumberOfDimensions(const Matrix_t &matrix)
{
    return matrix.NumberOfDims;
}
|
||
|
||
/// Returns the DataSize member of the given matrix.
size_t GetNumberOfElements(const Matrix_t &matrix)
{
    return matrix.DataSize;
}
|
||
|
||
Matrix_t SAFT_TOFI(std::vector<Matrix_t> ¶ms)
|
||
{
|
||
auto console_sink = std::make_shared<spdlog::sinks::stdout_color_sink_mt>();
|
||
console_sink->set_level(spdlog::level::info);
|
||
console_sink->set_pattern(fmt::format("[%Y-%m-%d %T .%f][{}] [%^%l%$] %v", "SAFT"));
|
||
std::shared_ptr<spdlog::logger> logger(new spdlog::logger("SAFT", {console_sink}));
|
||
logger->set_level(spdlog::level::info);
|
||
logger->flush_on(spdlog::level::info);
|
||
|
||
SPDLOG_INFO("Start SAFT!");
|
||
size_t AScan_Nx, AScan_Mx, pix_vect_Nx, pix_vect_Mx, receiver_index_Nx, receiver_index_Mx, emitter_index_Nx, emitter_index_Mx, receiver_list_Nx, receiver_list_Mx, emitter_list_Nx, emitter_list_Mx,
|
||
|
||
SAFT_mode_Nx, SAFT_mode_Mx, SAFT_variant_Nx, SAFT_variant_Mx,
|
||
|
||
speed_Nx, speed_Mx, SOSGrid_Xx, SOSGrid_Yx, SOSGrid_Zx, sos_startPoint_Nx, sos_startPoint_Mx, sos_res_Nx, sos_res_Mx,
|
||
|
||
attVolume_Nx, attVolume_Mx, ATTGrid_Xx, ATTGrid_Yx, ATTGrid_Zx,
|
||
|
||
res_Nx, res_Mx, timeint_Nx, timeint_Mx, IMAGE_XYZ_Nx, IMAGE_XYZ_Mx, IMAGE_SUM_Xx, IMAGE_SUM_Yx, IMAGE_SUM_Zx, BlockDim_XYZ_Nx, BlockDim_XYZ_Mx,
|
||
|
||
GPUs_Nx, GPUs_Mx, dbgMode_Nx, dbgMode_Mx;
|
||
|
||
int aScanCount;
|
||
int aScanLength;
|
||
float *aScan_ptr;
|
||
int3 IMAGE_SIZE_XYZ;
|
||
int3 BlockDim_XYZ;
|
||
bool SOSMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> SOSGrid_XYZ, SOS_RESOLUTION, sosOffset not neccessary
|
||
bool ATTMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> ATTGrid_XYZ not neccessary
|
||
int SAFT_MODE;
|
||
int *SAFT_VARIANT; // Variances of SAFT
|
||
int SAFT_VARIANT_Size; // Size of Varainces
|
||
int3 SOSGrid_XYZ; // Size of SOSGrid
|
||
int3 ATTGrid_XYZ; // Size of ATTGrid
|
||
float3 regionOfInterestOffset; // Startpoint
|
||
float3 sosOffset; // Startpoint SoS
|
||
float IMAGE_RESOLUTION; // Aufloesung
|
||
float SOS_RESOLUTION; // Aufloesung
|
||
float sampleRate; // Samplerate für AScans
|
||
int selectedNumberGPUs; // Anzahl der genutzten GPUs durch uebergebene Groesse der Ausgewaehlten GPUs
|
||
float debugMode;
|
||
float debugModeParameter;
|
||
|
||
if (params.size() != 19)
|
||
{
|
||
printf(" \n");
|
||
printf(" Inputparameter \n");
|
||
printf(" In[n] Meaning [Row N x Col M] Type\n");
|
||
printf(" =============================================================================\n");
|
||
printf(" 1-prhs[0] AScan-Data [3000?xnAscans] single\n");
|
||
printf(" 2-prhs[1] IMAGE_STARTPOINT_S [1x3] single\n");
|
||
|
||
printf(" 3-prhs[2] receiver_index [1xnAscans] uint16\n");
|
||
printf(" 4-prhs[3] emitter_index [1xnAscans] uint16\n");
|
||
printf(" 5-prhs[4] receiver_list [3xnReceiver] single\n");
|
||
printf(" 6-prhs[5] emitter_list [3xnEmitter] single\n");
|
||
|
||
printf(" 7-prhs[6] SAFT_mode [1x1] uint32\n");
|
||
printf(" 8-prhs[7] SAFT_variant [1x6] uint32\n");
|
||
|
||
printf(" Standard: [1 1 1 1 0 0] \n");
|
||
printf(" -> Ascan Preintegration\n");
|
||
printf(" -> Ascan Interpolation\n");
|
||
printf(" -> Preprocessing SOS&ATT 3D Volume Interpolation\n");
|
||
printf(" -> Reconstruction SOS&ATT 3D Volume Interpolation\n");
|
||
printf(" -> not yet\n");
|
||
printf(" -> not yet\n");
|
||
|
||
printf(" 9-prhs[8] SOSVolume [SOS_XxYxZ] single in m/s\n");
|
||
printf(" 10-prhs[9] SOS_STARTPOINT_S [1x3] single\n");
|
||
printf(" 11-prhs[10] SOS_RESOLUTION_S [1x1] single\n");
|
||
|
||
printf(" 12-prhs[11] ATTVolume [ATT_XxYxZ] single in dB/cm\n");
|
||
|
||
printf(" 13-prhs[12] IMAGE_RESOLUTION_S [1x1] single\n");
|
||
printf(" 14-prhs[13] TimeInterval_S [1x1] single\n");
|
||
printf(" 15-prhs[14] IMAGE_XYZ [1x3] uint32\n");
|
||
printf(" 16-prhs[15] IMAGE_SUM [Output_XxYxZ] double\n");
|
||
|
||
printf(" 17-prhs[16] BlockDim_XYZ (GPU) [1x3] uint32\n");
|
||
printf(" 18-prhs[17] GPUs (DeviceNr GPU) [1xn] uint32\n");
|
||
|
||
printf(" 19-prhs[18] dbgMode,dbgModeParam [1x2] single\n");
|
||
|
||
printf(" ==============================================================================\n");
|
||
printf("\n");
|
||
printf(" Outputparameter \n");
|
||
printf(" Out[n] Meaning \n");
|
||
printf(" ================================================================================================= \n");
|
||
printf(" plhs[0] = Output_Voxels = mxCreateNumericArray ( [IMAGE_XYZ] , mxDOUBLE_CLASS, mxREAL); \n");
|
||
printf(" plhs[1] = Duration = mxCreateDoubleMatrix ( [nGPUs+1, 1] , mxREAL); \n");
|
||
printf(" plhs[2] = Output_Ascans = mxCreateNumericMatrix( [3000?,nAscans], mxSINGLE_CLASS, mxREAL); \n");
|
||
printf(" ================================================================================================= \n");
|
||
printf("Wrong number of input arguments. Should be 19.");
|
||
}
|
||
|
||
// assign input arguments... // Bestimme die Eingangswerte
|
||
const Matrix_t &AScan = params[0]; // AScan-Data
|
||
const Matrix_t &pix_vect = params[1]; // Image Startpoint (IMAGE_STARTPOINT_S)
|
||
|
||
const Matrix_t &receiver_index = params[2]; // Index Data for Receiver-Position Data
|
||
const Matrix_t &emitter_index = params[3]; // Index Data for Emitter-Position Data
|
||
const Matrix_t &receiver_list = params[4]; // Assignment Index to Receiver-Position Data
|
||
const Matrix_t &emitter_list = params[5]; // Assignment Index to Emitter-Position Data
|
||
|
||
const Matrix_t &SAFT_mode = params[6]; // SOS?, ATT?
|
||
const Matrix_t &SAFT_variant = params[7]; // Differnt Mode-Parameter for Reconstruction
|
||
|
||
const Matrix_t &speed = params[8]; // Speed of Sound Data (Single, SoS-Grid)
|
||
const Matrix_t &sos_startPoint = params[9]; // Startpoint of Speed of Sound Grid
|
||
const Matrix_t &sos_res = params[10]; // SoS Grid Resolution
|
||
|
||
const Matrix_t &attVolume = params[11]; // Attenuation Data (Single, SoS-Grid)
|
||
|
||
const Matrix_t &res = params[12]; // Output Volume Resolution
|
||
const Matrix_t &timeint = params[13]; // 1/Sample-Rate
|
||
const Matrix_t &IMAGE_XYZ = params[14]; // Output Volume Size XYZ
|
||
const Matrix_t &IMAGE_SUM = params[15]; // Volume from previous Call
|
||
|
||
const Matrix_t &BlockDim = params[16]; // Block Dimension to use for GPU
|
||
const Matrix_t &GPUs = params[17]; // Welche GPUs sollen genutzt werden?
|
||
|
||
const Matrix_t &dbgMode = params[18]; // DebugMode and DebugMode-Parameter
|
||
|
||
//====================================================================== 1.Input Parameter - Check AScan
|
||
AScan_Nx = GetDimensions(AScan)[0]; // Reihen N ermitteln
|
||
AScan_Mx = GetDimensions(AScan)[1]; // Spalten M ermitteln
|
||
|
||
aScanCount = AScan_Mx;
|
||
aScanLength = AScan_Nx;
|
||
|
||
// printf( "mxGetNumberOfDimensions(AScan)=%i\n", mxGetNumberOfDimensions(AScan));
|
||
if ((aScanCount > 65535)) // new 2019: increasing the limit of the A-Scan block size. however this is limited by the datatype of unsigned short which is used for a pointer.
|
||
{
|
||
printf(" -> AScanBlock size = %i\n", aScanCount);
|
||
printf("AScanBlock size might be too large (=> 2^16)!!!");
|
||
}
|
||
|
||
aScan_ptr = (float *)GetPr(AScan);
|
||
|
||
//====================================================================== 2.Input Parameter - Check IMAGE_STARTPOINT_S / pix_vect
|
||
pix_vect_Nx = GetDimensions(pix_vect)[0]; // Reihen N ermitteln
|
||
pix_vect_Mx = GetDimensions(pix_vect)[1]; // Spalten M ermitteln
|
||
|
||
regionOfInterestOffset.x = *((float *)GetPr(pix_vect));
|
||
regionOfInterestOffset.y = *((float *)GetPr(pix_vect) + 1);
|
||
regionOfInterestOffset.z = *((float *)GetPr(pix_vect) + 2);
|
||
|
||
if (!(pix_vect_Nx == 1) || !(pix_vect_Mx == 3))
|
||
printf(" -> Dimension of IMAGE_STARTPOINT_S must be [1 x 3]");
|
||
if ((pix_vect_Nx > 1))
|
||
printf(" -> No Blockmode [%i x 3] allowed for IMAGE_STARTPOINT_S\n", pix_vect_Nx);
|
||
// if(!(mxIsSingle(pix_vect)))
|
||
// printf(" -> IMAGE_STARTPOINT_S must be Single");
|
||
|
||
//====================================================================== 3.Input Parameter - Check Receiver Index
|
||
receiver_index_Nx = GetDimensions(receiver_index)[0]; // Reihen N ermitteln
|
||
receiver_index_Mx = GetDimensions(receiver_index)[1]; // Spalten M ermitteln
|
||
|
||
if (!(receiver_index_Nx == 1))
|
||
printf(" -> Dimension of receiver_index must be [1 x M]");
|
||
if (!(receiver_index_Mx == aScanCount))
|
||
{
|
||
printf(" -> aScanCount(%i)!= M(%i)\n", aScanCount, receiver_index_Mx);
|
||
printf(" -> Dimension of receiver_index has different size as Ascan-Data\n");
|
||
}
|
||
|
||
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
|
||
unsigned short *receiver_index_ptr;
|
||
receiver_index_ptr = (unsigned short *)GetPr(receiver_index);
|
||
|
||
//====================================================================== 4.Input Parameter - Check Emitter Index
|
||
emitter_index_Nx = GetDimensions(emitter_index)[0]; // Reihen N ermitteln
|
||
emitter_index_Mx = GetDimensions(emitter_index)[1]; // Spalten M ermitteln
|
||
|
||
if (!(emitter_index_Nx == 1))
|
||
printf(" -> Dimension of emitter_index must be [1 x M]");
|
||
if (!(emitter_index_Mx == aScanCount))
|
||
{
|
||
printf(" -> aScanCount(%i)!= M(%i)\n", aScanCount, emitter_index_Mx);
|
||
printf(" -> Dimension of emitter_index has different size as Ascan-Data\n");
|
||
}
|
||
|
||
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
|
||
unsigned short *emitter_index_ptr = (unsigned short *)GetPr(emitter_index);
|
||
|
||
//====================================================================== 5.Input Parameter - Check receiver_list
|
||
receiver_list_Nx = GetDimensions(receiver_list)[0]; // Reihen N ermitteln
|
||
receiver_list_Mx = GetDimensions(receiver_list)[1]; // Spalten M ermitteln
|
||
|
||
if (!(receiver_list_Nx == 3))
|
||
printf(" -> Dimension of receiver_list must be [3 x M]");
|
||
|
||
// Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten
|
||
float *receiver_list_ptr;
|
||
receiver_list_ptr = (float *)GetPr(receiver_list);
|
||
|
||
//====================================================================== 6.Input Parameter - Check emitter_list
|
||
emitter_list_Nx = GetDimensions(emitter_list)[0]; // Reihen N ermitteln
|
||
emitter_list_Mx = GetDimensions(emitter_list)[1]; // Spalten M ermitteln // emitter_list gibt die maximale Anzahl an Emittern die in diesem Block vorkommen können wieder!
|
||
|
||
if (!(emitter_list_Nx == 3))
|
||
printf(" -> Dimension of emitter_list must be [3 x M]");
|
||
|
||
// Ausgabe einzelner Geometriedaten der übergabewerte mit verschiedenen Varianten
|
||
float *emitter_list_ptr;
|
||
emitter_list_ptr = (float *)GetPr(emitter_list);
|
||
|
||
//====================================================================== 7.Input Parameter - Check SAFT_mode
|
||
SAFT_mode_Nx = GetDimensions(SAFT_mode)[0]; // Reihen N ermitteln
|
||
SAFT_mode_Mx = GetDimensions(SAFT_mode)[1]; // Spalten M ermitteln
|
||
|
||
SAFT_MODE = *((int *)GetPr(SAFT_mode));
|
||
|
||
if (!(SAFT_mode_Nx == 1))
|
||
printf(" -> Dimension of SAFT_MODE must be [1 x 1]");
|
||
|
||
switch (SAFT_MODE)
|
||
{
|
||
case 0:
|
||
SOSMode_3DVolume = false;
|
||
ATTMode_3DVolume = false;
|
||
// printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
printf(" -> AscanIndexVersion only make sense with SOS or SOS and ATT Volume => exit");
|
||
break;
|
||
case 1:
|
||
SOSMode_3DVolume = true;
|
||
ATTMode_3DVolume = false;
|
||
// printf ( "\e[7;37m + Speed of sound correction - Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
break;
|
||
case 2:
|
||
SOSMode_3DVolume = true;
|
||
ATTMode_3DVolume = true;
|
||
// printf ( "\e[7;37m + Speed of sound correction + Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
break;
|
||
case 3:
|
||
// SOSMode_3DVolume = true; ATTMode_3DVolume = true;
|
||
// printf ( "\e[7;37m SAFT_MODE = 3 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
printf(" -> not implemented => exit");
|
||
break;
|
||
case 4:
|
||
// SOSMode_3DVolume = false; ATTMode_3DVolume = false;
|
||
// printf ( "\e[7;37m SAFT_MODE = 4 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
printf(" -> not implemented => exit");
|
||
break;
|
||
default:
|
||
SOSMode_3DVolume = false;
|
||
ATTMode_3DVolume = false;
|
||
// printf ( " -> SAFT_MODE %i is out of range [0..3] => use Standard SAFT\n", SAFT_MODE);
|
||
// printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume);
|
||
break;
|
||
}
|
||
|
||
//====================================================================== 8.Input Parameter - Check SAFT_variant
|
||
SAFT_variant_Nx = GetDimensions(SAFT_variant)[0]; // Reihen N ermitteln
|
||
SAFT_variant_Mx = GetDimensions(SAFT_variant)[1]; // Spalten M ermitteln
|
||
|
||
SAFT_VARIANT = (int *)GetPr(SAFT_variant);
|
||
SAFT_VARIANT_Size = SAFT_variant_Mx;
|
||
|
||
// if(!(mxIsUint32(SAFT_variant)))
|
||
// printf(" -> SAFT_VARIANT must be Uint32");
|
||
if (!(SAFT_variant_Nx == 1) || !(SAFT_variant_Mx == 6))
|
||
printf(" -> Dimension of SAFT_VARIANT must be [1 x 6]");
|
||
|
||
//====================================================================== 9.Input Parameter - Check for SOS volume
|
||
speed_Nx = GetDimensions(speed)[0]; // Reihen N ermitteln
|
||
speed_Mx = GetDimensions(speed)[1]; // Spalten M ermitteln
|
||
float Sos = *((float *)GetPr(speed));
|
||
|
||
float *speed_vec_ptr;
|
||
speed_vec_ptr = (float *)GetPr(speed); // Pointer für SoSDaten ermitteln
|
||
|
||
// if(!(mxIsSingle(speed)))
|
||
// printf(" -> SOSVolume must be Single");
|
||
|
||
if (SOSMode_3DVolume == true) // SOS correction need 3D Volume
|
||
{
|
||
if (GetNumberOfDimensions(speed) == 3)
|
||
{
|
||
if (!((speed_Nx > 1) && (speed_Mx > 1)))
|
||
{
|
||
printf(" -> SOSGrid_XYZ.x and SOSGrid_XYZ.y must be > 1 for SOS Correction with 3D Volume!!!");
|
||
}
|
||
}
|
||
else if (GetNumberOfDimensions(speed) == 2)
|
||
{
|
||
printf("prhs[8] SOSVolume [%ix%i]\n", (int)GetDimensions(speed)[0], (int)GetDimensions(speed)[1]);
|
||
printf(" -> SOSVolume is not a 3D Volume as expected!");
|
||
}
|
||
|
||
SOSGrid_Xx = GetDimensions(speed)[0]; // SOSGrid_X ermitteln
|
||
SOSGrid_Yx = GetDimensions(speed)[1]; // SOSGrid_Y ermitteln
|
||
SOSGrid_Zx = GetDimensions(speed)[2]; // SOSGrid_Z ermitteln
|
||
|
||
SOSGrid_XYZ.x = SOSGrid_Xx;
|
||
SOSGrid_XYZ.y = SOSGrid_Yx;
|
||
SOSGrid_XYZ.z = SOSGrid_Zx;
|
||
|
||
if ((SOSGrid_XYZ.x > 128) || (SOSGrid_XYZ.y > 128) || (SOSGrid_XYZ.z > 128))
|
||
{
|
||
printf(" -> SOSGrid_XYZ [%i x %i x %i]\n", (int)SOSGrid_Xx, (int)SOSGrid_Yx, (int)SOSGrid_Zx);
|
||
printf(" Warning -> SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n");
|
||
}
|
||
}
|
||
|
||
//====================================================================== 10.Input Parameter - Check SoS Startpoint
|
||
sos_startPoint_Nx = GetDimensions(sos_startPoint)[0]; // Reihen N ermitteln
|
||
sos_startPoint_Mx = GetDimensions(sos_startPoint)[1]; // Spalten M ermitteln
|
||
|
||
sosOffset.x = *((float *)GetPr(sos_startPoint));
|
||
sosOffset.y = *((float *)GetPr(sos_startPoint) + 1);
|
||
sosOffset.z = *((float *)GetPr(sos_startPoint) + 2);
|
||
|
||
if (!(sos_startPoint_Nx == 1) || !(sos_startPoint_Mx == 3))
|
||
printf(" -> Dimension of SOS_STARTPOINT_S must be [1 x 3]");
|
||
if ((sos_startPoint_Nx > 1))
|
||
printf(" -> No Blockmode [%i x 3] allowed for SOS_STARTPOINT_S\n", sos_startPoint_Nx);
|
||
// if(!(mxIsSingle(sos_startPoint)))
|
||
// printf(" -> SOS_STARTPOINT_S must be Single");
|
||
|
||
//====================================================================== 11.Input Parameter - Check SoS_RESOLUTION / sos_res
|
||
if (SOSMode_3DVolume == true)
|
||
{
|
||
sos_res_Nx = GetDimensions(sos_res)[0]; // Reihen N ermitteln
|
||
sos_res_Mx = GetDimensions(sos_res)[1]; // Spalten M ermitteln
|
||
|
||
SOS_RESOLUTION = *((float *)GetPr(sos_res));
|
||
|
||
if (!(sos_res_Nx == 1))
|
||
printf(" -> Dimension of SOS_RESOLUTION_S must be [1 x 1]");
|
||
if ((sos_res_Mx > 1))
|
||
printf(" -> No Blockmode allowed for SOS_RESOLUTION_S! [1 x %i]\n", sos_res_Mx);
|
||
// if(!(mxIsSingle(sos_res)))
|
||
// printf(" -> SOS_RESOLUTION_S must be Single");
|
||
}
|
||
|
||
//====================================================================== 12.Input Parameter - Check for ATTVolume / Attenuation-Data
|
||
|
||
attVolume_Nx = GetDimensions(attVolume)[0]; // Reihen N ermitteln
|
||
attVolume_Mx = GetDimensions(attVolume)[1]; // Spalten M ermitteln
|
||
|
||
float *att_vec_ptr;
|
||
att_vec_ptr = (float *)GetPr(attVolume); // Pointer für ATT-Daten ermitteln
|
||
|
||
if (GetNumberOfDimensions(attVolume) == 3)
|
||
{
|
||
if (!((attVolume_Nx > 1) && (attVolume_Mx > 1)))
|
||
{
|
||
printf(" -> ATTGrid_XYZ.x and ATTGrid_XYZ.y must be > 1 for ATT Correction with 3D Volume!!!");
|
||
}
|
||
}
|
||
else if (GetNumberOfDimensions(attVolume) == 2)
|
||
{
|
||
}
|
||
|
||
if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
||
{ // 3D Volume muss bei SOS und ATT angegeben sein damit ATT Korrektur durchgefuehrt werden kann
|
||
ATTGrid_Xx = GetDimensions(attVolume)[0]; // ATTGrid_X ermitteln
|
||
ATTGrid_Yx = GetDimensions(attVolume)[1]; // ATTGrid_Y ermitteln
|
||
ATTGrid_Zx = GetDimensions(attVolume)[2]; // ATTGrid_Z ermitteln
|
||
|
||
ATTGrid_XYZ.x = ATTGrid_Xx;
|
||
ATTGrid_XYZ.y = ATTGrid_Yx;
|
||
ATTGrid_XYZ.z = ATTGrid_Zx;
|
||
|
||
if ((ATTGrid_XYZ.x > 128) || (ATTGrid_XYZ.y > 128) || (ATTGrid_XYZ.z > 128))
|
||
{
|
||
printf(" -> ATTGrid_XYZ [%i x %i x %i]\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z);
|
||
printf(" Warning -> ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n");
|
||
}
|
||
|
||
if ((ATTGrid_XYZ.x != SOSGrid_XYZ.x) || (ATTGrid_XYZ.y != SOSGrid_XYZ.y) || (ATTGrid_XYZ.z != SOSGrid_XYZ.z))
|
||
{ // Restriction: Volume parameter of ATT & SOS must be the same
|
||
printf(" -> ATTGrid[%i %i %i] != SOSGrid[%i %i %i]!\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z, SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z);
|
||
printf(" -> ATTGrid must have the same size as SOSGrid \n");
|
||
}
|
||
}
|
||
else
|
||
{
|
||
printf(" -> ATTMode_3DVolume == false => skip ATTGrid\n");
|
||
|
||
ATTGrid_Xx = 0; // ATTGrid_X ermitteln
|
||
ATTGrid_Yx = 0; // ATTGrid_Y ermitteln
|
||
ATTGrid_Zx = 0; // ATTGrid_Z ermitteln
|
||
|
||
ATTGrid_XYZ.x = ATTGrid_Xx;
|
||
ATTGrid_XYZ.y = ATTGrid_Yx;
|
||
ATTGrid_XYZ.z = ATTGrid_Zx;
|
||
}
|
||
|
||
//====================================================================== 13.Input Parameter - Check IMAGE_RESOLUTION_S / res
|
||
res_Nx = GetDimensions(res)[0]; // Reihen N ermitteln
|
||
res_Mx = GetDimensions(res)[1]; // Spalten M ermitteln
|
||
|
||
IMAGE_RESOLUTION = *((float *)GetPr(res));
|
||
|
||
if (!(res_Nx == 1))
|
||
printf(" -> Dimension of IMAGE_RESOLUTION must be [1 x 1]");
|
||
if ((res_Mx > 1))
|
||
printf(" -> No Blockmode allowed for IMAGE_RESOLUTION! [1 x %i]\n", res_Mx);
|
||
// if(!(mxIsSingle(res)))
|
||
// printf(" -> IMAGE_RESOLUTION must be Single");
|
||
|
||
if (SOSMode_3DVolume == true)
|
||
{
|
||
if (IMAGE_RESOLUTION > SOS_RESOLUTION)
|
||
{
|
||
printf(" -> IMAGE_RESOLUTION (%f) > SOS_RESOLUTION (%f)\n", IMAGE_RESOLUTION, SOS_RESOLUTION);
|
||
printf(" -> IMAGE_RESOLUTION must not > SOS_RESOLUTION !!!");
|
||
}
|
||
}
|
||
|
||
//====================================================================== 14.Input Parameter - Check TimeInterval_S / Timeint
|
||
timeint_Nx = GetDimensions(timeint)[0]; // Reihen N ermitteln
|
||
timeint_Mx = GetDimensions(timeint)[1]; // Spalten M ermitteln
|
||
|
||
sampleRate = *((float *)GetPr(timeint));
|
||
|
||
if (!(timeint_Nx == 1))
|
||
printf(" -> Dimension of TimeInterval_S must be [1 x 1]");
|
||
if ((timeint_Mx > 1))
|
||
printf(" -> No Blockmode allowed for TimeInterval_S! [1 x %i]\n", timeint_Mx);
|
||
//====================================================================== 15.Input Parameter - Check IMAGE_XYZ_UI32 / IMAGE_XYZ
|
||
IMAGE_XYZ_Nx = GetDimensions(IMAGE_XYZ)[0]; // Reihen N ermitteln
|
||
IMAGE_XYZ_Mx = GetDimensions(IMAGE_XYZ)[1]; // Spalten M ermitteln
|
||
|
||
IMAGE_SIZE_XYZ.x = *((int *)GetPr(IMAGE_XYZ));
|
||
IMAGE_SIZE_XYZ.y = *((int *)GetPr(IMAGE_XYZ) + 1);
|
||
IMAGE_SIZE_XYZ.z = *((int *)GetPr(IMAGE_XYZ) + 2);
|
||
|
||
if (!(IMAGE_XYZ_Nx == 1) || !(IMAGE_XYZ_Mx == 3))
|
||
printf(" -> Dimension of IMAGE_XYZ must be [1 x 3]");
|
||
if ((IMAGE_XYZ_Nx > 1))
|
||
printf(" -> No Blockmode allowed for IMAGE_XYZ! [%i x 3]\n", IMAGE_XYZ_Nx);
|
||
// if(!(mxIsUint32(IMAGE_XYZ)))
|
||
// printf(" -> IMAGE_XYZ must be UINT32");
|
||
|
||
if ((IMAGE_SIZE_XYZ.x > 8192) || (IMAGE_SIZE_XYZ.y > 8192)) // Aufteilung in BlockDim 512,1,1 passt für 5632x5632. Es würde etwas weiter gehen aber dann muss Y kleiner sein.
|
||
printf(" -> IMAGE_XYZ must not > [8192 x 8192 x N]!!!");
|
||
|
||
//====================================================================== 16.Input Parameter - Check Env / IMAGE_SUM
|
||
IMAGE_SUM_Xx = GetDimensions(IMAGE_SUM)[0]; // Spalten M ermitteln X
|
||
IMAGE_SUM_Yx = GetDimensions(IMAGE_SUM)[1]; // Reihen N ermitteln Y
|
||
|
||
if (GetNumberOfDimensions(IMAGE_SUM) > 2)
|
||
IMAGE_SUM_Zx = GetDimensions(IMAGE_SUM)[2]; // Z-Schichten ermitteln Z
|
||
else if (GetNumberOfDimensions(IMAGE_SUM) == 2)
|
||
IMAGE_SUM_Zx = 1; // Z-Schichten = 1
|
||
else
|
||
{
|
||
printf(" -> mxGetNumberOfDimensions of IMAGE_SUM = %i\n", (int)GetNumberOfDimensions(IMAGE_SUM));
|
||
printf(" -> Dimension of IMAGE_SUM must be 3: [X x Y x Z]");
|
||
}
|
||
|
||
uint64_t IMAGE_SUM_Count = GetNumberOfElements(IMAGE_SUM);
|
||
float *IMAGE_SUM_vec_ptr = (float *)GetPr(IMAGE_SUM);
|
||
|
||
//====================================================================== 17.Input Parameter - Check BlockDimension for GPU
|
||
|
||
BlockDim_XYZ_Nx = GetDimensions(BlockDim)[0]; // Reihen N ermitteln
|
||
BlockDim_XYZ_Mx = GetDimensions(BlockDim)[1]; // Spalten M ermitteln
|
||
|
||
BlockDim_XYZ.x = *((int *)GetPr(BlockDim));
|
||
BlockDim_XYZ.y = *((int *)GetPr(BlockDim) + 1);
|
||
BlockDim_XYZ.z = *((int *)GetPr(BlockDim) + 2);
|
||
|
||
if (!(BlockDim_XYZ_Nx == 1) || !(BlockDim_XYZ_Mx == 3))
|
||
printf(" -> Dimension of BlockDim_XYZ must be [1 x 3]");
|
||
if ((BlockDim_XYZ_Nx > 1))
|
||
printf(" -> No Blockmode! [%i x 3]\n", (int)BlockDim_XYZ_Nx);
|
||
// if(!(mxIsUint32(BlockDim)))
|
||
// printf(" -> BlockDim_XYZ must be UINT32");
|
||
|
||
if ((BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z) > 1024)
|
||
{ // BlockSize limited to 1024. Perhaps newer GPUs will support more Threads per Block
|
||
printf(" -> BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z must not > 1024!!!");
|
||
|
||
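// The block size can be adapted here: fall back to a default when the requested one exceeds 1024 threads.
// NOTE(review): all three assignments below write BlockDim_XYZ.x, so .x ends up as 1 and .y/.z keep the
// rejected values; presumably the second and third were meant to set .y and .z - confirm and fix.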
|
||
BlockDim_XYZ.x = 1024; // If Blockdimensions are not specified than standard Blockdimensions will be used.
|
||
BlockDim_XYZ.x = 1;
|
||
BlockDim_XYZ.x = 1;
|
||
|
||
printf(" -> Standard Size for BlockDim_XYZ is used [%ix%ix%i]\n", BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z);
|
||
}
|
||
|
||
//====================================================================== 18.Input Parameter - Check GPUs
|
||
|
||
int *enableGPUs_ptr;
|
||
|
||
GPUs_Nx = GetDimensions(GPUs)[0]; // Reihen N ermitteln
|
||
GPUs_Mx = GetDimensions(GPUs)[1]; // Spalten M ermitteln
|
||
enableGPUs_ptr = (int *)GetPr(GPUs);
|
||
selectedNumberGPUs = GPUs_Mx;
|
||
|
||
// Query the number of CUDA devices and limit the number of selected GPUs to what is available
|
||
int num_devices = 0;
|
||
// printf( " -> cudaGetDeviceCount: %i\n", num_devices);
|
||
CUDA_CHECK(cudaGetDeviceCount(&num_devices));
|
||
|
||
if (selectedNumberGPUs <= num_devices)
|
||
{
|
||
}
|
||
else
|
||
{
|
||
printf(" !!! !!! selectedNumberGPUs(%i) > num_devices(%i) !!!! !!!! -> selectedNumberGPUs = num_devices = %i!\n", selectedNumberGPUs, num_devices, num_devices);
|
||
selectedNumberGPUs = num_devices; // Reduce number of selected to number of GPUs in PC system!
|
||
}
|
||
|
||
// Check passed GPU-ID-Numbers and amount of GPUs
|
||
int gpuNr = 0;
|
||
int gpuNrCheck = 0;
|
||
for (gpuNr = 0; gpuNr < selectedNumberGPUs; ++gpuNr)
|
||
{
|
||
if (enableGPUs_ptr[gpuNr] > (num_devices - 1))
|
||
{ // Check if more GPUs are selected then available in System
|
||
printf("\n enableGPUs_ptr[gpuNr=%i] = %i !!!\n", gpuNr, enableGPUs_ptr[gpuNr]);
|
||
printf(" -> selected number of GPU > available Devices is not allowed!");
|
||
}
|
||
|
||
for (gpuNrCheck = 0; gpuNrCheck < gpuNr; ++gpuNrCheck)
|
||
{
|
||
if (enableGPUs_ptr[gpuNrCheck] == enableGPUs_ptr[gpuNr])
|
||
printf(" -> GPU Device can only be used once!!!");
|
||
}
|
||
}
|
||
|
||
if (!(GPUs_Nx == 1) || !(GPUs_Mx < 10))
|
||
printf(" -> Dimension of GPUs must be [1 x <10]");
|
||
if ((pix_vect_Nx > 1))
|
||
printf(" -> No Blockmode [%i x n] allowed for GPUs\n", GPUs_Nx);
|
||
|
||
//====================================================================== 19.Input Parameter - debugMode, debugModeParameter
|
||
dbgMode_Nx = GetDimensions(dbgMode)[0]; // Reihen N ermitteln
|
||
dbgMode_Mx = GetDimensions(dbgMode)[1]; // Spalten M ermitteln
|
||
|
||
debugMode = *((float *)GetPr(dbgMode));
|
||
debugModeParameter = *((float *)GetPr(dbgMode) + 1);
|
||
|
||
if ((dbgMode_Nx != 1) || (dbgMode_Mx != 2))
|
||
printf(" -> Dimension of debugMode must be [1 x 2]\n");
|
||
if (debugMode != 0.0)
|
||
printf(" -> debugMode = [%f], debugModeParameter = [%f]\n", debugMode, debugModeParameter);
|
||
|
||
// ~~~~ Create 3D-Matrix for the Output-Values
|
||
// Output-Dimension is {IMAGE_XYZ_X, IMAGE_XYZ_Y, IMAGE_XYZ_Z}
|
||
const int dims[] = {IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z};
|
||
int ndim = 3;
|
||
|
||
Matrix_t Output_Voxels;
|
||
Output_Voxels.NumberOfDims = ndim;
|
||
Output_Voxels.Dims[0] = dims[0];
|
||
Output_Voxels.Dims[1] = dims[1];
|
||
Output_Voxels.Dims[2] = dims[2];
|
||
Output_Voxels.Data = new float[dims[0] * dims[1] * (dims[2] ? dims[2] : 1)];
|
||
|
||
double *Output_Voxels_ptr = new double[dims[0] * dims[1] * (dims[2] ? dims[2] : 1)];
|
||
;
|
||
|
||
// ~~~~ Create Pointer to return value from Duration of Kernel
|
||
// Create an array with the following layout
|
||
// 0: Total Durationtime all GPUs
|
||
// 1: Total Durationtime GPU 1
|
||
// 2: Total Durationtime GPU 2
|
||
// n: Total Durationtime GPU n
|
||
|
||
int m = (1 + selectedNumberGPUs), n = 1;
|
||
|
||
double *Duration_ptr = new double[m * n];
|
||
|
||
// ~~~~ Create Pointer to return Error/Abortvalue of each multithread
|
||
// int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int));
|
||
int *Abort_ptr = (int *)malloc(selectedNumberGPUs * sizeof(int));
|
||
m = aScanCount; // 1
|
||
n = AScan_Nx; // z.B. 3000
|
||
|
||
float *AscansOut_ptr = new float[m * n];
|
||
|
||
SPDLOG_INFO("preintegrateAscans!");
|
||
|
||
// NOTE (original author): with the current parameters this branch is always taken
|
||
//================================================================================================================ Preintegrate Ascans
|
||
if (SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration] == 1)
|
||
{
|
||
// printf( "(SAFT_VARIANT[0] == 1) => perform preintegrateAscans\n\n");
|
||
|
||
speed_vec_ptr = (float *)GetPr(speed);
|
||
// printf( " speed_vec_ptr[%3i] = %12.10f\n",0,speed_vec_ptr[0]);
|
||
if (speed_vec_ptr[0] == 0)
|
||
{
|
||
printf("First value in SOS Volume = 0 --> preintegrateAscans can't be performed!!! --> Exit");
|
||
}
|
||
|
||
//================================================================================================================
|
||
preintegrateAscans(aScan_ptr, AscansOut_ptr, speed_vec_ptr, aScanCount, aScanLength, IMAGE_RESOLUTION, sampleRate, debugMode, debugModeParameter);
|
||
//================================================================================================================
|
||
}
|
||
else
|
||
{
|
||
// printf( "(SAFT_VARIANT[0] == 0) => skip preintegrateAscans\n\n");
|
||
// Still copy the A-scan data unchanged into the output buffer (originally handed back to Matlab)
|
||
for (int j = 0; j < aScanCount; j++)
|
||
{ // ueber alle A-scans gehen.
|
||
for (int i = 0; i < aScanLength; i++)
|
||
{
|
||
// printf( " i (%4i) = %6.3f ",i, aScan_ptr[j*aScanLength+i]);
|
||
AscansOut_ptr[j * aScanLength + i] = aScan_ptr[j * aScanLength + i]; // nach Matlab zurueckgeben
|
||
// printf( " i (%4i) = %6.3f \n",i, AscansOut_ptr[j*aScanLength+i]);
|
||
}
|
||
}
|
||
}
|
||
|
||
//================================================================================================================ Start Reconstruction
|
||
//================================================================================================================
|
||
|
||
SPDLOG_INFO("multithreaded_processing!");
|
||
multithreaded_processing(aScan_ptr, Output_Voxels_ptr, receiver_index_ptr, emitter_index_ptr, receiver_list_ptr, receiver_list_Mx, emitter_list_ptr, emitter_list_Mx, speed_vec_ptr, SOSGrid_XYZ,
|
||
sosOffset, SOS_RESOLUTION, att_vec_ptr, AScan_Mx, AScan_Nx, regionOfInterestOffset, IMAGE_SIZE_XYZ, IMAGE_RESOLUTION, sampleRate, BlockDim_XYZ, Duration_ptr,
|
||
selectedNumberGPUs, enableGPUs_ptr, debugMode, debugModeParameter, SOSMode_3DVolume, ATTMode_3DVolume, SAFT_MODE, SAFT_VARIANT, SAFT_VARIANT_Size, Abort_ptr);
|
||
// Check if errors occurred
|
||
bool AbortedThreads = false;
|
||
for (int i = 0; i < selectedNumberGPUs; i++)
|
||
{
|
||
if (Abort_ptr[i] > 0)
|
||
{
|
||
printf("!!!!!!!!!!!!!!!!!!! Aborted Thread for GPU[%i] = %i\n", i, Abort_ptr[i]);
|
||
AbortedThreads = true;
|
||
}
|
||
}
|
||
free(Abort_ptr);
|
||
if (AbortedThreads)
|
||
printf(" Aborted Thread occurred -> see output history");
|
||
|
||
//================================================================================================================
|
||
//================================================================================================================
|
||
|
||
//================================================================================================================ Build Sum of IMAGE_SUM and current reconstructed Volume
|
||
SPDLOG_INFO("multithreaded_processing finish!");
|
||
|
||
float *outData = (float *)Output_Voxels.Data;
|
||
for (uint64_t i = 0; i < IMAGE_SUM_Count; i++)
|
||
{
|
||
outData[i] = Output_Voxels_ptr[i] + IMAGE_SUM_vec_ptr[i];
|
||
}
|
||
|
||
delete[] AscansOut_ptr;
|
||
delete[] Duration_ptr;
|
||
delete[] Output_Voxels_ptr;
|
||
SPDLOG_INFO("SAFT finish!");
|
||
return Output_Voxels;
|
||
}
|