595 lines
24 KiB
C++
595 lines
24 KiB
C++
|
|
// 1. Build with make
// -> this creates the following file: output/saft_sos.mexa64
// 2. Copy it into the working directory
// cp /home/kretzek/fser/sandbox/SAFT-GPU/output/saft_sos.mexa64 /home/kretzek/fser/USCT_SW/3DReconstruction/Reconstruction/Reflection/trunk/saft_sos_compute2_debugSoS.mexa64
|
|||
|
|
|
|||
|
|
|
|||
|
|
#pragma once
|
|||
|
|
|
|||
|
|
#include <stdint.h>
#include <stdio.h> // standard input/output

#include <cfloat>   // FLT_MAX
#include <iostream> // std::cout / std::endl used by DEBUG_MARK
#include <string>
#include <vector> // stl vector header

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Short aliases for the built-in unsigned integer types.
typedef unsigned char      uchar;
typedef unsigned short     ushort;
typedef unsigned long      ulong;
typedef unsigned long long ullong;
|
|||
|
|
|
|||
|
|
//Define Outputs for Debugmode
|
|||
|
|
//============================
|
|||
|
|
//#define debug_OutputFunctions // Funktionenaufrufe ausgeben
|
|||
|
|
//#define debug_OutputVariables // Werte der Variablen ausgeben
|
|||
|
|
//#define debug_OutputParameter // Uebersicht der Eingabedaten anzeigen sowie Infoblöcke in den einzelnen Schritten
|
|||
|
|
//#define debug_OutputMemory // Speicherverwaltung, Malloc, Free, Groessen
|
|||
|
|
//#define debug_OutputMaxMemory // Gibt aktuellen Speicherverbrauch an, wenn memoryCheck aufgerufen wird
|
|||
|
|
//#define debug_OutputInfo // Gibt Infos zu Schritten, Variablen,... aus
|
|||
|
|
//#define debug_OutputPerformance // Prints the run times and the individual multi-GPU performance figures of ProcessAscans (MemAlloc, PerformCoreReconstruction, Duration, FreeMem)
|
|||
|
|
//#define debug_OutputStepsPerformance // Prints the run times of the individual steps in performCoreReconstruction (Copy Ascans, Precalc, PerfCoreReconstruction, copy back)
|
|||
|
|
//#define debug_OutputZSteps // Gibt die Einteilung in Z-Richtung aus
|
|||
|
|
#define DebugOutputGPUIdx 0 // NOTE(review): presumably the index of the GPU whose debug output is printed -- confirm against its usage
|
|||
|
|
//#define debug_OutputHostStepsPerformance // Gibt die Laufzeiten für die eizelnen Schritte auf dem HOST aus (Preintegrated Ascans)
|
|||
|
|
//#define debug_OutputSAFTHandlerThreadPerformance // Gibt die Gesamt-Laufzeiten der einzelnen Multi-GPU Threads aus
|
|||
|
|
//#define debug_OutputMultiGpu // Einteilung des Volumens auf mehrerer GPUs ausgeben
|
|||
|
|
//#define debug_OutputStreams // Gibt die Schritte der Berechnung der Streams aus
|
|||
|
|
//#define debug_OutputSOSPaths // Gibt die Schritte und Werte der SOSPfadberechnung aus
|
|||
|
|
//#define debug_OutputSOSStepsParameter // Einteilung der ZLayer in SOSZlayer
|
|||
|
|
//#define debug_OutputLookUpGeometryMemoryList // Debugausgabe fuer die LookUpGeometryMemoryList (Constant Memory)
|
|||
|
|
|
|||
|
|
//#define OutputVolume // Ausgabe des Volumens
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Debugging CUDA Kernels
|
|||
|
|
//================================================
|
|||
|
|
//#define debug_CudaSAFTKernel
|
|||
|
|
//#define debug_CudaSAFTKernel_Median
|
|||
|
|
//#define debug_CudaPrecalculateKernel
|
|||
|
|
//#define debug_CudaRayTraceKernel
|
|||
|
|
//#define debug_CudaRayTraceKernelLive
|
|||
|
|
|
|||
|
|
//#define DebugSetMemoryToZero // Set SOSPathMemory to Zero as Initialisation
|
|||
|
|
|
|||
|
|
// Define specific hardware versions.
// One of GTX_590, GTX_690 or GTX_TITAN selects the target card. The selection may be
// supplied by the build (e.g. -DGTX_TITAN); if none is given, GTX_590 remains the default.
#if !defined(GTX_590) && !defined(GTX_690) && !defined(GTX_TITAN)
#define GTX_590
//#define GTX_690
//#define GTX_TITAN
#endif

// GTX_590 selects the Fermi code paths.
#if defined(GTX_590)
#define GTX_Fermi
#endif

// GTX_690 and GTX_TITAN select the Kepler code paths.
#if defined(GTX_690) || defined(GTX_TITAN)
#define GTX_Kepler
#endif
|
|||
|
|
|
|||
|
|
// GPU memory management and error detection
//================================================
//#define SaftNoTexture
//#define SaftCorrectSumOneAscan // 9.7-9.9 GVA/s // Skip wrong Numbers
#define SaftCorrectSumAllAscan // 8.2 GVA/s // Recalculation if too high numbers are calculated

#define SaftEmitterCache // Caching for Emitter Coordinates and Distance
//#define SaftEmitterCacheTernery // Caching for Emitter Coordinates and Distance
|
|||
|
|
|
|||
|
|
|
|||
|
|
// SAFT-SoS implementations
//================================================
//#define SaftSoSNoCache
//#define SaftSoSEmitterCache
//#define SaftSoSCombineTasCache // not implemented yet
//#define SaftSoSCombineInSoSVoxelCache
#define SaftSoSWithPrecalculateSoSZLayer
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Median filter configuration
#define SaftMedian
#define BRANCHLESS_MEDIAN // without this it crashes!
//#define SaftMedian_withMean3 // Mean of 3 Values
//#define SaftMedian_withMean5 // Mean of 5 Values
//#define SaftMedian_CalcOnlyMean // Mean of all buffered Values in Window

#define maxMedianWindowSize 96
|
|||
|
|
// Fallback for translation units in which <cfloat>/<float.h> has not provided FLT_MAX.
// Written as a decimal literal: hexadecimal floating literals are only standard C++
// from C++17 on. The value is the largest finite IEEE-754 single-precision number.
#ifndef FLT_MAX
#define FLT_MAX 3.402823466e+38F
#endif
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Integrate the A-scans up front in order to match the sample width to the resolution that is reconstructed

#define preAscanIntegrationToMatchSamplerateToResolution // integrate the A-scans over the window width
//#define debug_preAscanIntegration
#define DebugSammleMin 2990
#define DebugSammleMax 3000
//#define preAscanIntegrationVersion1Michael // version taken over unchanged from Michael
#define preAscanIntegrationVersion2Ernst // corrected variant with a more exact window width
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Parameters for the SAFT kernel
#define SaftLinearInterpolation // perform linear interpolation when accessing the A-scans

#define SaftUseConstantMemforGeometry // use the geometry data from constant memory
//#define SaftTextureForERIndexBlock // use texture memory for loading the emitter and receiver indices of the corresponding A-scan

#define debug_CudaSAFTKernelModes // Use variable debugMode for different calculation methods and output
//#define debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation // for error calculations

//#define SaftTextureForEmRecSosPathsTablesFloat1 // Use Float1-Textur for loading SOS-Paths -> Sum, Count separated
//#define SaftTextureForEmRecSosPathsTablesFloat2 // Use Float2-Textur for loading SOS-Paths -> Sum + Count for SOS for one position
#define SaftTextureForEmRecSosPathsTablesFloat4 // Use Float4-Textur for loading SOS-Paths -> Sum as well Count for SOS and ATT for one position

// Any of the three texture layouts above switches on the common texture code path.
#if defined(SaftTextureForEmRecSosPathsTablesFloat1) || defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
#define SaftTextureForEmRecSosPathsTables // Use Textur for loading SOS-Paths, -> Interpolation between SoSVoxelnPaths is possible
#endif
|
|||
|
|
|
|||
|
|
// Several SAFT_VARIANTs
// NOTE(review): presumably these are the positions inside the SAFT_VARIANT array that is
// handed to SAFTHandler -- confirm against the implementation.
#define SAFT_VARIANT_AscanPreintegration                       0
#define SAFT_VARIANT_AscanInterpolation                        1
#define SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing      2 // Use interpolation while Preprocessing
#define SAFT_VARIANT_3DVolumeInterpolationAtReconstruction     3 // Use interpolation while Reconstruction
#define SAFT_VARIANT_CalcStandardDeviation                     4
#define SAFT_VARIANT_SumUpOverBoarderIndices                   5
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Cache <-> shared memory
//#define SaftPreferSharedMem // cudaFuncCachePreferShared: shared memory is 48 KB
#define SaftPreferL1SharedMem // cudaFuncCachePreferL1: shared memory is 16 KB
//#define SaftPreferNone // cudaFuncCachePreferNone: no preference
|
|||
|
|
|
|||
|
|
// Receiver Cache mit shared Memory (nur bei kleinen Blockgroeßen)
|
|||
|
|
//#define SaftReceiverSharedMemCacheReceiverDistance
|
|||
|
|
//#define SaftCacheReceiverSOS
|
|||
|
|
//#define SaftReceiverSharedMemCacheReceiverSOS // Use Shared Memory for Caching
|
|||
|
|
//#define SaftRegisterCacheReceiverSOS // Use Register for Caching
|
|||
|
|
|
|||
|
|
// Computation of the mean speed of sound
//================================================
//#define SaftUseArithmeticMean // arithmetic Mean
#define SaftUseHarmonicMean // harmonic Mean // the correct one!!
|
|||
|
|
|
|||
|
|
|
|||
|
|
//#define SaftCalcSoSInKernel // Bresenham is recomputed separately for every voxel and path!
                              // ! Remove SOS_Version2, otherwise this does not work!

#define SaftTextureForBresenhamSosPaths // use texture memory for the SoS volume
//#define SaftTextureForBresenhamInterpolated // iSOS version --> is now passed in via a parameter
//#define SaftUseFastMath // FastMath for faster computation, but with errors at the border. A correction is needed for that.

//#define SaftUseSosAttFloat1 // use separate textures for the two volumes (SoS+Att) // currently not implemented
#define SaftUseSosAttFloat2 // use a single texture for both volumes (SoS+Att)

#define SOS_Version2 // correct version with definitions at the centre point
//#define SOS_Version3 // with the end points specified explicitly
|
|||
|
|
|
|||
|
|
|
|||
|
|
// MultiGPU
|
|||
|
|
//================================================
|
|||
|
|
// #define debug_SetNumGPU // Anzahl der GPUs festlegen
|
|||
|
|
// //#undef debug_SetNumGPU
|
|||
|
|
//
|
|||
|
|
// #ifdef debug_SetNumGPU
|
|||
|
|
// #define NUM_GPUS 1
|
|||
|
|
// #define NUM_DEVICEGPU 1 // Um diese Anzahl verschiebt sich alles also zB bei +1
|
|||
|
|
// #endif
|
|||
|
|
|
|||
|
|
// NOTE(review): going by the name, the upper bound on emitter/receiver entries kept in constant memory -- confirm against the kernels.
static const int MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY = 2340;
|
|||
|
|
|
|||
|
|
// Distance computation variant (exactly one is enabled)
#define Distanz_Standard //172 MV/s //14,5 GVA/s
//#define Distanz_Heron2
//#define Distanz_Memory 100 // with a 100-value LUT memory //11,53 GVA/s //Diff [0 .. 0.0828] very bad!
//#define Distanz_Memory 1000 // with a 1000-value LUT memory //12,6 GVA/s //Diff [0 .. 0.0096]
//#define Distanz_Memory 1000_Heron //281 MV/s //Diff [0 .. 2.3176e-004]
//#define Distanz_Memory 10000 // with a 10000-value LUT memory //11,58 GVA/s //Diff [0 .. 9.6333e-004]
//#define Distanz_Memory 100000 // with a 100000-value LUT memory //375 MV/s
//#define Use_Distanz_SharedMemory
|
|||
|
|
|
|||
|
|
//Macro used to perform CUDA calls. Throws an exception in case of a CUDA error. Also shows on which line it occurred.
//The expansion carries no trailing semicolon, so "CUDA_CHECK(call);" is exactly one statement
//and stays valid as the unbraced body of an if/else. The argument is parenthesised so that
//any expression can be passed safely.
#define CUDA_CHECK(operation) performCUDAResultCheck((operation), __FILE__, __LINE__)
|
|||
|
|
|
|||
|
|
//Macro used to see when a particular line of code is executed on the host.
//Requires <iostream>. The expansion is parenthesised so that it behaves as a single
//expression wherever it is used (e.g. next to '!' or inside '?:').
#define DEBUG_MARK (std::cout << "[DEBUG] file " << __FILE__ << ", line " << __LINE__ << std::endl)
|
|||
|
|
|
|||
|
|
|
|||
|
|
//Convenient typedefs for containers
|
|||
|
|
typedef std::vector<cudaDeviceProp> DeviceProperties;
|
|||
|
|
typedef std::vector<dim3> Dimensions;
|
|||
|
|
|
|||
|
|
|
|||
|
|
/**
|
|||
|
|
Most important class in the application.
|
|||
|
|
- Haupt-Klasse der Applikation
|
|||
|
|
It is responsible for all of the image reconstruction.
|
|||
|
|
- Sie ist verantwortlich fuer alle BildRekonstruktionen
|
|||
|
|
*/
|
|||
|
|
class SAFTHandler
|
|||
|
|
{
|
|||
|
|
public:
|
|||
|
|
SAFTHandler(int deviceId,
|
|||
|
|
int deviceIndex,
|
|||
|
|
float *aScan_ptr, ///< Zeiger zu den AScandaten //std::string const & aScanSamplesPath,
|
|||
|
|
double *output_ptr, ///< Zeiger zu den Outputdaten //std::string const & outputPath,
|
|||
|
|
double *Duration_ptr, ///< Zeiger auf Ausgabewert f<>r benoetigte Laufzeit des SAFT-Kernels
|
|||
|
|
unsigned short *receiver_index_ptr, ///<
|
|||
|
|
unsigned short *emitter_index_ptr, ///<
|
|||
|
|
float *receiver_list_ptr, ///<
|
|||
|
|
int receiver_list_Size, ///<
|
|||
|
|
float *emitter_list_ptr, ///<
|
|||
|
|
int emitter_list_Size, ///<
|
|||
|
|
float *speed_vec_ptr,
|
|||
|
|
int3 SOSGrid_XYZ,
|
|||
|
|
float3 sosOffset, ///< Startpoint of SoSGrid
|
|||
|
|
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
|||
|
|
float *att_vec_ptr, //att_vec_ptr
|
|||
|
|
|
|||
|
|
int aScanCount,
|
|||
|
|
int aScanLength,
|
|||
|
|
int3 IMAGE_SIZE_XYZ,
|
|||
|
|
float sampleRate,
|
|||
|
|
float3 regionOfInterestOffset,
|
|||
|
|
float IMAGE_RESOLUTION,
|
|||
|
|
dim3 const & fixedBlockDimensions,
|
|||
|
|
|
|||
|
|
int medianWindowSize, ///< define width of used median filter
|
|||
|
|
|
|||
|
|
float debugMode,
|
|||
|
|
float debugModeParameter,
|
|||
|
|
//bool useFixedPartialOutputWindow,
|
|||
|
|
|
|||
|
|
bool SOSMode_3DVolume,
|
|||
|
|
bool ATTMode_3DVolume,
|
|||
|
|
|
|||
|
|
int SAFT_MODE,
|
|||
|
|
int *SAFT_VARIANT
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
void performReconstruction();
|
|||
|
|
|
|||
|
|
private:
|
|||
|
|
bool SOSMode_3DVolume,
|
|||
|
|
ATTMode_3DVolume;
|
|||
|
|
|
|||
|
|
int SAFT_MODE;
|
|||
|
|
int *SAFT_VARIANT;
|
|||
|
|
int *deviceSAFT_VARIANT;
|
|||
|
|
|
|||
|
|
#ifdef Distanz_Memory
|
|||
|
|
float *deviceWurzelApprox;
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
int deviceId;
|
|||
|
|
int deviceIndex;
|
|||
|
|
float debugMode;
|
|||
|
|
float debugModeParameter;
|
|||
|
|
|
|||
|
|
DeviceProperties deviceProperties;
|
|||
|
|
float
|
|||
|
|
*aScan_ptr;
|
|||
|
|
|
|||
|
|
// float
|
|||
|
|
// *rec_vec_ptr,
|
|||
|
|
// *send_vec_ptr;
|
|||
|
|
|
|||
|
|
unsigned short
|
|||
|
|
*emitter_index_ptr,
|
|||
|
|
*receiver_index_ptr;
|
|||
|
|
|
|||
|
|
float
|
|||
|
|
*emitter_list_ptr,
|
|||
|
|
*receiver_list_ptr;
|
|||
|
|
|
|||
|
|
int
|
|||
|
|
receiver_list_Size,
|
|||
|
|
emitter_list_Size;
|
|||
|
|
|
|||
|
|
double
|
|||
|
|
*output_ptr;
|
|||
|
|
|
|||
|
|
double
|
|||
|
|
*Duration_ptr;
|
|||
|
|
|
|||
|
|
float
|
|||
|
|
Sos,
|
|||
|
|
*speed_vec_ptr,
|
|||
|
|
*att_vec_ptr;
|
|||
|
|
|
|||
|
|
int3
|
|||
|
|
SOSGrid_XYZ;
|
|||
|
|
|
|||
|
|
float3
|
|||
|
|
sosOffset; ///< Startpoint of SoSGrid
|
|||
|
|
|
|||
|
|
int
|
|||
|
|
aScanCount,
|
|||
|
|
aScanLength;
|
|||
|
|
|
|||
|
|
int3
|
|||
|
|
IMAGE_SIZE_XYZ;
|
|||
|
|
|
|||
|
|
float3 regionOfInterestSize; // ROI-Groesse in meter
|
|||
|
|
|
|||
|
|
float3
|
|||
|
|
regionOfInterestOffset; //imageStartpoint; TODO: umbenennen!
|
|||
|
|
|
|||
|
|
float
|
|||
|
|
IMAGE_RESOLUTION, ///< Aufl<66>sung im OutputVolumen
|
|||
|
|
IMAGE_RESOLUTION_FACTOR, ///< 1/Aufl<66>sung im OutputVolumen
|
|||
|
|
SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
|||
|
|
SOS_RESOLUTION_FACTOR; ///< 1/Aufl<66>sung im SoS-Grid
|
|||
|
|
|
|||
|
|
std::string
|
|||
|
|
emitterGeometryPath,
|
|||
|
|
receiverGeometryPath,
|
|||
|
|
aScanSamplesPath,
|
|||
|
|
outputPath;
|
|||
|
|
|
|||
|
|
// bool
|
|||
|
|
// printPerformanceAnalysis,
|
|||
|
|
// printSortedAutoTuningResults;
|
|||
|
|
|
|||
|
|
|
|||
|
|
float *aScanSamples;
|
|||
|
|
double *output;
|
|||
|
|
//int aScanCount;
|
|||
|
|
int
|
|||
|
|
aScanSize,
|
|||
|
|
batchSize,
|
|||
|
|
aScanBatchSize;
|
|||
|
|
|
|||
|
|
float voxelSize;
|
|||
|
|
|
|||
|
|
float sampleRate;
|
|||
|
|
|
|||
|
|
//size_t
|
|||
|
|
uint64_t
|
|||
|
|
regionOfInterestVoxelCount,
|
|||
|
|
outputSize;
|
|||
|
|
|
|||
|
|
uint64_t
|
|||
|
|
partialOutputZLayerOffset;
|
|||
|
|
|
|||
|
|
int
|
|||
|
|
partialOutputZLayerOffsetCount,
|
|||
|
|
partialOutputSoSZLayerCount,
|
|||
|
|
currentZLayerCount,
|
|||
|
|
partialSoSZLayerCount;
|
|||
|
|
|
|||
|
|
double *currentHostOutputAdress;
|
|||
|
|
|
|||
|
|
// Pointer of Inputdata in memory of Ascanblock
|
|||
|
|
float3
|
|||
|
|
*receiver_list, // LookUpTable receiverNr -> coordinates
|
|||
|
|
*emitter_list; // LookUpTable emitterNr -> coordinates
|
|||
|
|
|
|||
|
|
unsigned short
|
|||
|
|
*receiver_index, // Input Ascanblockdata: corresponding receiverNr
|
|||
|
|
*emitter_index; // Input Ascanblockdata: corresponding emitterNr
|
|||
|
|
|
|||
|
|
float
|
|||
|
|
*SoSData; // Input Ascanblockdata: Corresponding SOS value
|
|||
|
|
|
|||
|
|
float *speedOfSoundField; // Input Ascanblockdata: Corresponding SOS value as volume TODO: ==> in speedOfSoundGrid umbenennen
|
|||
|
|
float *attenuationField; // Input Ascanblockdata: Corresponding ATT value as volume TODO: ==> in attenuationGrid umbenennen
|
|||
|
|
|
|||
|
|
#ifdef SaftUseSosAttFloat2
|
|||
|
|
float2 *hostSosAttField;
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
// Memorysizes
|
|||
|
|
//std::size_t
|
|||
|
|
int
|
|||
|
|
speedOfSoundFieldVoxelCount, //
|
|||
|
|
speedOfSoundFieldBytes, //
|
|||
|
|
speedOfSoundEmitterVoxelPathCountByteSize, // Speichergroesse fuer die Anzahl der Voxel, die auf einem Pfad liegen
|
|||
|
|
speedOfSoundEmitterVoxelPathSumByteSize; // Speichergroesse fuer die Summe der Schallgeschwindigkeiten auf dem Pfad zu einem Voxel
|
|||
|
|
|
|||
|
|
dim3
|
|||
|
|
fixedBlockDimensions, // kann ws durch genericSAFTBlockDimensions ersetzt
|
|||
|
|
genericSAFTBlockDimensions,
|
|||
|
|
genericSAFTGridDimensions,
|
|||
|
|
windowGridDimensions;
|
|||
|
|
|
|||
|
|
int medianWindowSize; // define width of used median filter
|
|||
|
|
|
|||
|
|
|
|||
|
|
#ifdef SaftNoTexture
|
|||
|
|
float ** deviceAScans;
|
|||
|
|
#else
|
|||
|
|
cudaArray **deviceAScansCuArray;
|
|||
|
|
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
#ifdef SaftTextureForBresenhamSosPaths
|
|||
|
|
|
|||
|
|
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
|
|||
|
|
cudaArray *deviceSpeedOfSoundFieldCuArray; // SOS volume
|
|||
|
|
cudaArray *deviceAttenuationFieldCuArray; // ATT volume
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
|||
|
|
cudaArray *deviceSosAttFieldCuArray;
|
|||
|
|
#endif
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
int maxSoSReceiverArrayForTexture;
|
|||
|
|
int TableVoxelToReceiverPathSosAllocationCount;
|
|||
|
|
std::size_t receiver_list_Size_deviceMemory;
|
|||
|
|
|
|||
|
|
#ifdef SaftTextureForEmRecSosPathsTables
|
|||
|
|
// Für Emitter ----- normal definieren
|
|||
|
|
cudaArray *deviceTableVoxelToEmitterPathSosSumCuArray; //SoSSum
|
|||
|
|
//cudaPitchedPtr pitchedTableVoxelToEmitterPathSosSumDevPtr;
|
|||
|
|
|
|||
|
|
cudaArray *deviceTableVoxelToEmitterPathCountCuArray; //Count
|
|||
|
|
//cudaPitchedPtr pitchedTableVoxelToEmitterPathCountDevPtr;
|
|||
|
|
|
|||
|
|
// Für Receiver ----- als Arrays definieren
|
|||
|
|
cudaArray **deviceTableVoxelToReceiverPathSosSumCuArray; //SoSSum
|
|||
|
|
//cudaPitchedPtr * pitchedTableVoxelToReceiverPathSosSumDevPtr;
|
|||
|
|
|
|||
|
|
cudaArray **deviceTableVoxelToReceiverPathCountCuArray; //Count
|
|||
|
|
//cudaPitchedPtr * pitchedTableVoxelToReceiverPathCountDevPtr;
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
#if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
|
|||
|
|
cudaArray *deviceTableVoxelToEmPathSosBothCuArray; //Emitter SoSSum + Count
|
|||
|
|
cudaArray **deviceTableVoxelToRecPathSosBothCuArray; //Receiver SoSSum + Count
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
#ifdef SaftTextureForERIndexBlock
|
|||
|
|
cudaArray * deviceEmIndexBlockCuArray;
|
|||
|
|
cudaArray * deviceRecIndexBlockCuArray;
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
// Schallgeschwindigkeitskorrektur-Mode
|
|||
|
|
float *deviceSpeedOfSoundField; // Adressen fuer Speicherfuer Schallgeschwindigkeitsgrid auf der GPU
|
|||
|
|
|
|||
|
|
// Block-Mode
|
|||
|
|
unsigned short *deviceEmitterIndex_block; // Adressen fuer Speicher fuer Index der Geometriedaten auf der GPU
|
|||
|
|
unsigned short *deviceReceiverIndex_block;
|
|||
|
|
float3 *deviceListEmitterGeometry; // Adressen fuer Speicher fuer Zuordnung Index <-> Geometriedaten auf der GPU
|
|||
|
|
float3 *deviceListReceiverGeometry;
|
|||
|
|
|
|||
|
|
float *deviceSoSData_block; // Adressen fuer Speicher fuer Schallgeschwindigkeitsdaten auf der GPU
|
|||
|
|
|
|||
|
|
// VoxelCountType // Adressen fuer Speicher der SoS-Pfade auf der GPU
|
|||
|
|
// * deviceTableVoxelToEmitterPathCount,
|
|||
|
|
// * deviceTableVoxelToReceiverPathCount;
|
|||
|
|
float
|
|||
|
|
*deviceTableVoxelToEmitterPathCountFloat,
|
|||
|
|
*deviceTableVoxelToReceiverPathCountFloat,
|
|||
|
|
*deviceTableVoxelToEmitterPathSosSum,
|
|||
|
|
*deviceTableVoxelToReceiverPathSosSum;
|
|||
|
|
|
|||
|
|
|
|||
|
|
bool *deviceValidEmitterReceiverCombinations;
|
|||
|
|
|
|||
|
|
int *deviceTransducerVectorAnalysisDistributionCounters;
|
|||
|
|
|
|||
|
|
// float3
|
|||
|
|
// * deviceEmitterGeometry,
|
|||
|
|
// * deviceReceiverGeometry;
|
|||
|
|
|
|||
|
|
int usedAmountOfEmitter, // amount of used emitter
|
|||
|
|
usedAmountOfReceiver; // amount of used receiver
|
|||
|
|
|
|||
|
|
// Output volume
|
|||
|
|
double *deviceOutput;
|
|||
|
|
|
|||
|
|
//Streams used for synchronisation
|
|||
|
|
cudaStream_t
|
|||
|
|
copyStream,
|
|||
|
|
calculationStream;
|
|||
|
|
|
|||
|
|
//This variable describes the number of allocations used by the current SAFT mode
|
|||
|
|
std::size_t aScanAllocationCount;
|
|||
|
|
|
|||
|
|
int
|
|||
|
|
invalidEmitterReceiverCombinationsCount,
|
|||
|
|
validEmitterReceiverCombinationsCount;
|
|||
|
|
|
|||
|
|
Dimensions validBlockDimensions;
|
|||
|
|
bool useAutoTuning;
|
|||
|
|
// AutoTuningConfiguration autoTuningConfiguration;
|
|||
|
|
|
|||
|
|
size_t
|
|||
|
|
partialOutputSize,
|
|||
|
|
partialVolumeSize, // Speicher(OutputVolumen), der fuer die entsprechende Anzahl an Z-Layern benötigt wuerde
|
|||
|
|
partialSosPathSize, // Speicher(OutputVolumen), der fuer die entsprechende Anzahl an SoS-Z-Layer benötigt wuerde
|
|||
|
|
maxFeasibleZLayerCount, // Maximal moegliche Anzahl an Z-Layern wird zu Beginn auf # die in eine SOS Z-layer passt gesetzt.
|
|||
|
|
maxFeasibleSosZLayerCount; // Maximal moegliche Anzahl an Sos-Z-Layern wird zu Beginn auf Anzahl der noetigen SoS-Z-Layern für die OutputDaten gesetzt.
|
|||
|
|
|
|||
|
|
int
|
|||
|
|
minimumAutoTuningThreadCount,
|
|||
|
|
maximumAutoTuningThreadCount;
|
|||
|
|
|
|||
|
|
|
|||
|
|
//New partial reconstruction data
|
|||
|
|
|
|||
|
|
std::size_t partialSpeedOfSoundVoxelCount;
|
|||
|
|
std::size_t partialOutputZLayerCount;
|
|||
|
|
std::size_t zLayerVoxelCount;
|
|||
|
|
std::size_t sosZLayerVoxelCount; // Anzahl der X-Y-SOSVoxel in einer SoS-Layer. //saft.hpp
|
|||
|
|
std::size_t partialOutputVoxelCount;
|
|||
|
|
|
|||
|
|
std::size_t
|
|||
|
|
//deviceTableVoxelToEmitterPathCountSize,
|
|||
|
|
deviceTableVoxelToEmitterPathCountFloatSize,
|
|||
|
|
deviceTableVoxelToEmitterPathSosSumSize,
|
|||
|
|
//deviceTableVoxelToReceiverPathCountSize,
|
|||
|
|
deviceTableVoxelToReceiverPathCountFloatSize,
|
|||
|
|
deviceTableVoxelToReceiverPathSosSumSize;
|
|||
|
|
|
|||
|
|
double diff_time; // For Time Measurement
|
|||
|
|
float transferRate; // For DataTransferrate Measurement
|
|||
|
|
float performRate; // For PerformSAFTrate Measurement
|
|||
|
|
cudaDeviceProp deviceProp; // Ausgabe der Frequenz
|
|||
|
|
|
|||
|
|
|
|||
|
|
//Core reconstruction
|
|||
|
|
|
|||
|
|
void processAScans(ullong & duration);
|
|||
|
|
void performCoreReconstruction();
|
|||
|
|
|
|||
|
|
//Pre-calculation
|
|||
|
|
|
|||
|
|
void precalculateAverageSpeedOfSound(int zLayer, int zLayerCount);
|
|||
|
|
// void analysisOfTransducerVectors();
|
|||
|
|
|
|||
|
|
// void normalisePerformanceStatisticsOutput();
|
|||
|
|
// void printTransducerVectorStatistics();
|
|||
|
|
|
|||
|
|
//Auto-tuning
|
|||
|
|
bool determineGridDimensions(dim3 const & blockDimensions, dim3 & gridDimensions);
|
|||
|
|
void determineValidBlockDimensions();
|
|||
|
|
|
|||
|
|
|
|||
|
|
void reduceKernelDimensions(dim3 const & gridDimensions, dim3 const & blockDimensions, dim3 & reducedGridDimensions, dim3 & reducedBlockDimensions);
|
|||
|
|
|
|||
|
|
//Pre-calculation kernels
|
|||
|
|
#ifdef SaftUseConstantMemforGeometry
|
|||
|
|
//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
|
|||
|
|
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
|
|||
|
|
#else
|
|||
|
|
//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceSpeedOfSoundSumOutput);
|
|||
|
|
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, float * deviceSpeedOfSoundSumOutput);
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
|
|||
|
|
// void analyseTransducerVectors(dim3 gridDimensions, dim3 blockDimensions);
|
|||
|
|
|
|||
|
|
//SAFT kernels
|
|||
|
|
//void performInterpolation(float * deviceAScans, float * deviceOutput, dim3 gridDimensions, dim3 blockDimensions, cudaStream_t stream);
|
|||
|
|
//void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float const * deviceAScans); //, cudaStream_t stream);
|
|||
|
|
#ifdef SaftNoTexture
|
|||
|
|
void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, float * deviceAScans ); //Ascans im Devicememory
|
|||
|
|
#else
|
|||
|
|
void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, int maxFeasibleSosZLayerCount, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, cudaArray * deviceAScansCuArray); //Ascans in CuArray f<>r Texturmemory
|
|||
|
|
//void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, cudaArray * deviceSpeedOfSoundFieldCuArray, cudaArray * deviceAScansCuArray); //Ascans in CuArray f<>r Texturmemory
|
|||
|
|
#endif
|
|||
|
|
|
|||
|
|
//Utility functions
|
|||
|
|
bool setGenericDimensions();
|
|||
|
|
std::size_t resolutionConversion(std::size_t input, std::size_t greaterResolution, std::size_t lowerResolution);
|
|||
|
|
void partialReconstructionInitialisation();
|
|||
|
|
std::size_t getCurrentZLayerCount(std::size_t zOffset);
|
|||
|
|
void getCurrentSpeedOfSoundVariables(std::size_t zOffset, std::size_t currentZLayerCount, std::size_t & currentSpeedOfSoundZLayer, std::size_t & currentSpeedOfSoundPartialZLayerCount);
|
|||
|
|
void determineSpeedOfSoundData(std::size_t regionOfInterestZLayers);
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
//std::string vectorToString(float3 const & vector);
|
|||
|
|
//std::string voxelToString(dim3 const & voxel);
|
|||
|
|
void memoryCheck(); // defined in another translation unit
|
|||
|
|
|
|||
|
|
// Checks the result of a CUDA call; `file` and `line` identify the call site (defined in another translation unit).
void performCUDAResultCheck(cudaError_t result, const std::string &file, int line);
|
|||
|
|
|
|||
|
|
|