Commit source
This commit is contained in:
594
SAFT_ATT/src/saft.hpp
Normal file
594
SAFT_ATT/src/saft.hpp
Normal file
@@ -0,0 +1,594 @@
|
||||
// 1. Compilieren mit make
|
||||
// -> es wird folgende Datei erstellt: output/saft_sos.mexa64
|
||||
// 2. Kopieren in Arbeitsordner
|
||||
// cp /home/kretzek/fser/sandbox/SAFT-GPU/output/saft_sos.mexa64 /home/kretzek/fser/USCT_SW/3DReconstruction/Reconstruction/Reflection/trunk/saft_sos_compute2_debugSoS.mexa64
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
#include <stdio.h> // standard input/output

#include <iostream> // std::cout / std::endl used by DEBUG_MARK
#include <string>
#include <vector> // stl vector header

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
|
||||
|
||||
|
||||
// Short aliases for the built-in unsigned integer types.
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned long ulong; // NOTE(review): width of unsigned long is platform dependent — confirm no size assumptions are made
|
||||
typedef unsigned long long ullong; // used as the duration type of SAFTHandler::processAScans
|
||||
|
||||
//Define Outputs for Debugmode
|
||||
//============================
|
||||
//#define debug_OutputFunctions // Funktionenaufrufe ausgeben
|
||||
//#define debug_OutputVariables // Werte der Variablen ausgeben
|
||||
//#define debug_OutputParameter // Uebersicht der Eingabedaten anzeigen sowie Infoblöcke in den einzelnen Schritten
|
||||
//#define debug_OutputMemory // Speicherverwaltung, Malloc, Free, Groessen
|
||||
//#define debug_OutputMaxMemory // Gibt aktuellen Speicherverbrauch an, wenn memoryCheck aufgerufen wird
|
||||
//#define debug_OutputInfo // Gibt Infos zu Schritten, Variablen,... aus
|
||||
//#define debug_OutputPerformance // Gibt die Laufzeiten und die einzelnen Multi-GPU Performanzwerte von ProcessAscans aus (MemAlloc, PerformCoreReconstruction, Duration, FreeMem)
|
||||
//#define debug_OutputStepsPerformance // Gibt die Laufzeiten für die einzelnen Schritte in performCoreReconstruction aus (Copy Ascans, Precalc, PerfCoreReconstruction, copy back)
|
||||
//#define debug_OutputZSteps // Gibt die Einteilung in Z-Richtung aus
|
||||
#define DebugOutputGPUIdx 0
|
||||
//#define debug_OutputHostStepsPerformance // Gibt die Laufzeiten für die einzelnen Schritte auf dem HOST aus (Preintegrated Ascans)
|
||||
//#define debug_OutputSAFTHandlerThreadPerformance // Gibt die Gesamt-Laufzeiten der einzelnen Multi-GPU Threads aus
|
||||
//#define debug_OutputMultiGpu // Einteilung des Volumens auf mehrere GPUs ausgeben
|
||||
//#define debug_OutputStreams // Gibt die Schritte der Berechnung der Streams aus
|
||||
//#define debug_OutputSOSPaths // Gibt die Schritte und Werte der SOSPfadberechnung aus
|
||||
//#define debug_OutputSOSStepsParameter // Einteilung der ZLayer in SOSZlayer
|
||||
//#define debug_OutputLookUpGeometryMemoryList // Debugausgabe fuer die LookUpGeometryMemoryList (Constant Memory)
|
||||
|
||||
//#define OutputVolume // Ausgabe des Volumens
|
||||
|
||||
|
||||
// Debugging CUDA Kernels
|
||||
//================================================
|
||||
//#define debug_CudaSAFTKernel
|
||||
//#define debug_CudaSAFTKernel_Median
|
||||
//#define debug_CudaPrecalculateKernel
|
||||
//#define debug_CudaRayTraceKernel
|
||||
//#define debug_CudaRayTraceKernelLive
|
||||
|
||||
//#define DebugSetMemoryToZero // Set SOSPathMemory to Zero as Initialisation
|
||||
|
||||
// Select the targeted GPU model. Exactly the enabled model macro below decides
// which architecture macro (GTX_Fermi or GTX_Kepler) gets defined further down.
|
||||
#define GTX_590
|
||||
//#define GTX_690
|
||||
//#define GTX_TITAN
|
||||
|
||||
#if defined(GTX_590) // GTX 590 selects the Fermi architecture macro
|
||||
#define GTX_Fermi
|
||||
#endif
|
||||
#if defined(GTX_690) || defined(GTX_TITAN) // GTX 690 and GTX Titan select the Kepler architecture macro
|
||||
#define GTX_Kepler
|
||||
#endif
|
||||
|
||||
// Speichermanagement der GPU sowie Errordetektion
|
||||
//================================================
|
||||
//#define SaftNoTexture
|
||||
//#define SaftCorrectSumOneAscan // 9.7-9.9 GVA/s // Skip wrong Numbers
|
||||
#define SaftCorrectSumAllAscan // 8.2 GVA/s // Recalculation if too high numbers are calculated
|
||||
|
||||
#define SaftEmitterCache // Caching for Emitter Coordinates and Distance
|
||||
//#define SaftEmitterCacheTernery // Caching for Emitter Coordinates and Distance
|
||||
|
||||
|
||||
// SAFT- SOS Implementierungen
|
||||
//================================================
|
||||
//#define SaftSoSNoCache
|
||||
//#define SaftSoSEmitterCache
|
||||
//#define SaftSoSCombineTasCache // noch nicht implementiert
|
||||
//#define SaftSoSCombineInSoSVoxelCache
|
||||
#define SaftSoSWithPrecalculateSoSZLayer
|
||||
|
||||
|
||||
// Median configuration.
// NOTE(review): presumably these switches select the median-filter code path of the SAFT kernel — confirm in the kernel sources.
#define SaftMedian
|
||||
#define BRANCHLESS_MEDIAN // Without this define the program crashes!
|
||||
//#define SaftMedian_withMean3 // Mean of 3 Values
|
||||
//#define SaftMedian_withMean5 // Mean of 5 Values
|
||||
//#define SaftMedian_CalcOnlyMean // Mean of all buffered Values in Window
|
||||
|
||||
#define maxMedianWindowSize 96 // NOTE(review): presumably the upper bound for SAFTHandler::medianWindowSize — confirm
|
||||
// Fallback definition of FLT_MAX for translation units in which <float.h> /
// <cfloat> has not been included (FLT_MAX is a preprocessor macro and is only
// visible after one of those headers).
// The value is the largest finite IEEE-754 single-precision number. It is
// written as a decimal literal because hexadecimal floating-point literals
// (0x1.fffffep127f) are only part of standard C++ since C++17 and are not
// accepted by every host compiler in older language modes.
#ifndef FLT_MAX
#define FLT_MAX 3.402823466e+38F
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
// Integration der A-scans im Vornherein durchfuehren um Samplebreite an zu rekonstruierende Aufloesung anzupassen
|
||||
|
||||
#define preAscanIntegrationToMatchSamplerateToResolution // Integration der Ascans ueber Fensterbreite durchfuehren
|
||||
//#define debug_preAscanIntegration
|
||||
#define DebugSammleMin 2990
|
||||
#define DebugSammleMax 3000
|
||||
//#define preAscanIntegrationVersion1Michael // direkt übernommene Version von Michael
|
||||
#define preAscanIntegrationVersion2Ernst // korrigierte Variante mit genauerer Fensterbreite
|
||||
|
||||
|
||||
|
||||
|
||||
// Parameters for the SAFT kernel
|
||||
#define SaftLinearInterpolation // Use linear interpolation when accessing the A-scans
|
||||
|
||||
#define SaftUseConstantMemforGeometry // Use the geometry data from constant memory
|
||||
//#define SaftTextureForERIndexBlock // Use texture memory for loading the emitter and receiver indices of the corresponding A-scan
|
||||
|
||||
#define debug_CudaSAFTKernelModes // Use the variable debugMode to select different calculation methods and outputs
|
||||
//#define debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation // For error calculations
|
||||
|
||||
//#define SaftTextureForEmRecSosPathsTablesFloat1 // Use a float1 texture for loading the SOS paths -> sum and count separated
|
||||
//#define SaftTextureForEmRecSosPathsTablesFloat2 // Use a float2 texture for loading the SOS paths -> sum + count for SOS for one position
|
||||
#define SaftTextureForEmRecSosPathsTablesFloat4 // Use a float4 texture for loading the SOS paths -> sum as well as count for SOS and ATT for one position
|
||||
|
||||
// Enabling any of the Float1/Float2/Float4 variants above also defines the generic switch below.
#if defined(SaftTextureForEmRecSosPathsTablesFloat1) || defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
|
||||
#define SaftTextureForEmRecSosPathsTables // Use a texture for loading the SOS paths -> interpolation between the SoS voxel paths is possible
|
||||
#endif
|
||||
|
||||
// Several SAFT_VARIANTs
|
||||
#define SAFT_VARIANT_AscanPreintegration 0
|
||||
#define SAFT_VARIANT_AscanInterpolation 1
|
||||
#define SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing 2 // Use interpolation while Preprocessing
|
||||
#define SAFT_VARIANT_3DVolumeInterpolationAtReconstruction 3 // Use interpolation while Reconstruction
|
||||
#define SAFT_VARIANT_CalcStandardDeviation 4
|
||||
#define SAFT_VARIANT_SumUpOverBoarderIndices 5
|
||||
|
||||
|
||||
// Split between L1 cache and shared memory (cudaFuncCache preference)
|
||||
//#define SaftPreferSharedMem // cudaFuncCachePreferShared: shared memory is 48 KB
|
||||
#define SaftPreferL1SharedMem // cudaFuncCachePreferL1: shared memory is 16 KB
|
||||
//#define SaftPreferNone // cudaFuncCachePreferNone: no preference
|
||||
|
||||
// Receiver Cache mit shared Memory (nur bei kleinen Blockgroeßen)
|
||||
//#define SaftReceiverSharedMemCacheReceiverDistance
|
||||
//#define SaftCacheReceiverSOS
|
||||
//#define SaftReceiverSharedMemCacheReceiverSOS // Use Shared Memory for Caching
|
||||
//#define SaftRegisterCacheReceiverSOS // Use Register for Caching
|
||||
|
||||
// Berechnung der mittleren Schallgeschwindigkeit
|
||||
//================================================
|
||||
//#define SaftUseArithmeticMean // arithmetic Mean
|
||||
#define SaftUseHarmonicMean // harmonic Mean //das Richtige!!
|
||||
|
||||
|
||||
//#define SaftCalcSoSInKernel // Bresenham wird noch mal speziell bei jedem Voxel und Pfad durchgerechnet!
|
||||
// ! SOS_Version2 rausnehmen sonst gehts nicht!
|
||||
|
||||
#define SaftTextureForBresenhamSosPaths // Texturmemory für SOS-Volumen nutzen
|
||||
//#define SaftTextureForBresenhamInterpolated //iSOS-Version --> wird nun ueber Parameter uebergeben
|
||||
//#define SaftUseFastMath //FastMath fuer schnellere Berechnung aber Fehler am Rand. Dafuer ist Korrektur noetig.
|
||||
|
||||
//#define SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) // Aktuell nicht implementiert
|
||||
#define SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
|
||||
#define SOS_Version2 // korrekte Version mit Definitionen im Mittelpunkt
|
||||
//#define SOS_Version3 // Mit extra Angabe der Endpkte
|
||||
|
||||
|
||||
// MultiGPU
|
||||
//================================================
|
||||
// #define debug_SetNumGPU // Anzahl der GPUs festlegen
|
||||
// //#undef debug_SetNumGPU
|
||||
//
|
||||
// #ifdef debug_SetNumGPU
|
||||
// #define NUM_GPUS 1
|
||||
// #define NUM_DEVICEGPU 1 // Um diese Anzahl verschiebt sich alles also zB bei +1
|
||||
// #endif
|
||||
|
||||
// NOTE(review): presumably the maximum number of emitter/receiver geometry entries kept in
// constant memory (see SaftUseConstantMemforGeometry) — confirm how 2340 was derived.
const int MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY = 2340;
|
||||
|
||||
#define Distanz_Standard //172 MV/s //14,5 GVA/s
|
||||
//#define Distanz_Heron2
|
||||
//#define Distanz_Memory 100 //Mit 100-Werte LUT-Memory //11,53 GVA/s //Diff [0 .. 0.0828] sehr schlecht!
|
||||
//#define Distanz_Memory 1000 //Mit 1000-Werte LUT-Memory //12,6 GVA/s //Diff [0 .. 0.0096]
|
||||
//#define Distanz_Memory 1000_Heron //281 MV/s //Diff [0 .. 2.3176e-004]
|
||||
//#define Distanz_Memory 10000 //Mit 10000-Werte LUT-Memory //11,58 GVA/s //Diff [0 .. 9.6333e-004]
|
||||
//#define Distanz_Memory 100000 //Mit 100000-Werte LUT-Memory //375 MV/s
|
||||
//#define Use_Distanz_SharedMemory
|
||||
|
||||
// Macro used to perform CUDA calls: forwards the result of `operation` together with the
// call site's __FILE__ and __LINE__ to performCUDAResultCheck (declared at the end of this header).
// According to the original author it throws an exception in case of a CUDA error.
// NOTE(review): the expansion already ends with ';', so `CUDA_CHECK(x);` produces an empty extra
// statement and the macro cannot be used as the sole body of an if/else without braces.
|
||||
#define CUDA_CHECK(operation) performCUDAResultCheck(operation, __FILE__, __LINE__);
|
||||
|
||||
// Macro used to see when a particular line of code is executed on the host.
// Writes "[DEBUG] file <file>, line <line>" to std::cout and flushes the stream
// (std::endl). Requires <iostream>.
// The expansion is parenthesised so that the macro always behaves as one single
// expression, regardless of the operators surrounding its use.
#define DEBUG_MARK (std::cout << "[DEBUG] file " << __FILE__ << ", line " << __LINE__ << std::endl)
|
||||
|
||||
|
||||
// Convenience typedefs for the containers used by SAFTHandler
|
||||
typedef std::vector<cudaDeviceProp> DeviceProperties; // list of CUDA device property records
|
||||
typedef std::vector<dim3> Dimensions; // list of dim3 values (used for SAFTHandler::validBlockDimensions)
|
||||
|
||||
|
||||
/**
 * Main class of the application; it is responsible for all of the image
 * reconstruction.
 *
 * Public interface: the constructor, which receives the input/output buffers
 * and all reconstruction parameters, and performReconstruction(). Everything
 * else is private state and private helper methods.
 *
 * NOTE(review): the class stores many raw host and device pointers and this
 * header declares neither a destructor nor copy operations — confirm who owns
 * and releases the referenced memory.
 */
|
||||
class SAFTHandler
|
||||
{
|
||||
public:
|
||||
SAFTHandler(int deviceId,
|
||||
int deviceIndex,
|
||||
float *aScan_ptr, ///< pointer to the A-scan data //std::string const & aScanSamplesPath,
|
||||
double *output_ptr, ///< pointer to the output data //std::string const & outputPath,
|
||||
double *Duration_ptr, ///< pointer to the output value for the runtime required by the SAFT kernel
|
||||
unsigned short *receiver_index_ptr, ///< presumably the receiver number of each A-scan — TODO confirm
|
||||
unsigned short *emitter_index_ptr, ///< presumably the emitter number of each A-scan — TODO confirm
|
||||
float *receiver_list_ptr, ///< presumably the receiver coordinate table — TODO confirm layout
|
||||
int receiver_list_Size, ///< size of receiver_list_ptr (unit not visible here — TODO confirm)
|
||||
float *emitter_list_ptr, ///< presumably the emitter coordinate table — TODO confirm layout
|
||||
int emitter_list_Size, ///< size of emitter_list_ptr (unit not visible here — TODO confirm)
|
||||
float *speed_vec_ptr,
|
||||
int3 SOSGrid_XYZ,
|
||||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||||
float SOS_RESOLUTION, ///< resolution of the SoS grid
|
||||
float *att_vec_ptr, // presumably the attenuation counterpart of speed_vec_ptr — TODO confirm
|
||||
|
||||
int aScanCount,
|
||||
int aScanLength,
|
||||
int3 IMAGE_SIZE_XYZ,
|
||||
float sampleRate,
|
||||
float3 regionOfInterestOffset,
|
||||
float IMAGE_RESOLUTION,
|
||||
dim3 const & fixedBlockDimensions,
|
||||
|
||||
int medianWindowSize, ///< define width of used median filter
|
||||
|
||||
float debugMode,
|
||||
float debugModeParameter,
|
||||
//bool useFixedPartialOutputWindow,
|
||||
|
||||
bool SOSMode_3DVolume,
|
||||
bool ATTMode_3DVolume,
|
||||
|
||||
int SAFT_MODE,
|
||||
int *SAFT_VARIANT
|
||||
);
|
||||
|
||||
void performReconstruction();
|
||||
|
||||
private:
|
||||
// NOTE(review): many members below carry the same names as the constructor parameters above.
bool SOSMode_3DVolume,
|
||||
ATTMode_3DVolume;
|
||||
|
||||
int SAFT_MODE;
|
||||
int *SAFT_VARIANT;
|
||||
int *deviceSAFT_VARIANT;
|
||||
|
||||
#ifdef Distanz_Memory
|
||||
float *deviceWurzelApprox;
|
||||
#endif
|
||||
|
||||
int deviceId;
|
||||
int deviceIndex;
|
||||
float debugMode;
|
||||
float debugModeParameter;
|
||||
|
||||
DeviceProperties deviceProperties;
|
||||
float
|
||||
*aScan_ptr;
|
||||
|
||||
// float
|
||||
// *rec_vec_ptr,
|
||||
// *send_vec_ptr;
|
||||
|
||||
unsigned short
|
||||
*emitter_index_ptr,
|
||||
*receiver_index_ptr;
|
||||
|
||||
float
|
||||
*emitter_list_ptr,
|
||||
*receiver_list_ptr;
|
||||
|
||||
int
|
||||
receiver_list_Size,
|
||||
emitter_list_Size;
|
||||
|
||||
double
|
||||
*output_ptr;
|
||||
|
||||
double
|
||||
*Duration_ptr;
|
||||
|
||||
float
|
||||
Sos,
|
||||
*speed_vec_ptr,
|
||||
*att_vec_ptr;
|
||||
|
||||
int3
|
||||
SOSGrid_XYZ;
|
||||
|
||||
float3
|
||||
sosOffset; ///< Startpoint of SoSGrid
|
||||
|
||||
int
|
||||
aScanCount,
|
||||
aScanLength;
|
||||
|
||||
int3
|
||||
IMAGE_SIZE_XYZ;
|
||||
|
||||
float3 regionOfInterestSize; // ROI size in meters
|
||||
|
||||
float3
|
||||
regionOfInterestOffset; //imageStartpoint; TODO: rename!
|
||||
|
||||
float
|
||||
IMAGE_RESOLUTION, ///< resolution of the output volume
|
||||
IMAGE_RESOLUTION_FACTOR, ///< 1/resolution of the output volume
|
||||
SOS_RESOLUTION, ///< resolution of the SoS grid
|
||||
SOS_RESOLUTION_FACTOR; ///< 1/resolution of the SoS grid
|
||||
|
||||
std::string
|
||||
emitterGeometryPath,
|
||||
receiverGeometryPath,
|
||||
aScanSamplesPath,
|
||||
outputPath;
|
||||
|
||||
// bool
|
||||
// printPerformanceAnalysis,
|
||||
// printSortedAutoTuningResults;
|
||||
|
||||
|
||||
float *aScanSamples;
|
||||
double *output;
|
||||
//int aScanCount;
|
||||
int
|
||||
aScanSize,
|
||||
batchSize,
|
||||
aScanBatchSize;
|
||||
|
||||
float voxelSize;
|
||||
|
||||
float sampleRate;
|
||||
|
||||
//size_t
|
||||
uint64_t
|
||||
regionOfInterestVoxelCount,
|
||||
outputSize;
|
||||
|
||||
uint64_t
|
||||
partialOutputZLayerOffset;
|
||||
|
||||
int
|
||||
partialOutputZLayerOffsetCount,
|
||||
partialOutputSoSZLayerCount,
|
||||
currentZLayerCount,
|
||||
partialSoSZLayerCount;
|
||||
|
||||
double *currentHostOutputAdress;
|
||||
|
||||
// Pointers to the input data of the current A-scan block
|
||||
float3
|
||||
*receiver_list, // LookUpTable receiverNr -> coordinates
|
||||
*emitter_list; // LookUpTable emitterNr -> coordinates
|
||||
|
||||
unsigned short
|
||||
*receiver_index, // Input Ascanblockdata: corresponding receiverNr
|
||||
*emitter_index; // Input Ascanblockdata: corresponding emitterNr
|
||||
|
||||
float
|
||||
*SoSData; // Input Ascanblockdata: Corresponding SOS value
|
||||
|
||||
float *speedOfSoundField; // Input Ascanblockdata: Corresponding SOS value as volume TODO: rename to speedOfSoundGrid
|
||||
float *attenuationField; // Input Ascanblockdata: Corresponding ATT value as volume TODO: rename to attenuationGrid
|
||||
|
||||
#ifdef SaftUseSosAttFloat2
|
||||
float2 *hostSosAttField;
|
||||
#endif
|
||||
|
||||
// Memory sizes
|
||||
//std::size_t
|
||||
int
|
||||
speedOfSoundFieldVoxelCount, //
|
||||
speedOfSoundFieldBytes, //
|
||||
speedOfSoundEmitterVoxelPathCountByteSize, // memory size for the number of voxels lying on a path
|
||||
speedOfSoundEmitterVoxelPathSumByteSize; // memory size for the sum of the speeds of sound on the path to a voxel
|
||||
|
||||
dim3
|
||||
fixedBlockDimensions, // can probably be replaced by genericSAFTBlockDimensions
|
||||
genericSAFTBlockDimensions,
|
||||
genericSAFTGridDimensions,
|
||||
windowGridDimensions;
|
||||
|
||||
int medianWindowSize; // define width of used median filter
|
||||
|
||||
|
||||
#ifdef SaftNoTexture
|
||||
float ** deviceAScans;
|
||||
#else
|
||||
cudaArray **deviceAScansCuArray;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef SaftTextureForBresenhamSosPaths
|
||||
|
||||
#ifdef SaftUseSosAttFloat1 // use separate textures for the two volumes (SoS + ATT)
|
||||
cudaArray *deviceSpeedOfSoundFieldCuArray; // SOS volume
|
||||
cudaArray *deviceAttenuationFieldCuArray; // ATT volume
|
||||
#endif
|
||||
|
||||
#ifdef SaftUseSosAttFloat2 // use only one texture for both volumes (SoS + ATT)
|
||||
cudaArray *deviceSosAttFieldCuArray;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
int maxSoSReceiverArrayForTexture;
|
||||
int TableVoxelToReceiverPathSosAllocationCount;
|
||||
std::size_t receiver_list_Size_deviceMemory;
|
||||
|
||||
#ifdef SaftTextureForEmRecSosPathsTables
|
||||
// For the emitter ----- defined as single CUDA arrays
|
||||
cudaArray *deviceTableVoxelToEmitterPathSosSumCuArray; //SoSSum
|
||||
//cudaPitchedPtr pitchedTableVoxelToEmitterPathSosSumDevPtr;
|
||||
|
||||
cudaArray *deviceTableVoxelToEmitterPathCountCuArray; //Count
|
||||
//cudaPitchedPtr pitchedTableVoxelToEmitterPathCountDevPtr;
|
||||
|
||||
// For the receivers ----- defined as arrays of CUDA arrays
|
||||
cudaArray **deviceTableVoxelToReceiverPathSosSumCuArray; //SoSSum
|
||||
//cudaPitchedPtr * pitchedTableVoxelToReceiverPathSosSumDevPtr;
|
||||
|
||||
cudaArray **deviceTableVoxelToReceiverPathCountCuArray; //Count
|
||||
//cudaPitchedPtr * pitchedTableVoxelToReceiverPathCountDevPtr;
|
||||
#endif
|
||||
|
||||
#if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
|
||||
cudaArray *deviceTableVoxelToEmPathSosBothCuArray; //Emitter SoSSum + Count
|
||||
cudaArray **deviceTableVoxelToRecPathSosBothCuArray; //Receiver SoSSum + Count
|
||||
#endif
|
||||
|
||||
#ifdef SaftTextureForERIndexBlock
|
||||
cudaArray * deviceEmIndexBlockCuArray;
|
||||
cudaArray * deviceRecIndexBlockCuArray;
|
||||
#endif
|
||||
|
||||
// Speed-of-sound correction mode
|
||||
float *deviceSpeedOfSoundField; // address of the memory for the speed-of-sound grid on the GPU
|
||||
|
||||
// Block mode
|
||||
unsigned short *deviceEmitterIndex_block; // addresses of the memory for the indices of the geometry data on the GPU
|
||||
unsigned short *deviceReceiverIndex_block;
|
||||
float3 *deviceListEmitterGeometry; // addresses of the memory for the mapping index <-> geometry data on the GPU
|
||||
float3 *deviceListReceiverGeometry;
|
||||
|
||||
float *deviceSoSData_block; // address of the memory for the speed-of-sound data on the GPU
|
||||
|
||||
// VoxelCountType // addresses of the memory for the SoS paths on the GPU
|
||||
// * deviceTableVoxelToEmitterPathCount,
|
||||
// * deviceTableVoxelToReceiverPathCount;
|
||||
float
|
||||
*deviceTableVoxelToEmitterPathCountFloat,
|
||||
*deviceTableVoxelToReceiverPathCountFloat,
|
||||
*deviceTableVoxelToEmitterPathSosSum,
|
||||
*deviceTableVoxelToReceiverPathSosSum;
|
||||
|
||||
|
||||
bool *deviceValidEmitterReceiverCombinations;
|
||||
|
||||
int *deviceTransducerVectorAnalysisDistributionCounters;
|
||||
|
||||
// float3
|
||||
// * deviceEmitterGeometry,
|
||||
// * deviceReceiverGeometry;
|
||||
|
||||
int usedAmountOfEmitter, // amount of used emitter
|
||||
usedAmountOfReceiver; // amount of used receiver
|
||||
|
||||
// Output volume
|
||||
double *deviceOutput;
|
||||
|
||||
//Streams used for synchronisation
|
||||
cudaStream_t
|
||||
copyStream,
|
||||
calculationStream;
|
||||
|
||||
//This variable describes the number of allocations used by the current SAFT mode
|
||||
std::size_t aScanAllocationCount;
|
||||
|
||||
int
|
||||
invalidEmitterReceiverCombinationsCount,
|
||||
validEmitterReceiverCombinationsCount;
|
||||
|
||||
Dimensions validBlockDimensions;
|
||||
bool useAutoTuning;
|
||||
// AutoTuningConfiguration autoTuningConfiguration;
|
||||
|
||||
size_t
|
||||
partialOutputSize,
|
||||
partialVolumeSize, // memory (output volume) that would be needed for the corresponding number of Z layers
|
||||
partialSosPathSize, // memory (output volume) that would be needed for the corresponding number of SoS Z layers
|
||||
maxFeasibleZLayerCount, // maximum possible number of Z layers; initially set to the number that fits into one SoS Z layer
|
||||
maxFeasibleSosZLayerCount; // maximum possible number of SoS Z layers; initially set to the number of SoS Z layers needed for the output data
|
||||
|
||||
int
|
||||
minimumAutoTuningThreadCount,
|
||||
maximumAutoTuningThreadCount;
|
||||
|
||||
|
||||
//New partial reconstruction data
|
||||
|
||||
std::size_t partialSpeedOfSoundVoxelCount;
|
||||
std::size_t partialOutputZLayerCount;
|
||||
std::size_t zLayerVoxelCount;
|
||||
std::size_t sosZLayerVoxelCount; // number of X-Y SoS voxels in one SoS layer. //saft.hpp
|
||||
std::size_t partialOutputVoxelCount;
|
||||
|
||||
std::size_t
|
||||
//deviceTableVoxelToEmitterPathCountSize,
|
||||
deviceTableVoxelToEmitterPathCountFloatSize,
|
||||
deviceTableVoxelToEmitterPathSosSumSize,
|
||||
//deviceTableVoxelToReceiverPathCountSize,
|
||||
deviceTableVoxelToReceiverPathCountFloatSize,
|
||||
deviceTableVoxelToReceiverPathSosSumSize;
|
||||
|
||||
double diff_time; // For Time Measurement
|
||||
float transferRate; // For DataTransferrate Measurement
|
||||
float performRate; // For PerformSAFTrate Measurement
|
||||
cudaDeviceProp deviceProp; // for printing the frequency
|
||||
|
||||
|
||||
//Core reconstruction
|
||||
|
||||
void processAScans(ullong & duration);
|
||||
void performCoreReconstruction();
|
||||
|
||||
//Pre-calculation
|
||||
|
||||
void precalculateAverageSpeedOfSound(int zLayer, int zLayerCount);
|
||||
// void analysisOfTransducerVectors();
|
||||
|
||||
// void normalisePerformanceStatisticsOutput();
|
||||
// void printTransducerVectorStatistics();
|
||||
|
||||
//Auto-tuning
|
||||
bool determineGridDimensions(dim3 const & blockDimensions, dim3 & gridDimensions);
|
||||
void determineValidBlockDimensions();
|
||||
|
||||
|
||||
void reduceKernelDimensions(dim3 const & gridDimensions, dim3 const & blockDimensions, dim3 & reducedGridDimensions, dim3 & reducedBlockDimensions);
|
||||
|
||||
//Pre-calculation kernels
|
||||
// The overload that is declared depends on SaftUseConstantMemforGeometry: with constant-memory
// geometry, deviceGeometry is an int instead of a float3 pointer and an additional
// deviceVoxelCountOutputFloat output is passed.
#ifdef SaftUseConstantMemforGeometry
|
||||
//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
|
||||
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
|
||||
#else
|
||||
//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceSpeedOfSoundSumOutput);
|
||||
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, float * deviceSpeedOfSoundSumOutput);
|
||||
#endif
|
||||
|
||||
|
||||
// void analyseTransducerVectors(dim3 gridDimensions, dim3 blockDimensions);
|
||||
|
||||
//SAFT kernels
|
||||
//void performInterpolation(float * deviceAScans, float * deviceOutput, dim3 gridDimensions, dim3 blockDimensions, cudaStream_t stream);
|
||||
//void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float const * deviceAScans); //, cudaStream_t stream);
|
||||
// NOTE(review): the two variants differ in more than the A-scan argument — only the texture
// variant takes maxFeasibleSosZLayerCount.
#ifdef SaftNoTexture
|
||||
void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, float * deviceAScans ); //A-scans in device memory
|
||||
#else
|
||||
void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, int maxFeasibleSosZLayerCount, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, cudaArray * deviceAScansCuArray); //A-scans in a CUDA array for texture memory
|
||||
//void performSAFT(int aScanIndex, int aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, cudaArray * deviceSpeedOfSoundFieldCuArray, cudaArray * deviceAScansCuArray); //A-scans in a CUDA array for texture memory
|
||||
#endif
|
||||
|
||||
//Utility functions
|
||||
bool setGenericDimensions();
|
||||
std::size_t resolutionConversion(std::size_t input, std::size_t greaterResolution, std::size_t lowerResolution);
|
||||
void partialReconstructionInitialisation();
|
||||
std::size_t getCurrentZLayerCount(std::size_t zOffset);
|
||||
void getCurrentSpeedOfSoundVariables(std::size_t zOffset, std::size_t currentZLayerCount, std::size_t & currentSpeedOfSoundZLayer, std::size_t & currentSpeedOfSoundPartialZLayerCount);
|
||||
void determineSpeedOfSoundData(std::size_t regionOfInterestZLayers);
|
||||
};
|
||||
|
||||
//std::string vectorToString(float3 const & vector);
|
||||
//std::string voxelToString(dim3 const & voxel);
|
||||
extern void memoryCheck();
|
||||
|
||||
extern void performCUDAResultCheck(cudaError_t result, std::string const & file, int line);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user