Compare commits
3 Commits
2023End
...
SAFT_TOFI_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b599153837 | ||
|
|
82a2a9e132 | ||
|
|
0e29f139af |
@@ -3,7 +3,10 @@ project(SaftTofi)
|
||||
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
||||
enable_language(CUDA)
|
||||
find_package (OpenMP REQUIRED)
|
||||
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp )
|
||||
file(GLOB_RECURSE cu_files ./src/*.cu)
|
||||
file(GLOB_RECURSE cuh_files ./src/*.cuh)
|
||||
|
||||
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/processAScans.cpp ./src/saft.cpp ${cu_files} ${cuh_files})
|
||||
target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include )
|
||||
set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
@@ -11,7 +14,7 @@ target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
--use_fast_math
|
||||
--ptxas-options=-v
|
||||
-arch compute_30 -code compute_30,sm_30
|
||||
>)
|
||||
>)
|
||||
|
||||
target_link_libraries(SaftTofi PRIVATE ${CUDA_RUNTIME_LIBRARY} )
|
||||
target_link_libraries(SaftTofi PRIVATE OpenMP::OpenMP_CXX )
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,33 +0,0 @@
|
||||
#include "saft.hpp"
|
||||
|
||||
/*!
|
||||
Emitter and receiver geometry held in constant memory, available across all functions in saft.cu because all of it is held in the same compilation unit.
|
||||
- Emitter und Receiver Geometrie werden im Constant Memory gehalten, erreichbar f<>r alle Funktionen in Saft.cu weil alle von ihnen in der selben Kompilierungs-Einheit gehalten werden.
|
||||
*/
|
||||
|
||||
#ifdef SaftUseConstantMemforGeometry
|
||||
|
||||
#ifdef SaftUseHarmonicMean
|
||||
|
||||
__constant__ float3 emitterPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
|
||||
__constant__ float3 receiverPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
|
||||
|
||||
// __constant__ float3 emitterPOSharmon[157 * 4];
|
||||
// __constant__ float3 receiverPOSharmon[157 * 9];
|
||||
|
||||
float3* constEmitterPtr = &emitterPOSharmon[0];
|
||||
float3* constReceiverPtr = &receiverPOSharmon[0];
|
||||
#endif
|
||||
|
||||
|
||||
// LookUpTable for GeometryList and Memory Position
|
||||
|
||||
__constant__ unsigned short lookUpGeometryMemoryListEmitter [MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
|
||||
__constant__ unsigned short lookUpGeometryMemoryListReceiver[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
|
||||
|
||||
// __constant__ unsigned short lookUpGeometryMemoryListEmitter [157 * 4];
|
||||
// __constant__ unsigned short lookUpGeometryMemoryListReceiver[157 * 9];
|
||||
|
||||
unsigned short* constLookUpGeometryMemoryListEmitterPtr = &lookUpGeometryMemoryListEmitter[0];
|
||||
unsigned short* constLookUpGeometryMemoryListReceiverPtr = &lookUpGeometryMemoryListReceiver[0];
|
||||
#endif
|
||||
1030
SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cu
Normal file
1030
SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cu
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
3
SAFT_TOFI/src/kernel/rayTracing.cu
Normal file
3
SAFT_TOFI/src/kernel/rayTracing.cu
Normal file
@@ -0,0 +1,3 @@
|
||||
#include "rayTracing.cuh"
|
||||
texture<float, cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen
|
||||
texture<float2, cudaTextureType3D, cudaReadModeElementType> texRefSosAttField; // Schritt 1. Textur anlegen
|
||||
@@ -1,676 +1,167 @@
|
||||
#include <stdio.h>
|
||||
#include "saft.hpp"
|
||||
|
||||
// Structure of File
|
||||
//-------------------------------------------------------
|
||||
//
|
||||
// __device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat // Teilpfade (genutzt)// Bestimme den SOSVoxel der zu einer Position gehoert, mit Nachkommastelle
|
||||
//
|
||||
// __device__ __forceinline__ void processRayTracedVoxelTexture // Für den Fall SaftUseSosAttFloat1 erstmal lassen
|
||||
// __device__ __forceinline__ void processRayTracedVoxelTextureSosAtt // AscanIndex // Addiere lokale SOS&ATT-Werte und # Voxel im Pfad
|
||||
//
|
||||
// Bresenham
|
||||
// __device__ __forceinline__ void performRayTracedSpeedAdditionTexture // Teilpfade // Dreidimensionale Version des Bresenham Line Algorithmus im Float-Format
|
||||
#define SQR(X) ((X) * (X))
|
||||
|
||||
// -------------------------------------------------------
|
||||
extern texture<float, cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen
|
||||
extern texture<float2, cudaTextureType3D, cudaReadModeElementType> texRefSosAttField; // Schritt 1. Textur anlegen
|
||||
|
||||
// printf() is only supported
|
||||
// for devices of compute capability 2.0 and above
|
||||
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
|
||||
#define printf(f, ...) ((void)(f, __VA_ARGS__),0)
|
||||
#endif
|
||||
|
||||
#define SQR(X) ((X)*(X))
|
||||
#define OutputSOSPositionX 20 // Volumen Voxel im SOS-Grid!
|
||||
#define OutputSOSPositionY 20
|
||||
#define OutputSOSPositionZ 20 // Echte Z-SoS-Layer ohne Offset
|
||||
|
||||
|
||||
//#ifdef debug_CudaRayTraceKernel
|
||||
#define OutputSOSPositionX 20 // Volumen Voxel im SOS-Grid!
|
||||
#define OutputSOSPositionY 20
|
||||
#define OutputSOSPositionZ 20 // Echte Z-SoS-Layer ohne Offset
|
||||
|
||||
// #define ER_PositionX 4 // Emitter Receiver Position im SOS-Grid!
|
||||
// #define ER_PositionY 37
|
||||
// #define ER_PositionZ 9
|
||||
//#endif
|
||||
|
||||
#ifdef debug_CudaRayTraceKernelLive
|
||||
#define OutputPositionX 250 // Volumen Voxel im Volumen!
|
||||
#define OutputPositionY 250
|
||||
#define OutputPositionZ 0
|
||||
|
||||
#define DebugSosVoxelX 10
|
||||
#define DebugSosVoxelY 10
|
||||
#define DebugSosVoxelZ 10
|
||||
|
||||
#define ER_PositionX 0 // Emitter Receiver Position im SOS-Grid!
|
||||
#define ER_PositionY 32
|
||||
#define ER_PositionZ 0
|
||||
#endif
|
||||
|
||||
//Textur fuer SoSField anlegen
|
||||
|
||||
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
|
||||
texture<float, cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen
|
||||
#endif
|
||||
|
||||
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
texture<float , cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen
|
||||
texture<float2, cudaTextureType3D, cudaReadModeElementType> texRefSosAttField; // Schritt 1. Textur anlegen
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Determines the voxel within the speed of sound field associated with a position.
|
||||
- Bestimme den Voxel in dem Schallgeschwindigkeitsfeld der zu einer Position gehoert, mit Nachkommastelle
|
||||
*/
|
||||
__device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat( // __forceinline__ zwingt den Compiler diesen Code bei jeden Aufruf direkt einzubinden (nicht als Funktion).
|
||||
float3 const & position, ///< Position within the scanner.
|
||||
float3 & voxel, ///< This argument is written to. Output voxel.
|
||||
float3 & sosOffset,
|
||||
float & SOS_RESOLUTION_FACTOR
|
||||
)
|
||||
__device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat( // __forceinline__ zwingt den Compiler diesen Code bei jeden Aufruf direkt einzubinden (nicht als Funktion).
|
||||
float3 const &position, ///< Position within the scanner.
|
||||
float3 &voxel, ///< This argument is written to. Output voxel.
|
||||
float3 &sosOffset, float &SOS_RESOLUTION_FACTOR)
|
||||
|
||||
{
|
||||
|
||||
#ifndef SOS_Version2
|
||||
voxel.x = (position.x - sosOffset.x ) * SOS_RESOLUTION_FACTOR + 0.5f; // SoSVoxel aus Positionsdaten bestimmen
|
||||
voxel.y = (position.y - sosOffset.y ) * SOS_RESOLUTION_FACTOR + 0.5f;
|
||||
voxel.z = (position.z - sosOffset.z ) * SOS_RESOLUTION_FACTOR + 0.5f;
|
||||
#else
|
||||
voxel.x = (position.x - sosOffset.x ) * SOS_RESOLUTION_FACTOR; // SoSVoxel aus Positionsdaten bestimmen
|
||||
voxel.y = (position.y - sosOffset.y ) * SOS_RESOLUTION_FACTOR;
|
||||
voxel.z = (position.z - sosOffset.z ) * SOS_RESOLUTION_FACTOR;
|
||||
#endif
|
||||
|
||||
voxel.x = (position.x - sosOffset.x) * SOS_RESOLUTION_FACTOR; // SoSVoxel aus Positionsdaten bestimmen
|
||||
voxel.y = (position.y - sosOffset.y) * SOS_RESOLUTION_FACTOR;
|
||||
voxel.z = (position.z - sosOffset.z) * SOS_RESOLUTION_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Process a voxel in the Bresenham algorithm. Float Format
|
||||
Accumulate speed of sound samples and keep track of the number of voxels in the path.
|
||||
- Verarbeite einen Voxel in dem Bresenham Algorithmus
|
||||
- Addiere Schallgeschwindigkeits-Sample
|
||||
*/
|
||||
__device__ __forceinline__ void processRayTracedVoxelTexture(
|
||||
float const * currentVoxelFloat, ///< Bresenham speed of sound voxel float.
|
||||
int & voxelCount, ///< This argument is written to. Number of voxels in the path so far.
|
||||
float & totalSpeed, ///< This argument is written to. Speed of sound sample accumulator.
|
||||
//float const * speedOfSoundField,///< Speed of sound field data containing samples.
|
||||
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< Pointer to cudaArray for SoSFieldData
|
||||
int3 const & SOSGrid_XYZ ///< Size of SOS-Grid in XYZ
|
||||
)
|
||||
__device__ __forceinline__ void processRayTracedVoxelTexture(float const *currentVoxelFloat, ///< Bresenham speed of sound voxel float.
|
||||
int &voxelCount, ///< This argument is written to. Number of voxels in the path so far.
|
||||
float &totalSpeed, ///< This argument is written to. Speed of sound sample accumulator.
|
||||
// float const * speedOfSoundField,///< Speed of sound field data containing samples.
|
||||
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< Pointer to cudaArray for SoSFieldData
|
||||
int3 const &SOSGrid_XYZ ///< Size of SOS-Grid in XYZ
|
||||
)
|
||||
|
||||
{
|
||||
#ifdef SaftUseHarmonicMean
|
||||
totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
#endif
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
|
||||
{
|
||||
printf("pSOS currentVoxel [%d %d %d]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
|
||||
//printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
|
||||
printf(" totalSpeed = %2.10f\n", totalSpeed);
|
||||
//printf(" totalAttenuation = %2.10f\n", totalAttenuation);
|
||||
printf(" voxelCount = %d\n", voxelCount);
|
||||
}
|
||||
#endif
|
||||
totalSpeed += 1 / (float)tex3D(texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
// AscanIndex - Variante
|
||||
// kernel version Z.316
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
Process a voxel in the Bresenham algorithm. Float Format
|
||||
Accumulate speed of sound samples and keep track of the number of voxels in the path.
|
||||
- Addiere lokale Schallgeschwindigkeits-Werte und Anzahl besuchter Voxel im Pfad
|
||||
*/
|
||||
// Aufruf: processRayTracedVoxelTextureSosAtt(currentVoxelFloat, voxelCount, totalSpeed, totalAttenuation, deviceSosAttFieldCuArray, SOSGrid_XYZ);
|
||||
|
||||
__device__ __forceinline__ void processRayTracedVoxelTextureSosAtt(
|
||||
float const * currentVoxelFloat, ///< SOS-coordinates of current Voxel for Bresenham in float[3].
|
||||
float & voxelCount, ///< This argument is written to. Number of voxels in the path so far. // TODO: not necessary here to sum up because it is already known from Bresenham length
|
||||
float & totalSpeed, ///< This argument is written to. Sum Up total SOS
|
||||
float & totalAttenuation, ///< This argument is written to. Sum Up total Attenuation
|
||||
//float const * speedOfSoundField, ///< Speed of sound field data containing samples.
|
||||
cudaArray *deviceSosAttFieldCuArray, ///< Pointer to cudaArray for SoSATTFieldData // TODO: not necessary if access is used with texture memory
|
||||
int3 const & SOSGrid_XYZ ///< Size of SOS-Grid in XYZ // TODO: only necessary for boundary-check.
|
||||
)
|
||||
float const *currentVoxelFloat, ///< SOS-coordinates of current Voxel for Bresenham in float[3].
|
||||
float &voxelCount, ///< This argument is written to. Number of voxels in the path so far. // TODO: not necessary here to sum up because it is already known from Bresenham length
|
||||
float &totalSpeed, ///< This argument is written to. Sum Up total SOS
|
||||
float &totalAttenuation, ///< This argument is written to. Sum Up total Attenuation
|
||||
cudaArray *deviceSosAttFieldCuArray, ///< Pointer to cudaArray for SoSATTFieldData // TODO: not necessary if access is used with texture memory
|
||||
int3 const &SOSGrid_XYZ ///< Size of SOS-Grid in XYZ // TODO: only necessary for boundary-check.
|
||||
)
|
||||
|
||||
{
|
||||
float2 SosAttValue;
|
||||
|
||||
#ifdef SaftUseHarmonicMean
|
||||
//totalSpeed += 1/speedOfSoundSample; // harmonisches Mittel
|
||||
// #ifdef SaftTextureForBresenhamInterpolated
|
||||
// #ifndef SOS_Version2
|
||||
// totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f );
|
||||
// #else
|
||||
// totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f );
|
||||
// #endif
|
||||
// #else
|
||||
#ifndef SOS_Version2
|
||||
totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2] );
|
||||
#else
|
||||
//totalSpeed += 1/tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
//totalAttenuation += tex3D( texRefAbsorptionField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
|
||||
|
||||
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
|
||||
totalSpeed += 1/tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
//totalAttenuation += tex3D( texRefAbsorptionField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
#endif
|
||||
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
// if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
|
||||
// {
|
||||
// //printf("pSOSATT pathPoint [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
|
||||
// //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
|
||||
// printf(". totalSpeed = %2.10f\n", totalSpeed);
|
||||
// printf(". totalAttenuation = %2.10f\n", totalAttenuation);
|
||||
// printf(". voxelCount = %d\n", voxelCount);
|
||||
// }
|
||||
SosAttValue = tex3D( texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
totalSpeed += 1/SosAttValue.x;
|
||||
totalAttenuation += SosAttValue.y;
|
||||
|
||||
|
||||
//if (SosAttValue.y > 20)
|
||||
// printf("!!!!!!!!!!!!!!!!! pSOSATT pathPoint [%f %f %f] = %f\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2], SosAttValue.y);
|
||||
|
||||
|
||||
// if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
|
||||
// {
|
||||
// //printf("pSOSATT pathPoint [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
|
||||
// //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
|
||||
// printf(".. totalSpeed = %2.10f\n", totalSpeed);
|
||||
// printf(".. totalAttenuation = %2.10f\n", totalAttenuation);
|
||||
// printf(".. voxelCount = %d\n", voxelCount);
|
||||
// }
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
// #endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
|
||||
{
|
||||
printf("pSOSATT currentVoxel [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
|
||||
//printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
|
||||
printf(" totalSpeed = %2.10f\n", totalSpeed);
|
||||
printf(" totalAttenuation = %2.10f\n", totalAttenuation);
|
||||
printf(" voxelCount = %d\n", voxelCount);
|
||||
}
|
||||
#endif
|
||||
|
||||
float2 SosAttValue;
|
||||
SosAttValue = tex3D(texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
totalSpeed += 1 / SosAttValue.x;
|
||||
totalAttenuation += SosAttValue.y;
|
||||
}
|
||||
|
||||
|
||||
__device__ __forceinline__ void performRayTracedSpeedAdditionTexture(float &voxelCount, ///< This argument is written to. Number of voxels within the path traced.
|
||||
|
||||
float &totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced.
|
||||
float &totalAttenuation,
|
||||
|
||||
float3 const &point1f, ///< Vector array describing the Voxelcoordinates of emitters or receivers.
|
||||
dim3 const &point2, ///< Vector array describing the Voxelcoordinates of Voxels.
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
// aktuell genutze Variante fuer Teilpfade
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur
|
||||
|
||||
/**
|
||||
- Dreidimensionale Version des Bresenham Line Algorithmus im Float-Format
|
||||
*/
|
||||
__device__ __forceinline__ void performRayTracedSpeedAdditionTexture(
|
||||
float & voxelCount, ///< This argument is written to. Number of voxels within the path traced.
|
||||
|
||||
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
|
||||
float & totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced.
|
||||
#endif
|
||||
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
float & totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced.
|
||||
float & totalAttenuation,
|
||||
#endif
|
||||
|
||||
float3 const & point1f, ///< Vector array describing the Voxelcoordinates of emitters or receivers.
|
||||
dim3 const & point2, ///< Vector array describing the Voxelcoordinates of Voxels.
|
||||
|
||||
#ifndef SaftTextureForBresenhamSosPaths
|
||||
float const * deviceSpeedOfSoundField, ///< Array of speed of sound samples. Dimensions ordered by speed of indices, commencing with the fastest moving one: 1. x 2. y 3. z
|
||||
#else
|
||||
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur
|
||||
#endif
|
||||
|
||||
int3 const & SOSGrid_XYZ, ///< Size of SOS-Grid in XYZ
|
||||
float3 & sosOffset,
|
||||
float & SOS_RESOLUTION,
|
||||
float & IMAGE_RESOLUTION,
|
||||
float3 regionOfInterestOffset,
|
||||
|
||||
#ifdef SaftUseConstantMemforGeometry
|
||||
int geometry ///< emitters=0 or receivers=1.
|
||||
#else
|
||||
float3 const * geometry ///< Vector array describing the positions of emitters or receivers.
|
||||
#endif
|
||||
)
|
||||
int3 const &SOSGrid_XYZ, ///< Size of SOS-Grid in XYZ
|
||||
float3 &sosOffset, float &SOS_RESOLUTION, float &IMAGE_RESOLUTION, float3 regionOfInterestOffset,
|
||||
int geometry ///< emitters=0 or receivers=1.
|
||||
)
|
||||
|
||||
{
|
||||
voxelCount = 0.0f;
|
||||
totalSpeed = 0.0f;
|
||||
totalAttenuation = 0.0f;
|
||||
|
||||
voxelCount = 0.0f;
|
||||
totalSpeed = 0.0f;
|
||||
totalAttenuation = 0.0f;
|
||||
// Voxel-Koordinaten ebenfalls in float umwandeln
|
||||
float voxel1f[3] = {point1f.x, point1f.y, point1f.z}; // Point1 liegt im float3 Format vor enthaellt schon +0.5
|
||||
|
||||
// Voxel-Koordinaten ebenfalls in float umwandeln
|
||||
float voxel1f[3]= {point1f.x,point1f.y,point1f.z}; //Point1 liegt im float3 Format vor enthaellt schon +0.5
|
||||
// float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION;
|
||||
float voxel2f[3];
|
||||
|
||||
//float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION;
|
||||
float voxel2f[3];
|
||||
voxel2f[0] = (float)point2.x; // + 0.5f;
|
||||
voxel2f[1] = (float)point2.y; // + 0.5f;
|
||||
voxel2f[2] = (float)point2.z; // + 0.5f;
|
||||
|
||||
#ifndef SOS_Version2
|
||||
voxel2f[0] = (regionOfInterestOffset.x - sosOffset.x ) / SOS_RESOLUTION + 0.5f; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen
|
||||
voxel2f[1] = (regionOfInterestOffset.y - sosOffset.y ) / SOS_RESOLUTION + 0.5f;
|
||||
voxel2f[2] = (regionOfInterestOffset.z - sosOffset.z ) / SOS_RESOLUTION + 0.5f;
|
||||
#endif
|
||||
int greatestDistanceDim = 0; // Gibt die Richtung(X,Y oder Z) an mit der grueueten Entfernung
|
||||
int slowDim1 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
|
||||
int slowDim2 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
|
||||
float greatestDistance_XYZ = 0.0; // Grueuete Distanz in die Richtung mit der grueueten Entfernung
|
||||
int fastDirectionSteps = 0; // Schritte die gegangen werden.
|
||||
|
||||
float m_XYZ[3];
|
||||
float pathPoint[3];
|
||||
if ((abs(voxel1f[0] - voxel2f[0]) <= abs(voxel1f[2] - voxel2f[2]))) // X<Z
|
||||
if ((abs(voxel1f[1] - voxel2f[1]) <= abs(voxel1f[0] - voxel2f[0]))) // Y<Z
|
||||
{
|
||||
greatestDistanceDim = 2; // => Z
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 1;
|
||||
}
|
||||
else if ((abs(voxel1f[2] - voxel2f[2]) <= abs(voxel1f[1] - voxel2f[1]))) // Z<Y
|
||||
{
|
||||
greatestDistanceDim = 1; // => Y
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 2;
|
||||
}
|
||||
else // Z=Y
|
||||
{
|
||||
greatestDistanceDim = 2; // => Z
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 1;
|
||||
}
|
||||
else // X>Z
|
||||
if ((abs(voxel1f[1] - voxel2f[1]) <= abs(voxel1f[0] - voxel2f[0]))) // Y<X
|
||||
{
|
||||
greatestDistanceDim = 0; // => X
|
||||
slowDim1 = 1;
|
||||
slowDim2 = 2;
|
||||
}
|
||||
else // Y>X
|
||||
{
|
||||
greatestDistanceDim = 1; // => Y
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 2;
|
||||
}
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
// printf("-> performRayTracedSpeedAdditionTexture\n");
|
||||
// printf(" point2 (Vol Pos im SoS) [%d %d %d]\n", point2.x , point2.y , point2.z);
|
||||
// printf(" voxel1 (E/R Pos im SoS) [%13.6f %13.6f %13.6f]\n", voxel1f[0] , voxel1f[1] , voxel1f[2] ); // Voxel 1 = ER Position
|
||||
printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]);
|
||||
printf(" voxel1f = [%3i %3i %3i]\n", (int)voxel1f[0],(int)voxel1f[1],(int)voxel1f[2]);
|
||||
// printf(" regionOfInterestOffset [%f %f %f]\n", regionOfInterestOffset.x , regionOfInterestOffset.y , regionOfInterestOffset.z ); // Voxel 2 = Volumen Position
|
||||
// printf(" sosOffset [%f %f %f]\n", sosOffset.x , sosOffset.y , sosOffset.z );
|
||||
// printf(" SOS_RESOLUTION = %12.8f\n", SOS_RESOLUTION );
|
||||
// printf(" VoxelIncrement = %12.8f\n", VoxelIncrement );
|
||||
// //printf(" -> voxel2Start (Vol Pos SoS) [%f %f %f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 Start -> wird noch angepasst
|
||||
}
|
||||
// ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel
|
||||
if (voxel1f[greatestDistanceDim] >= voxel2f[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv
|
||||
{
|
||||
fastDirectionSteps = floor(voxel1f[greatestDistanceDim] + 0.5f) - floor(voxel2f[greatestDistanceDim] + 0.5f) + 1;
|
||||
pathPoint[greatestDistanceDim] = voxel2f[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = voxel2f[slowDim1];
|
||||
pathPoint[slowDim2] = voxel2f[slowDim2];
|
||||
}
|
||||
else // voxel2f < voxel1f End point < Start point -> Slope negative
|
||||
{
|
||||
fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f) - floor(voxel1f[greatestDistanceDim] + 0.5f) + 1;
|
||||
pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = voxel1f[slowDim1];
|
||||
pathPoint[slowDim2] = voxel1f[slowDim2];
|
||||
}
|
||||
|
||||
#endif
|
||||
// Steigung bestimmen
|
||||
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
|
||||
greatestDistance_XYZ = ((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
|
||||
m_XYZ[greatestDistanceDim] = 1.0f; // Wegen Rundungsfehler bei der Division, die auftreten koennen.
|
||||
// m_XYZ[greatestDistanceDim] = (voxel2f[greatestDistanceDim] - voxel1f[greatestDistanceDim]) / greatestDistance_XYZ;
|
||||
m_XYZ[slowDim1] = (voxel2f[slowDim1] - voxel1f[slowDim1]) / greatestDistance_XYZ;
|
||||
m_XYZ[slowDim2] = (voxel2f[slowDim2] - voxel1f[slowDim2]) / greatestDistance_XYZ;
|
||||
|
||||
#ifndef SOS_Version2
|
||||
// Herausfinden wo genau der Voxel anfaengt/die Koordinaten des Voxels
|
||||
voxel2f[0] = (float)point2.x + VoxelIncrement - fmod(((float)point2.x-voxel2f[0]), VoxelIncrement);
|
||||
voxel2f[1] = (float)point2.y + VoxelIncrement - fmod(((float)point2.y-voxel2f[1]), VoxelIncrement);
|
||||
voxel2f[2] = (float)point2.z + VoxelIncrement - fmod(((float)point2.z-voxel2f[2]), VoxelIncrement);
|
||||
#else
|
||||
//Vom Mittelpunkt ausgehen
|
||||
voxel2f[0] = (float)point2.x;// + 0.5f;
|
||||
voxel2f[1] = (float)point2.y;// + 0.5f;
|
||||
voxel2f[2] = (float)point2.z;// + 0.5f;
|
||||
#endif
|
||||
int j = 0;
|
||||
for (j = fastDirectionSteps; j > 0; j--) //(Alle Punkte innerhalb der Schleife berechnen)
|
||||
{
|
||||
processRayTracedVoxelTextureSosAtt(pathPoint, voxelCount, totalSpeed, totalAttenuation, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
|
||||
pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] + m_XYZ[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = pathPoint[slowDim1] + m_XYZ[slowDim1];
|
||||
pathPoint[slowDim2] = pathPoint[slowDim2] + m_XYZ[slowDim2];
|
||||
}
|
||||
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
printf(" -> voxel2Angepasst (Vol Pos SoS) [%13.6f %13.6f %13.6f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 nach anpassung
|
||||
//printf("-> performRayTracedSpeedAdditionTexture\n");
|
||||
// printf(" voxel1 (E/R Pos im SoS) [%f %f %f]\n", voxel1f[0] , voxel1f[1] , voxel1f[2] ); // Voxel 1 = ER Position
|
||||
// printf(" voxel2 (Vol Pos im SoS) [%f %f %f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 = Volumen Position
|
||||
// printf(" regionOfInterestOffset [%f %f %f]\n", regionOfInterestOffset.x , regionOfInterestOffset.y , regionOfInterestOffset.z ); // Voxel 2 = Volumen Position
|
||||
// printf(" sosOffset [%f %f %f]\n", sosOffset.x , sosOffset.y , sosOffset.z );
|
||||
// printf(" SOS_RESOLUTION = %f\n", SOS_RESOLUTION );
|
||||
// printf(" VoxelIncrement = %f\n", VoxelIncrement );
|
||||
// printf(" point2 (Vol Pos im SoS) [%f %f %f]\n", point2.x , point2.y , point2.z);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int greatestDistanceDim = 0; // Gibt die Richtung(X,Y oder Z) an mit der grueueten Entfernung
|
||||
int slowDim1 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
|
||||
int slowDim2 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
|
||||
float greatestDistance_XYZ = 0.0; // Grueuete Distanz in die Richtung mit der grueueten Entfernung
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
bool m_positiv = 0; // Steigung positiv?
|
||||
#endif
|
||||
int fastDirectionSteps = 0; // Schritte die gegangen werden.
|
||||
|
||||
#if defined(debug_CudaRayTraceKernel) || (! defined(SOS_Version2))
|
||||
float distance_XYZ[3];
|
||||
float mFastDirectionSteps_XYZ[3];
|
||||
float mDistance_XYZ[3];
|
||||
#endif
|
||||
float m_XYZ[3];
|
||||
float pathPoint[3];
|
||||
#ifdef SOS_Version3
|
||||
float endPoint[3];
|
||||
#endif
|
||||
|
||||
// Welcher Abstand in den Dimensionen XYZ ist der groesste? Um Richtung zu bestimmen
|
||||
#ifndef SOS_Version2
|
||||
if ( (abs(voxel1f[0]-voxel2f[0]) <= abs(voxel1f[2]-voxel2f[2]))) // X<Z
|
||||
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<Z
|
||||
greatestDistanceDim = 2; // => Z
|
||||
else if ( (abs(voxel1f[2]-voxel2f[2]) <= abs(voxel1f[1]-voxel2f[1]))) // Z<Y
|
||||
greatestDistanceDim = 1; // => Y
|
||||
else // Z=Y
|
||||
greatestDistanceDim = 2; // => Z
|
||||
else // X>Z
|
||||
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<X
|
||||
greatestDistanceDim = 0; // => X
|
||||
else // Y>X
|
||||
greatestDistanceDim = 1; // => Y
|
||||
#else
|
||||
if ( (abs(voxel1f[0]-voxel2f[0]) <= abs(voxel1f[2]-voxel2f[2]))) // X<Z
|
||||
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<Z
|
||||
{
|
||||
greatestDistanceDim = 2; // => Z
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 1;
|
||||
}
|
||||
else if ( (abs(voxel1f[2]-voxel2f[2]) <= abs(voxel1f[1]-voxel2f[1]))) // Z<Y
|
||||
{
|
||||
greatestDistanceDim = 1; // => Y
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 2;
|
||||
}
|
||||
else // Z=Y
|
||||
{
|
||||
greatestDistanceDim = 2; // => Z
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 1;
|
||||
}
|
||||
else // X>Z
|
||||
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<X
|
||||
{
|
||||
greatestDistanceDim = 0; // => X
|
||||
slowDim1 = 1;
|
||||
slowDim2 = 2;
|
||||
}
|
||||
else // Y>X
|
||||
{
|
||||
greatestDistanceDim = 1; // => Y
|
||||
slowDim1 = 0;
|
||||
slowDim2 = 2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef SOS_Version2
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
|
||||
|
||||
distance_XYZ[0] = (voxel2f[0] - voxel1f[0]); // Abstand in X-Richtung
|
||||
distance_XYZ[1] = (voxel2f[1] - voxel1f[1]); // Abstand in Y-Richtung
|
||||
distance_XYZ[2] = (voxel2f[2] - voxel1f[2]); // Abstand in Z-Richtung
|
||||
#endif
|
||||
|
||||
pathPoint[0] = voxel1f[0]; // Voxel1f bzw. Emitter als Startpunkt nutzen
|
||||
pathPoint[1] = voxel1f[1];
|
||||
pathPoint[2] = voxel1f[2];
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
// Ausgabe der berechneten Werte
|
||||
printf(" \n");
|
||||
printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]);
|
||||
printf(" voxel2f = [%3f %3f %3f]\n", voxel2f[0],voxel2f[1],voxel2f[2]);
|
||||
printf(" greatestDistanceDim(X0 Y1 Z2) = %i\n", greatestDistanceDim);
|
||||
printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ);
|
||||
printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]);
|
||||
//printf(" m_positiv = %i\n", m_positiv);
|
||||
//printf(" fastDirectionSteps = %i\n", fastDirectionSteps); // Fehler wenn negativ!
|
||||
printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
// Steigung bestimmen
|
||||
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
|
||||
mDistance_XYZ[0] = distance_XYZ[0] / greatestDistance_XYZ;
|
||||
mDistance_XYZ[1] = distance_XYZ[1] / greatestDistance_XYZ;
|
||||
mDistance_XYZ[2] = distance_XYZ[2] / greatestDistance_XYZ;
|
||||
#endif
|
||||
|
||||
// ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel
|
||||
if (voxel2f[greatestDistanceDim] >= pathPoint[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv
|
||||
{
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
m_positiv = 1;
|
||||
#endif
|
||||
fastDirectionSteps = floor(voxel2f[greatestDistanceDim] ) - floor(pathPoint[greatestDistanceDim] ) + 1;
|
||||
}
|
||||
else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ
|
||||
{
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
m_positiv = 0;
|
||||
#endif
|
||||
fastDirectionSteps = floor(pathPoint[greatestDistanceDim] ) - floor(voxel2f[greatestDistanceDim] ) + 1;
|
||||
}
|
||||
|
||||
// Steigung bezogen auf die Anzahl der noetigen ganzen Schritte
|
||||
mFastDirectionSteps_XYZ[0] = distance_XYZ[0]/(float)(fastDirectionSteps-1);
|
||||
mFastDirectionSteps_XYZ[1] = distance_XYZ[1]/(float)(fastDirectionSteps-1);
|
||||
mFastDirectionSteps_XYZ[2] = distance_XYZ[2]/(float)(fastDirectionSteps-1);
|
||||
|
||||
m_XYZ[0] = mFastDirectionSteps_XYZ[0] ; // Steigung bezogen auf die Anzahl der nuetigen ganzen Schritte in jede Richtung nutzen
|
||||
m_XYZ[1] = mFastDirectionSteps_XYZ[1] ;
|
||||
m_XYZ[2] = mFastDirectionSteps_XYZ[2] ;
|
||||
#else
|
||||
|
||||
// ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel
|
||||
if (voxel1f[greatestDistanceDim] >= voxel2f[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv
|
||||
{
|
||||
fastDirectionSteps = floor(voxel1f[greatestDistanceDim] + 0.5f ) - floor(voxel2f[greatestDistanceDim] + 0.5f ) + 1;
|
||||
|
||||
pathPoint[greatestDistanceDim] = voxel2f[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = voxel2f[slowDim1];
|
||||
pathPoint[slowDim2] = voxel2f[slowDim2];
|
||||
|
||||
#ifdef SOS_Version3
|
||||
endPoint[0] = voxel2f[0];
|
||||
endPoint[1] = voxel1f[1];
|
||||
endPoint[2] = voxel1f[2];
|
||||
#endif
|
||||
}
|
||||
else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ
|
||||
{
|
||||
fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f ) - floor(voxel1f[greatestDistanceDim] + 0.5f ) + 1;
|
||||
|
||||
pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = voxel1f[slowDim1];
|
||||
pathPoint[slowDim2] = voxel1f[slowDim2];
|
||||
|
||||
#ifdef SOS_Version3
|
||||
endPoint[0] = voxel2f[0];
|
||||
endPoint[1] = voxel2f[1];
|
||||
endPoint[2] = voxel2f[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
|
||||
|
||||
distance_XYZ[0] = (voxel2f[0] - voxel1f[0]); // Abstand in X-Richtung
|
||||
distance_XYZ[1] = (voxel2f[1] - voxel1f[1]); // Abstand in Y-Richtung
|
||||
distance_XYZ[2] = (voxel2f[2] - voxel1f[2]); // Abstand in Z-Richtung
|
||||
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
// Ausgabe der berechneten Werte
|
||||
printf(" \n");
|
||||
printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]);
|
||||
printf(" voxel2f = [%3f %3f %3f]\n", voxel2f[0],voxel2f[1],voxel2f[2]);
|
||||
printf(" greatestDistanceDim(X0 Y1 Z2) = %i\n", greatestDistanceDim);
|
||||
printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ);
|
||||
printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]);
|
||||
//printf(" m_positiv = %i\n", m_positiv);
|
||||
//printf(" fastDirectionSteps = %i\n", fastDirectionSteps); // Fehler wenn negativ!
|
||||
printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]);
|
||||
}
|
||||
|
||||
// Steigung bestimmen
|
||||
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
|
||||
mDistance_XYZ[0] = distance_XYZ[0] / greatestDistance_XYZ;
|
||||
mDistance_XYZ[1] = distance_XYZ[1] / greatestDistance_XYZ;
|
||||
mDistance_XYZ[2] = distance_XYZ[2] / greatestDistance_XYZ;
|
||||
#endif
|
||||
|
||||
// Steigung bestimmen
|
||||
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
|
||||
greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
|
||||
m_XYZ[greatestDistanceDim] = 1.0f; // Wegen Rundungsfehler bei der Division, die auftreten koennen.
|
||||
//m_XYZ[greatestDistanceDim] = (voxel2f[greatestDistanceDim] - voxel1f[greatestDistanceDim]) / greatestDistance_XYZ;
|
||||
m_XYZ[slowDim1] = (voxel2f[slowDim1] - voxel1f[slowDim1]) / greatestDistance_XYZ;
|
||||
m_XYZ[slowDim2] = (voxel2f[slowDim2] - voxel1f[slowDim2]) / greatestDistance_XYZ;
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
|
||||
// Ausgabe der berechneten Werte
|
||||
//printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ);
|
||||
//printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]);
|
||||
//printf(" m_positiv = %i\n", m_positiv);
|
||||
printf(" fastDirectionSteps -1 = %i\n", (fastDirectionSteps-1));
|
||||
printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]);
|
||||
printf(" mFastDirectionSteps_XYZ = [%3f %3f %3f]\n", mFastDirectionSteps_XYZ[0],mFastDirectionSteps_XYZ[1],mFastDirectionSteps_XYZ[2]);
|
||||
printf(" m_XYZ = [%3f %3f %3f]\n", m_XYZ[0],m_XYZ[1],m_XYZ[2]);
|
||||
|
||||
// Punkte auf Pfad ablaufen
|
||||
// printf(" \n");
|
||||
// printf(" Float Bresenham mit Anfangspkt- und Endpktkorrektur Texture\n");
|
||||
// printf(" =========================================================================================================\n");
|
||||
// printf(" Step | currentVoxel | X | Y | Z || SoSvalue | delta_SoSvalue\n");
|
||||
// printf(" ---------------------------------------------------------------------------------------------------------\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifndef SOS_Version2
|
||||
// Startpunkt (voxel1f) festlegen
|
||||
pathPoint[0] = voxel1f[0];
|
||||
pathPoint[1] = voxel1f[1];
|
||||
pathPoint[2] = voxel1f[2];
|
||||
#endif
|
||||
|
||||
// Innere schleife fuer den Bresenham ohne Endpunkt
|
||||
#ifndef SOS_Version2
|
||||
int j = 1;
|
||||
for (j=1; j<=(fastDirectionSteps-1); j++) //(Start und Endpunkt werden ausserhalb der Schleife festgelegt)
|
||||
#else
|
||||
#ifndef SOS_Version3
|
||||
int j=0;
|
||||
for (j=fastDirectionSteps; j>0; j--) //(Alle Punkte innerhalb der Schleife berechnen)
|
||||
#else
|
||||
int j = 1;
|
||||
for (j=1; j<=(fastDirectionSteps-1); j++) //(Endpunkt wird ausserhalb der Schleife festgelegt)
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
|
||||
|
||||
#ifndef SaftTextureForBresenhamSosPaths
|
||||
// AuskommentiertprocessRayTracedVoxelFloat(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundField, SOSGrid_XYZ);
|
||||
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, SOSGrid_XYZ);
|
||||
#else
|
||||
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
|
||||
|
||||
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
|
||||
processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
|
||||
#endif
|
||||
|
||||
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
processRayTracedVoxelTextureSosAtt(pathPoint, voxelCount, totalSpeed, totalAttenuation, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhaengig
|
||||
//if ((pathPoint[0] == DebugSosVoxelX) && (pathPoint[1] == DebugSosVoxelY) && (pathPoint[2] == DebugSosVoxelZ) && ((int)voxel1f[0] == ER_PositionX) && ((int)voxel1f[1] == ER_PositionY) && ((int)voxel1f[2] == ER_PositionZ)) // Auch von ER-Position
|
||||
//if ((pathPoint[0] == DebugSosVoxelX) && (pathPoint[1] == DebugSosVoxelY) && (pathPoint[2] == DebugSosVoxelZ)) // Nur von SOS_XYZ-Position
|
||||
//if ( ((int)voxel1f[0] == ER_PositionX) && ((int)voxel1f[1] == ER_PositionY) && ((int)voxel1f[2] == ER_PositionZ) ) // Nur von ER-Position
|
||||
{
|
||||
printf("pSOSATT currentVoxel [%f %f %f] - j(%i)\n", pathPoint[0], pathPoint[1], pathPoint[2], j);
|
||||
//printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
|
||||
|
||||
float2 SosAttValue = tex3D( texRefSosAttField, pathPoint[0] + 0.5f, pathPoint[1] + 0.5f, pathPoint[2] + 0.5f);
|
||||
//totalSpeed += SosAttValue.x;
|
||||
//totalAttenuation += SosAttValue.y;
|
||||
|
||||
printf(" totalSpeed = %2.10f\n", SosAttValue.x, totalSpeed);
|
||||
printf(" totalAttenuation + %2.10f = %2.10f\n", SosAttValue.y, totalAttenuation);
|
||||
printf(" voxelCount = %d\n", voxelCount);
|
||||
}
|
||||
#endif
|
||||
|
||||
// SosAttValue = tex3D( texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
|
||||
// totalSpeed += SosAttValue.x;
|
||||
// totalAttenuation += SosAttValue.y;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhaengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
//printf(" Step %3i| [%3i %3i %3i] | % 11.6f | % 11.6f | % 11.6f || % 13.6f | % 13.6f \n", j, (int)floor(pathPoint[0]), (int)floor(pathPoint[1]), (int)floor(pathPoint[2]), pathPoint[0], pathPoint[1], pathPoint[2], totalSpeed, speedOfSoundField[((int)floor(pathPoint[2]) * SOSGrid_XYZ.y + (int)floor(pathPoint[1])) * SOSGrid_XYZ.x + (int)floor(pathPoint[0])]);
|
||||
printf(" Step %3i| [%3i %3i %3i] | % 17.12f % 17.12f % 17.12f || % 13.6f | % 13.6f \n", j, (int)floor(pathPoint[0]+0.5f), (int)floor(pathPoint[1]+0.5f), (int)floor(pathPoint[2]+0.5f), pathPoint[0], pathPoint[1], pathPoint[2], voxelCount/totalSpeed, (float)tex3D( texRefSpeedOfSoundField, pathPoint[0] + 0.5f, pathPoint[1] + 0.5f, pathPoint[2] + 0.5f ) );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef SOS_Version2
|
||||
pathPoint[0] = pathPoint[0] + m_XYZ[0];
|
||||
pathPoint[1] = pathPoint[1] + m_XYZ[1];
|
||||
pathPoint[2] = pathPoint[2] + m_XYZ[2];
|
||||
#else
|
||||
pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] + m_XYZ[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = pathPoint[slowDim1] + m_XYZ[slowDim1];
|
||||
pathPoint[slowDim2] = pathPoint[slowDim2] + m_XYZ[slowDim2];
|
||||
#endif
|
||||
}
|
||||
|
||||
#if (!defined (SOS_Version2)) || defined(SOS_Version3)
|
||||
// Endpunkt
|
||||
|
||||
#ifndef SaftTextureForBresenhamSosPaths
|
||||
// AuskommentiertprocessRayTracedVoxelFloat(endPoint, voxelCount, totalSpeed, deviceSpeedOfSoundField, SOSGrid_XYZ);
|
||||
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, SOSGrid_XYZ);
|
||||
#else
|
||||
processRayTracedVoxelTexture(endPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
|
||||
// TODO: Wieder einen ganzen Schritt entfernen und nur Distanz des Voxels dazufuegen --> dazu erst voxelCount auf float umstellen
|
||||
voxelCount--;
|
||||
pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] - m_XYZ[greatestDistanceDim];
|
||||
pathPoint[slowDim1] = pathPoint[slowDim1] - m_XYZ[slowDim1];
|
||||
pathPoint[slowDim2] = pathPoint[slowDim2] - m_XYZ[slowDim2];
|
||||
voxelCount += sqrtf( SQR(pathPoint[1]-endPoint[1]) + SQR(pathPoint[2]-endPoint[2]) + SQR(pathPoint[3]-endPoint[3]) );
|
||||
#endif
|
||||
|
||||
#ifdef debug_CudaRayTraceKernel
|
||||
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
|
||||
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
|
||||
{
|
||||
//printf(" Step %3i| [%3i %3i %3i] | % 11.6f | % 11.6f | % 11.6f || % 13.6f | % 13.6f \n", j, (int)floor(voxel2f[0]), (int)floor(voxel2f[1]), (int)floor(voxel2f[2]), voxel2f[0], voxel2f[1], voxel2f[2], totalSpeed, speedOfSoundField[((int)floor(voxel2f[2]) * SOSGrid_XYZ.y + (int)floor(voxel2f[1])) * SOSGrid_XYZ.x + (int)floor(voxel2f[0])]);
|
||||
printf("endPkt Step %3i| [%3i %3i %3i] | % 17.12f % 17.12f % 17.12f || % 13.6f | % 13.6f \n", j, (int)floor(endPoint[0]+0.5f), (int)floor(endPoint[1]+0.5f), (int)floor(endPoint[2]+0.5f), endPoint[0], endPoint[1], endPoint[2], voxelCount/totalSpeed, (float)tex3D( texRefSpeedOfSoundField, endPoint[0] + 0.5f, endPoint[1] + 0.5f, endPoint[2] + 0.5f ) );
|
||||
// printf("<- performRayTracedSpeedAdditionTexture\n");
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
// keinen Endpunkt extra dazurechnen.
|
||||
#endif
|
||||
|
||||
// Anzahl der Besuchten Voxel liegt von Anfang an fest, daher nicht noetig einzeln aufzuaddieren!
|
||||
voxelCount=fastDirectionSteps;
|
||||
// Anzahl der Besuchten Voxel liegt von Anfang an fest, daher nicht noetig einzeln aufzuaddieren!
|
||||
voxelCount = fastDirectionSteps;
|
||||
}
|
||||
|
||||
561
SAFT_TOFI/src/kernel/saftKernel.cu
Normal file
561
SAFT_TOFI/src/kernel/saftKernel.cu
Normal file
@@ -0,0 +1,561 @@
|
||||
#include "saftKernel.cuh"
|
||||
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> texRefAscans; // Schritt 1. Textur anlegen
|
||||
|
||||
// Texture for loading AscanIndexes without ATT-Correction (float)
|
||||
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_0;
|
||||
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_1;
|
||||
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_2;
|
||||
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_3;
|
||||
|
||||
// Texture for loading AscanIndexes with ATT-Correction (float2)
|
||||
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_0;
|
||||
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_1;
|
||||
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_2;
|
||||
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_3;
|
||||
|
||||
__global__ void saftKernelAscanIndex_SOS_ATT( // Version SoSATT-Korrektur
|
||||
|
||||
int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction
|
||||
float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction
|
||||
int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture
|
||||
int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures)
|
||||
|
||||
int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens
|
||||
float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids
|
||||
float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume
|
||||
float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates
|
||||
|
||||
int const blockIndexOffset, ///<
|
||||
int const speedOfSoundZLayer, ///<
|
||||
dim3 const gridDimensions, ///<
|
||||
dim3 const blockDimensions, ///<
|
||||
float const debugMode, ///<
|
||||
float const debugModeParameter, ///<
|
||||
int const *deviceSAFT_VARIANT, ///<
|
||||
double *output
|
||||
|
||||
)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 1. Determine which Voxel is to be calculated in this Kernel
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread?
|
||||
(threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y));
|
||||
|
||||
// Index of Block for this Thread
|
||||
unsigned long long int blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
|
||||
|
||||
dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0.
|
||||
((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index
|
||||
(blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index
|
||||
(blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index
|
||||
);
|
||||
|
||||
// If Voxel is outside the reconstructed Image leave Kernel
|
||||
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
|
||||
return;
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 2. Determine
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// - SOSvoxel in which voxel is located
|
||||
// - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths
|
||||
// - Variable declarations
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory-Index for this Thread for Output-Array of this Voxel
|
||||
unsigned long long int memoryIndex =
|
||||
(((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) *
|
||||
(unsigned long long int)IMAGE_SIZE_XYZ.x +
|
||||
(unsigned long long int)regionOfInterestVoxel.x);
|
||||
float3 SosVoxelf; // SoS-Voxel Koordinates in float
|
||||
// Determine SOS-Voxel-Position
|
||||
SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
||||
|
||||
// TexturIndex for access on Texturmemory depending of Voxel
|
||||
float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory
|
||||
float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f.
|
||||
float const TexturIndexY = SosVoxelf.y + 0.5f;
|
||||
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
|
||||
|
||||
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0
|
||||
{
|
||||
voxelValue = 0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (TableAscanIndexAllocationCount == 1)
|
||||
{
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
|
||||
// #pragma unroll 2
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
__syncthreads();
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 2)
|
||||
{
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
|
||||
// #pragma unroll 3
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
// 1ten Teil mit selben Index laden
|
||||
TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y *
|
||||
tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
// 2ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y *
|
||||
tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 3)
|
||||
{
|
||||
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_0;
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_1;
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_2;
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
// #pragma unroll 2
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexAttValues_0 = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_1 = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_2 = tex3D(texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
__syncthreads();
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 4)
|
||||
{
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_0;
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_1;
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_2;
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_3;
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
// #pragma unroll 2
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
// syncthreads();
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexAttValues_0 = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_1 = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_2 = tex3D(texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_3 = tex3D(texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
__syncthreads();
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i);
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
output[memoryIndex] = (double)voxelValue;
|
||||
}
|
||||
|
||||
__global__ void saftKernelAscanIndex_SOS( // Version SoS-Korrektur
|
||||
|
||||
int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction
|
||||
float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction
|
||||
int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture
|
||||
int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures)
|
||||
|
||||
int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens
|
||||
float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids
|
||||
float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume
|
||||
float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates
|
||||
|
||||
int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output
|
||||
|
||||
)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 1. Determine which Voxel is to be calculated in this Kernel
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread?
|
||||
(threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y));
|
||||
|
||||
// Index of Block for this Thread
|
||||
long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
|
||||
|
||||
dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0.
|
||||
((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index
|
||||
(blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index
|
||||
(blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index
|
||||
);
|
||||
|
||||
// If Voxel is outside the reconstructed Image leave Kernel
|
||||
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
|
||||
return;
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 2. Determine
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// - SOSvoxel in which voxel is located
|
||||
// - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths
|
||||
// - Variable declarations
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory-Index for this Thread for Output-Array of this Voxel
|
||||
long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x);
|
||||
float3 SosVoxelf; // SoS-Voxel Koordinates in float
|
||||
// Determine SOS-Voxel-Position
|
||||
SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
||||
|
||||
// TexturIndex for access on Texturmemory depending of Voxel
|
||||
float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory
|
||||
float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f.
|
||||
float const TexturIndexY = SosVoxelf.y + 0.5f;
|
||||
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
|
||||
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
|
||||
|
||||
float currentSOSVoxel_AscanIndexValues;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 3. SAFT-Algorithmus
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Vorgehen Ascanindexvariante
|
||||
// 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver
|
||||
// Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z).
|
||||
// Xmax = 128
|
||||
// Ymax = 128
|
||||
// Zmax = 2
|
||||
// => X = x
|
||||
// => Y = y
|
||||
// => Z = Zmax*RecNr+z
|
||||
// 2. Lade an dieser Stelle den Interpolierten Index.
|
||||
// 2.a Über Texturmemory
|
||||
// 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D
|
||||
// 3. Lade Ascan-Sample an diesem Index und summiere auf
|
||||
|
||||
__syncthreads();
|
||||
if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0
|
||||
{
|
||||
voxelValue = 0.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
// float VoxelAscanIndex2 = 0.0f;
|
||||
// float voxelValue2 = 0.0f;
|
||||
|
||||
if (TableAscanIndexAllocationCount == 1)
|
||||
{
|
||||
// #pragma unroll 3
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
// nutze immer nur 1tes Surface
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan a
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 2)
|
||||
{
|
||||
// #pragma unroll 3
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
// 1ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
// 2ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 3)
|
||||
{
|
||||
// #pragma unroll 3
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
// 1ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
// 2ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
|
||||
|
||||
// 3ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2 * maxAscanIndexArraysInTexture + 0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
else if (TableAscanIndexAllocationCount == 4)
|
||||
{
|
||||
float currentSOSVoxel_AscanIndexValues_0;
|
||||
float currentSOSVoxel_AscanIndexValues_1;
|
||||
float currentSOSVoxel_AscanIndexValues_2;
|
||||
float currentSOSVoxel_AscanIndexValues_3;
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
// #pragma unroll 4
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
// syncthreads();
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexValues_0 = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexValues_1 = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexValues_2 = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexValues_3 = tex3D(texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i);
|
||||
|
||||
// if (((int)ascanIndex_i & 31) == 0) __syncthreads();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Do nothing due to only 4 are defined
|
||||
}
|
||||
__syncthreads();
|
||||
output[memoryIndex] = (double)voxelValue;
|
||||
}
|
||||
|
||||
__global__ void saftKernelAscanIndex( // Version ohne-Korrektur
|
||||
|
||||
int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction
|
||||
float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction
|
||||
int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture
|
||||
int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures)
|
||||
|
||||
int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens
|
||||
float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids
|
||||
float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume
|
||||
float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates
|
||||
|
||||
int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output
|
||||
|
||||
)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 1. Determine which Voxel is to be calculated in this Kernel
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread?
|
||||
(threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y));
|
||||
|
||||
// Index of Block for this Thread
|
||||
long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
|
||||
|
||||
dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0.
|
||||
((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index
|
||||
(blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index
|
||||
(blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index
|
||||
);
|
||||
|
||||
// If Voxel is outside the reconstructed Image leave Kernel
|
||||
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
|
||||
return;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 2. Determine
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// - SOSvoxel in which voxel is located
|
||||
// - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths
|
||||
// - Variable declarations
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory-Index for this Thread for Output-Array of this Voxel
|
||||
long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x);
|
||||
float3 SosVoxelf; // SoS-Voxel Koordinates in float
|
||||
// Determine SOS-Voxel-Position
|
||||
SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
|
||||
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
||||
|
||||
// TexturIndex for access on Texturmemory depending of Voxel
|
||||
float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory
|
||||
float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f.
|
||||
float const TexturIndexY = SosVoxelf.y + 0.5f;
|
||||
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
|
||||
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
|
||||
|
||||
float currentSOSVoxel_AscanIndexValues;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 3. SAFT-Algorithmus
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Vorgehen Ascanindexvariante
|
||||
// 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver
|
||||
// Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z).
|
||||
// Xmax = 128
|
||||
// Ymax = 128
|
||||
// Zmax = 2
|
||||
// => X = x
|
||||
// => Y = y
|
||||
// => Z = Zmax*RecNr+z
|
||||
// 2. Lade an dieser Stelle den Interpolierten Index.
|
||||
// 2.a Über Texturmemory
|
||||
// 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D
|
||||
// 3. Lade Ascan-Sample an diesem Index und summiere auf
|
||||
|
||||
__syncthreads();
|
||||
if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0
|
||||
{
|
||||
voxelValue = 0.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
// float VoxelAscanIndex2 = 0.0f;
|
||||
// float voxelValue2 = 0.0f;
|
||||
|
||||
if (TableAscanIndexAllocationCount == 1)
|
||||
{
|
||||
// #pragma unroll 3
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
// nutze immer nur 1tes Surface
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 2)
|
||||
{
|
||||
// #pragma unroll 3
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
// 1ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
// 2ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
|
||||
}
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 3)
|
||||
{
|
||||
// #pragma unroll 3
|
||||
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
// 1ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
// 2ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
|
||||
|
||||
// 3ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2 * maxAscanIndexArraysInTexture + 0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
else if (TableAscanIndexAllocationCount == 4)
|
||||
{
|
||||
float currentSOSVoxel_AscanIndexValues_0;
|
||||
float currentSOSVoxel_AscanIndexValues_1;
|
||||
float currentSOSVoxel_AscanIndexValues_2;
|
||||
float currentSOSVoxel_AscanIndexValues_3;
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
// #pragma unroll 2
|
||||
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
//__syncthreads();
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexValues_0 = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexValues_1 = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexValues_2 = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexValues_3 = tex3D(texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i);
|
||||
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i);
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
output[memoryIndex] = (double)voxelValue;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
624
SAFT_TOFI/src/kernel/saftPrivate.cu
Normal file
624
SAFT_TOFI/src/kernel/saftPrivate.cu
Normal file
@@ -0,0 +1,624 @@
|
||||
#include "precalculateSpeedOfSoundKernel.cuh"
|
||||
#include "rayTracing.cuh"
|
||||
#include "saftKernel.cuh"
|
||||
#include "saft.hpp"
|
||||
|
||||
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
|
||||
{
|
||||
cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 32, 32,
|
||||
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und
|
||||
// beschreiben - Float4
|
||||
// Both Emitter Path Tables
|
||||
// --------------------------------------------------------
|
||||
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0;
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth));
|
||||
|
||||
// Texturmemory fuer Receiver - SosPathsTables
|
||||
// ===================================================================================================================
|
||||
// Both Receiver Path Tables
|
||||
// ------------------------------------------------------
|
||||
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableVoxelToReceiverPathSosBoth0_preprocess.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth0_preprocess, deviceTableVoxelToRecPathSosBothCuArray[0], &texChannelDescTableVoxelToEmRecPathSosBoth));
|
||||
|
||||
if (TableVoxelToReceiverPathSosAllocationCount > 1)
|
||||
{ // TODO: mit Arrays flexibel programmieren, wenn moeglich!!!
|
||||
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableVoxelToReceiverPathSosBoth1_preprocess.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth1_preprocess, deviceTableVoxelToRecPathSosBothCuArray[1], &texChannelDescTableVoxelToEmRecPathSosBoth));
|
||||
}
|
||||
|
||||
if (TableVoxelToReceiverPathSosAllocationCount > 2)
|
||||
{
|
||||
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableVoxelToReceiverPathSosBoth2_preprocess.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth2_preprocess, deviceTableVoxelToRecPathSosBothCuArray[2], &texChannelDescTableVoxelToEmRecPathSosBoth));
|
||||
}
|
||||
|
||||
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1);
|
||||
dim3 blocksPerGrid(1, 1, 1);
|
||||
blocksPerGrid.x = SOSGrid_XYZ.y;
|
||||
blocksPerGrid.y = maxFeasibleSosZLayerCount;
|
||||
blocksPerGrid.z = 1;
|
||||
|
||||
// Step 2. Bereite Output-Textur fuer AscanIndex vor
|
||||
|
||||
if (TableAscanIndexAllocationCount > 0)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
|
||||
}
|
||||
if (TableAscanIndexAllocationCount > 1)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
|
||||
}
|
||||
if (TableAscanIndexAllocationCount > 2)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
|
||||
}
|
||||
if (TableAscanIndexAllocationCount > 3)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
|
||||
}
|
||||
|
||||
// Step 3. Fuehre Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden
|
||||
// immer 1024/2048 A-Scans durchlaufen und in AscanIndex-Textur geschrieben
|
||||
|
||||
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
||||
{ // ====================================================
|
||||
// Blockmode with SOS-value per Ascan
|
||||
|
||||
precalculateAscanIndex_usePathsKernel<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
|
||||
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
|
||||
// vorberechnet werden können
|
||||
deviceSosAttFieldCuArray,
|
||||
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
|
||||
///< grid the pre-calculation is performed
|
||||
///< for.
|
||||
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
|
||||
///< sound grid the pre-calculation is
|
||||
///< performed for.
|
||||
// currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em
|
||||
// for which the AscanIndex is calculated
|
||||
|
||||
maxSoSReceiverArrayForTexture,
|
||||
|
||||
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
|
||||
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
|
||||
|
||||
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
|
||||
///< der Groesse 2048/4096
|
||||
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
|
||||
///< CUDA Array (fest definiert fuer
|
||||
///< bestimmung welche Textur genutzt
|
||||
///< wird)
|
||||
|
||||
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
|
||||
///< the path from transducer to
|
||||
///< voxel.
|
||||
|
||||
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
|
||||
deviceSAFT_VARIANT);
|
||||
}
|
||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
||||
{ // ====================================================
|
||||
// 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||
|
||||
precalculateAscanIndex_usePathsKernel_SOS<<<blocksPerGrid,
|
||||
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
|
||||
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
|
||||
// vorberechnet werden können
|
||||
deviceSosAttFieldCuArray,
|
||||
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
|
||||
///< grid the pre-calculation is performed
|
||||
///< for.
|
||||
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
|
||||
///< sound grid the pre-calculation is
|
||||
///< performed for.
|
||||
// currentEmIndexUsedForAscanIndexCalculation, ///< current
|
||||
// Index of Em for which the AscanIndex is calculated
|
||||
|
||||
maxSoSReceiverArrayForTexture,
|
||||
|
||||
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
|
||||
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
|
||||
|
||||
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
|
||||
///< der Groesse 2048/4096
|
||||
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
|
||||
///< CUDA Array (fest definiert fuer
|
||||
///< bestimmung welche Textur genutzt
|
||||
///< wird)
|
||||
|
||||
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
|
||||
///< the path from transducer to
|
||||
///< voxel.
|
||||
|
||||
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
|
||||
deviceSAFT_VARIANT);
|
||||
}
|
||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
||||
{ // ====================================================
|
||||
// 3DVolume Mode with SOS- and ATT-Correction
|
||||
|
||||
precalculateAscanIndex_usePathsKernel_SOS_ATT<<<blocksPerGrid,
|
||||
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
|
||||
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
|
||||
// vorberechnet werden können
|
||||
deviceSosAttFieldCuArray,
|
||||
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
|
||||
///< grid the pre-calculation is performed
|
||||
///< for.
|
||||
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
|
||||
///< sound grid the pre-calculation is
|
||||
///< performed for.
|
||||
// currentEmIndexUsedForAscanIndexCalculation, ///< current
|
||||
// Index of Em for which the AscanIndex is calculated
|
||||
maxSoSReceiverArrayForTexture,
|
||||
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
|
||||
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
|
||||
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
|
||||
///< der Groesse 2048/4096
|
||||
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
|
||||
///< CUDA Array (fest definiert fuer
|
||||
///< bestimmung welche Textur genutzt
|
||||
///< wird)
|
||||
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
|
||||
///< the path from transducer to
|
||||
///< voxel.
|
||||
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
|
||||
deviceSAFT_VARIANT);
|
||||
}
|
||||
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// ==================================================== cudaUnbindTexture
|
||||
// Texturmemory fuer Emitter - SosPathsTables entbinden
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToEmitterPathSosBoth_preprocess));
|
||||
// Texturmemory fuer Receiver - SosPathsTables entbinden
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth0_preprocess));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth1_preprocess));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth2_preprocess));
|
||||
}
|
||||
|
||||
void SAFTHandler::precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceListGeometry, int geometryElementCount, float *deviceVoxelCountOutputFloat,
|
||||
float *deviceSpeedOfSoundSumOutput)
|
||||
{
|
||||
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
|
||||
1); // max. 512 oder 1024 Threads werden vorgegeben und
|
||||
// dim3 threadsPerBlock (SOSGrid_XYZ.x,SOSGrid_XYZ.y,1); // max. 512 oder
|
||||
// 1024 Threads werden vorgegeben und
|
||||
dim3 blocksPerGrid(1, 1, 1); // max. 65.535 Bloecke im Grid
|
||||
// berechnet. Initialisierung
|
||||
blocksPerGrid.x = SOSGrid_XYZ.y;
|
||||
blocksPerGrid.y = sosZLayerCount;
|
||||
blocksPerGrid.z = 1;
|
||||
|
||||
cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0,
|
||||
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal
|
||||
// anlegen und beschreiben
|
||||
|
||||
texRefSosAttField.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texRefSosAttField.addressMode[1] = cudaAddressModeClamp;
|
||||
texRefSosAttField.addressMode[2] = cudaAddressModeClamp;
|
||||
|
||||
if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1)
|
||||
{
|
||||
texRefSosAttField.filterMode = cudaFilterModeLinear; // Lineare Interpolation
|
||||
}
|
||||
else
|
||||
{
|
||||
texRefSosAttField.filterMode = cudaFilterModePoint; // Nearest Neighbor
|
||||
}
|
||||
texRefSosAttField.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texRefSosAttField, deviceSosAttFieldCuArray,
|
||||
&texChannelDescSosAttField)); // Schritt 4.1 3DArray an Texturmemory
|
||||
// binden
|
||||
|
||||
if (deviceListGeometry == 0)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray);
|
||||
}
|
||||
|
||||
if (deviceListGeometry == 1)
|
||||
{
|
||||
if (TableVoxelToReceiverPathSosAllocationCount > 0)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]);
|
||||
}
|
||||
if (TableVoxelToReceiverPathSosAllocationCount > 1)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]);
|
||||
}
|
||||
if (TableVoxelToReceiverPathSosAllocationCount > 2)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]);
|
||||
}
|
||||
}
|
||||
|
||||
precalculateAverageSpeedOfSoundKernel<<<blocksPerGrid, threadsPerBlock>>>(deviceSosAttFieldCuArray, firstZLayer, sosZLayerCount, deviceListGeometry, geometryElementCount,
|
||||
maxSoSReceiverArrayForTexture, // maximale Anzahl an Receivern in einem
|
||||
// CUDA Array
|
||||
|
||||
// deviceVoxelCountOutput,
|
||||
deviceVoxelCountOutputFloat, deviceSpeedOfSoundSumOutput,
|
||||
// regionOfInterestOffset,
|
||||
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
CUDA_CHECK(cudaUnbindTexture(&texRefSosAttField));
|
||||
}
|
||||
|
||||
void SAFTHandler::fillCuArray(float useValue,
|
||||
cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
|
||||
int TableAscanIndexAllocationCount)
|
||||
{
|
||||
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
|
||||
1); // determine neccessary amount of threads
|
||||
// // max. 512 oder 1024
|
||||
dim3 blocksPerGrid(1, 1,
|
||||
1); // determine neccessary amount of blocks in grid // max. 65.535
|
||||
blocksPerGrid.x = SOSGrid_XYZ.y;
|
||||
blocksPerGrid.y = maxFeasibleSosZLayerCount;
|
||||
blocksPerGrid.z = 1;
|
||||
|
||||
// Step 1. Bereite Output-Textur fuer AscanIndex vor
|
||||
if (TableAscanIndexAllocationCount > 0)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
|
||||
}
|
||||
if (TableAscanIndexAllocationCount > 1)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
|
||||
}
|
||||
if (TableAscanIndexAllocationCount > 2)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
|
||||
}
|
||||
if (TableAscanIndexAllocationCount > 3)
|
||||
{
|
||||
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
|
||||
}
|
||||
|
||||
// Step 2. Fuere Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden immer
|
||||
// 1024/2048 A-Scans durchgegangen und in AscanIndex-Textur geschrieben
|
||||
fillCuArrayKernel<<<blocksPerGrid, threadsPerBlock>>>(useValue,
|
||||
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS
|
||||
///< samples in the path
|
||||
///< from transducer to
|
||||
///< voxel.
|
||||
maxAscanIndexArraysInTexture,
|
||||
TableAscanIndexAllocationCount, ///< Amount of Surfaces in
|
||||
///< the Array of cuArrays
|
||||
maxFeasibleSosZLayerCount, ATTMode_3DVolume, debugMode, debugModeParameter);
|
||||
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
}
|
||||
|
||||
void SAFTHandler::performSAFT(
|
||||
int aScanIndex, ///< The A-scan index is increased by the A-scan batch size in every iteration. It describes the offset into the A-scan samples the SAFT kernel is operating with.
|
||||
size_t aScanWindowSize, ///< A-scan batch size in terms of number of samples within one window.
|
||||
int3 IMAGE_SIZE_XYZ, ///< Bildbereichsgroesse/ROI in Voxel
|
||||
int3 SOSGrid_XYZ, ///< SoSGridgroesse in Voxel
|
||||
int blockIndexOffset, ///< Additional offset added to the z component of the block index, required because of the adjustments for partial reconstruction in different z-layers.
|
||||
int outputWindowVoxelCount, ///< Number of Voxels in the output window.
|
||||
int speedOfSoundZLayer, ///< current SoS z-layer Offset in the speed of sound grid.
|
||||
int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the z-layers of the active zone of reconstruction in the region of interest.
|
||||
int maxFeasibleSosZLayerCount,
|
||||
int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
|
||||
dim3 const &windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than the full grid dimensions and only represents the current reconstruction window.
|
||||
dim3 const &gridDimensions, ///< Full grid dimensions of the reconstruction.
|
||||
dim3 const &blockDimensions, ///< Block dimensions to be used with the SAFT kernel.
|
||||
float *deviceSpeedOfSoundField, ///< Pointer to SoSGrid.
|
||||
cudaArray *deviceAScansCuArray
|
||||
// cudaStream_t stream ///< Stream to execute the SAFT kernel on.
|
||||
)
|
||||
{
|
||||
|
||||
dim3 reducedGridDimensions, reducedBlockDimensions;
|
||||
|
||||
reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions);
|
||||
|
||||
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT, cudaFuncCachePreferL1));
|
||||
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS, cudaFuncCachePreferL1));
|
||||
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex, cudaFuncCachePreferL1));
|
||||
|
||||
// Texture Memory Adressing-mode // http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf -> 3.2.11.1. Texture Memory S. 42
|
||||
// cudaAddressModeClamp - Return values at the boarders if out-of range - default
|
||||
// cudaAddressModeBorder - Return 0 if out-of range
|
||||
// cudaAddressModeMirror - Mirror the values - For normalized coordinates
|
||||
// cudaAddressModeWrap - Repeating the values - For normalized coordinates
|
||||
|
||||
// Texturmemory fuer Ascans
|
||||
cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur
|
||||
texRefAscans.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben
|
||||
texRefAscans.addressMode[1] = cudaAddressModeBorder;
|
||||
|
||||
if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1)
|
||||
{
|
||||
texRefAscans.filterMode = cudaFilterModeLinear; // Lineare Interpolation
|
||||
}
|
||||
else
|
||||
{
|
||||
texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor
|
||||
}
|
||||
|
||||
texRefAscans.normalized = 0;
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texRefAscans, deviceAScansCuArray, &texChannelDesc));
|
||||
|
||||
if (ATTMode_3DVolume == false)
|
||||
{ // ========= 3DVolume Mode without ATT-Correction
|
||||
|
||||
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float>(); // Should do the same
|
||||
|
||||
// AscanIndex Path Tables ------------------------------------------------------
|
||||
texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat1_0.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat1_0.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat1_0.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
|
||||
|
||||
if (TableAscanIndexAllocationCount > 1)
|
||||
{ // TODO: mit Arrays flexibel programmieren!!!
|
||||
texTableAscanIndexFloat1_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat1_1.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat1_1.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat1_1.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat1_1.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat1_1.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
|
||||
if (TableAscanIndexAllocationCount > 2)
|
||||
{
|
||||
texTableAscanIndexFloat1_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat1_2.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat1_2.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat1_2.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat1_2.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat1_2.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
|
||||
if (TableAscanIndexAllocationCount > 3)
|
||||
{
|
||||
texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat1_3.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat1_3.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat1_3.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
}
|
||||
else if (ATTMode_3DVolume == true)
|
||||
{ // ========= 3DVolume Mode with ATT-Correction
|
||||
|
||||
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float2>(); // Should do the same
|
||||
|
||||
// AscanIndex Path Tables ------------------------------------------------------
|
||||
texTableAscanIndexFloat2_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat2_0.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat2_0.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat2_0.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat2_0.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat2_0.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
|
||||
|
||||
if (TableAscanIndexAllocationCount > 1)
|
||||
{ // TODO: mit Arrays flexibel programmieren!!!
|
||||
texTableAscanIndexFloat2_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat2_1.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat2_1.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat2_1.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat2_1.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat2_1.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
|
||||
if (TableAscanIndexAllocationCount > 2)
|
||||
{
|
||||
texTableAscanIndexFloat2_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat2_2.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat2_2.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat2_2.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat2_2.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat2_2.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
|
||||
if (TableAscanIndexAllocationCount > 3)
|
||||
{
|
||||
texTableAscanIndexFloat2_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
texTableAscanIndexFloat2_3.addressMode[1] = cudaAddressModeClamp;
|
||||
texTableAscanIndexFloat2_3.addressMode[2] = cudaAddressModeClamp;
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0:
|
||||
texTableAscanIndexFloat2_3.filterMode = cudaFilterModePoint;
|
||||
break;
|
||||
case 1:
|
||||
texTableAscanIndexFloat2_3.filterMode = cudaFilterModeLinear;
|
||||
break;
|
||||
}
|
||||
texTableAscanIndexFloat2_3.normalized = 0;
|
||||
|
||||
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
||||
}
|
||||
}
|
||||
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
|
||||
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
|
||||
float3 SosVoxelStartPosition;
|
||||
SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x) / SOS_RESOLUTION; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen
|
||||
SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y) / SOS_RESOLUTION;
|
||||
SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z) / SOS_RESOLUTION;
|
||||
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
||||
|
||||
// Anzahl der Teiltabellen, bereits hier berechnen oder übergeben
|
||||
int TableAscanIndexAllocationCount = ceil((float)aScanWindowSize / (float)maxAscanIndexArraysInTexture); // float is important due to ceiling
|
||||
// printf( "TableAscanIndexAllocationCount = (%i/%i) = %i = %i\n", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount,
|
||||
// ceil(aScanWindowSize/maxAscanIndexArraysInTexture));
|
||||
|
||||
// Call of 3 SAFT Versions - AscanIndex-Varainten
|
||||
// #####################################################################################################################################################################
|
||||
// ==================================================== Blockmode with SOS-value per Ascan
|
||||
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
||||
|
||||
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
||||
{ // ==================================================== Blockmode with SOS-value per Ascan
|
||||
printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n");
|
||||
}
|
||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
||||
{
|
||||
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
||||
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
||||
|
||||
aScanIndex,
|
||||
|
||||
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
||||
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
||||
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
||||
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
|
||||
}
|
||||
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
||||
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
||||
|
||||
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
||||
aScanIndex,
|
||||
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
||||
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
||||
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
||||
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement,
|
||||
blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions,
|
||||
debugMode, debugModeParameter, deviceSAFT_VARIANT,
|
||||
deviceOutput
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
// Unbind Textures
|
||||
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
|
||||
|
||||
if (ATTMode_3DVolume == false)
|
||||
{ // ========= 3DVolume Mode without ATT-Correction
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_0));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_1));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_2));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_3));
|
||||
}
|
||||
else if (ATTMode_3DVolume == true)
|
||||
{ // ========= 3DVolume Mode with ATT-Correction
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_0));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_1));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_2));
|
||||
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
|
||||
}
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,126 +1,91 @@
|
||||
#include <string.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <cmath>
|
||||
#include <string.h>
|
||||
|
||||
//#include <ail/file.hpp>
|
||||
//#include <ail/string.hpp>
|
||||
//#include <ail/time.hpp>
|
||||
// #include <ail/file.hpp>
|
||||
// #include <ail/string.hpp>
|
||||
// #include <ail/time.hpp>
|
||||
|
||||
#include "saft.hpp"
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Clumsy constructor of the core reconstruction class.
|
||||
- Unbeholfener Konstruktor der Kern Rekonstuktionsklasse
|
||||
*/
|
||||
SAFTHandler::SAFTHandler(
|
||||
int deviceId, ///< CUDA ID of the device to be used.
|
||||
int deviceIndex, ///< Index given by MATLAB (An welcher Position steht die GPU in der Liste?)
|
||||
float *aScan_ptr, ///< Zeiger zu den AScandaten
|
||||
double *output_ptr, ///< Zeiger zu den Volumen-Daten
|
||||
double *Duration_ptr, ///< Zeiger auf Rueckgabewert fuer Matlab fuer Laufzeit des Kernels
|
||||
unsigned short *receiver_index_ptr, ///<
|
||||
unsigned short *emitter_index_ptr, ///<
|
||||
float *receiver_list_ptr, ///<
|
||||
int receiver_list_Size,
|
||||
float *emitter_list_ptr, ///<
|
||||
int emitter_list_Size,
|
||||
float *speed_vec_ptr, ///< Zeiger auf die SoS-Daten in Block-/Gridmode
|
||||
int3 SOSGrid_XYZ,
|
||||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||||
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||||
float *att_vec_ptr, ///< Zeiger auf die Att-Daten inm Gridmode
|
||||
SAFTHandler::SAFTHandler(int deviceId, ///< CUDA ID of the device to be used.
|
||||
int deviceIndex, ///< Index given by MATLAB (An welcher Position steht die GPU in der Liste?)
|
||||
float *aScan_ptr, ///< Zeiger zu den AScandaten
|
||||
double *output_ptr, ///< Zeiger zu den Volumen-Daten
|
||||
double *Duration_ptr, ///< Zeiger auf Rueckgabewert fuer Matlab fuer Laufzeit des Kernels
|
||||
unsigned short *receiver_index_ptr, ///<
|
||||
unsigned short *emitter_index_ptr, ///<
|
||||
float *receiver_list_ptr, ///<
|
||||
int receiver_list_Size,
|
||||
float *emitter_list_ptr, ///<
|
||||
int emitter_list_Size,
|
||||
float *speed_vec_ptr, ///< Zeiger auf die SoS-Daten in Block-/Gridmode
|
||||
int3 SOSGrid_XYZ,
|
||||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||||
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||||
float *att_vec_ptr, ///< Zeiger auf die Att-Daten inm Gridmode
|
||||
|
||||
int aScanCount,
|
||||
int aScanLength,
|
||||
int3 IMAGE_SIZE_XYZ,
|
||||
float sampleRate,
|
||||
float3 regionOfInterestOffset,
|
||||
float IMAGE_RESOLUTION,
|
||||
dim3 const & fixedBlockDimensions, ///< If fixed block dimensions are enabled, they will be used over the ones determined by auto-tuning.
|
||||
float debugMode,
|
||||
float debugModeParameter,
|
||||
bool SOSMode_3DVolume,
|
||||
bool ATTMode_3DVolume,
|
||||
int aScanCount, int aScanLength, int3 IMAGE_SIZE_XYZ, float sampleRate, float3 regionOfInterestOffset, float IMAGE_RESOLUTION,
|
||||
dim3 const &fixedBlockDimensions, ///< If fixed block dimensions are enabled, they will be used over the ones determined by auto-tuning.
|
||||
float debugMode, float debugModeParameter, bool SOSMode_3DVolume, bool ATTMode_3DVolume,
|
||||
|
||||
int SAFT_MODE,
|
||||
int *SAFT_VARIANT,
|
||||
int SAFT_VARIANT_Size,
|
||||
int SAFT_MODE, int *SAFT_VARIANT, int SAFT_VARIANT_Size,
|
||||
|
||||
int *Abort_ptr ///< If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
|
||||
):
|
||||
// Initialisation der Klassenvariablen mit den uebergebenen Werten (aehnlich Konstruktor)
|
||||
// Initializer list of class variables
|
||||
deviceId(deviceId),
|
||||
deviceIndex(deviceIndex),
|
||||
int *Abort_ptr ///< If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
|
||||
)
|
||||
: deviceId(deviceId),
|
||||
deviceIndex(deviceIndex),
|
||||
|
||||
aScan_ptr(aScan_ptr), //aScanSamplesPath(aScanSamplesPath),
|
||||
aScan_ptr(aScan_ptr), // aScanSamplesPath(aScanSamplesPath),
|
||||
|
||||
output_ptr(output_ptr), //Path(Path),
|
||||
Duration_ptr(Duration_ptr),
|
||||
output_ptr(output_ptr), // Path(Path),
|
||||
Duration_ptr(Duration_ptr),
|
||||
|
||||
receiver_index_ptr(receiver_index_ptr), //
|
||||
emitter_index_ptr(emitter_index_ptr), //
|
||||
receiver_list_ptr(receiver_list_ptr), //
|
||||
receiver_list_Size(receiver_list_Size),
|
||||
emitter_list_ptr(emitter_list_ptr), //
|
||||
emitter_list_Size(emitter_list_Size),
|
||||
speed_vec_ptr(speed_vec_ptr), ///< SoS-Daten im Blockmode oder SoSGrid
|
||||
SOSGrid_XYZ(SOSGrid_XYZ), // Groesse des SoSGrids
|
||||
sosOffset(sosOffset), ///< Startpoint of SoSGrid
|
||||
SOS_RESOLUTION(SOS_RESOLUTION), ///< Aufloesung des SoSGrid
|
||||
receiver_index_ptr(receiver_index_ptr), //
|
||||
emitter_index_ptr(emitter_index_ptr), //
|
||||
receiver_list_ptr(receiver_list_ptr), //
|
||||
receiver_list_Size(receiver_list_Size),
|
||||
emitter_list_ptr(emitter_list_ptr), //
|
||||
emitter_list_Size(emitter_list_Size),
|
||||
speed_vec_ptr(speed_vec_ptr), ///< SoS-Daten im Blockmode oder SoSGrid
|
||||
SOSGrid_XYZ(SOSGrid_XYZ), // Groesse des SoSGrids
|
||||
sosOffset(sosOffset), ///< Startpoint of SoSGrid
|
||||
SOS_RESOLUTION(SOS_RESOLUTION), ///< Aufloesung des SoSGrid
|
||||
|
||||
att_vec_ptr(att_vec_ptr), ///< Att-Daten als ATTGrid
|
||||
att_vec_ptr(att_vec_ptr), ///< Att-Daten als ATTGrid
|
||||
|
||||
aScanCount(aScanCount),
|
||||
aScanLength(aScanLength),
|
||||
IMAGE_SIZE_XYZ(IMAGE_SIZE_XYZ),
|
||||
sampleRate(sampleRate),
|
||||
regionOfInterestOffset(regionOfInterestOffset),
|
||||
IMAGE_RESOLUTION(IMAGE_RESOLUTION),
|
||||
aScanCount(aScanCount),
|
||||
aScanLength(aScanLength),
|
||||
IMAGE_SIZE_XYZ(IMAGE_SIZE_XYZ),
|
||||
sampleRate(sampleRate),
|
||||
regionOfInterestOffset(regionOfInterestOffset),
|
||||
IMAGE_RESOLUTION(IMAGE_RESOLUTION),
|
||||
|
||||
fixedBlockDimensions(fixedBlockDimensions),
|
||||
debugMode(debugMode),
|
||||
debugModeParameter(debugModeParameter),
|
||||
SOSMode_3DVolume(SOSMode_3DVolume),
|
||||
ATTMode_3DVolume(ATTMode_3DVolume),
|
||||
fixedBlockDimensions(fixedBlockDimensions),
|
||||
debugMode(debugMode),
|
||||
debugModeParameter(debugModeParameter),
|
||||
SOSMode_3DVolume(SOSMode_3DVolume),
|
||||
ATTMode_3DVolume(ATTMode_3DVolume),
|
||||
|
||||
SAFT_MODE(SAFT_MODE),
|
||||
SAFT_VARIANT(SAFT_VARIANT),
|
||||
SAFT_VARIANT_Size(SAFT_VARIANT_Size),
|
||||
SAFT_MODE(SAFT_MODE),
|
||||
SAFT_VARIANT(SAFT_VARIANT),
|
||||
SAFT_VARIANT_Size(SAFT_VARIANT_Size),
|
||||
|
||||
Abort_ptr(Abort_ptr)
|
||||
Abort_ptr(Abort_ptr)
|
||||
{
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> SAFTHandler::SAFTHandler - Start\n");
|
||||
#endif
|
||||
|
||||
#ifdef debug_OutputInfo
|
||||
printf( "SAFTHandler Constructor\n");
|
||||
#endif
|
||||
|
||||
aScanAllocationCount = USED_ASCANSMEMORYREGIONS; // Anzahl der A-Scan-Speicherbereiche die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer A-ScanCopy genutzt werden sollen.
|
||||
maxSupportedTexturesForAscanIndex = MAX_SUPPORTEDTEXTURES_FORASCANINDEX; // Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
|
||||
|
||||
IMAGE_RESOLUTION_FACTOR = 1 / IMAGE_RESOLUTION; // Auflösung im OutputVolumen
|
||||
SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION; // Auflösung im SoS-Volumen
|
||||
|
||||
#ifdef debug_OutputVariables
|
||||
printf( "IMAGE_RESOLUTION_FACTOR = %e\n", IMAGE_RESOLUTION_FACTOR);
|
||||
printf( "SOS_RESOLUTION_FACTOR = %e\n", SOS_RESOLUTION_FACTOR);
|
||||
printf( "Samplerate = %e\n", sampleRate);
|
||||
#endif
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== SAFTHandler::SAFTHandler - End\n");
|
||||
#endif
|
||||
aScanAllocationCount = USED_ASCANSMEMORYREGIONS; // Anzahl der A-Scan-Speicherbereiche die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer A-ScanCopy genutzt werden sollen.
|
||||
maxSupportedTexturesForAscanIndex = MAX_SUPPORTEDTEXTURES_FORASCANINDEX; // Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
|
||||
|
||||
IMAGE_RESOLUTION_FACTOR = 1 / IMAGE_RESOLUTION; // Auflösung im OutputVolumen
|
||||
SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION; // Auflösung im SoS-Volumen
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -129,271 +94,100 @@ SAFTHandler::SAFTHandler(
|
||||
*/
|
||||
void SAFTHandler::performReconstruction()
|
||||
{
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> SAFTHandler::performReconstruction - Start\n");
|
||||
#endif
|
||||
aScanSamples = (float *)aScan_ptr; // Ascan-Data
|
||||
emitter_index = (unsigned short *)emitter_index_ptr; // Index for associating emitter to corresponding coordinates
|
||||
receiver_index = (unsigned short *)receiver_index_ptr; // Index for associating receiver to corresponding coordinates
|
||||
emitter_list = (float3 *)emitter_list_ptr; // Lookuptable for emitter coordinates
|
||||
receiver_list = (float3 *)receiver_list_ptr; // Lookuptable for receiver coordinates
|
||||
output = (double *)output_ptr; // Output-Data
|
||||
|
||||
speedOfSoundField = (float *)speed_vec_ptr; // For SOS Correction
|
||||
// SoSData = (float*) speed_vec_ptr; // Fuer Blockmode
|
||||
attenuationField = (float *)att_vec_ptr; // For Attenuation Correction
|
||||
|
||||
//Pointeruebergabe der AScan-Daten, Geometrie-Daten und Output-Daten von Matlab
|
||||
#ifdef debug_OutputInfo
|
||||
printf( "Give Pointer Names for AScan, Geometry, Output and SoS-Data from Matlab\n");
|
||||
#endif
|
||||
aScanSamples = (float*) aScan_ptr; // Ascan-Data
|
||||
emitter_index = (unsigned short*) emitter_index_ptr; // Index for associating emitter to corresponding coordinates
|
||||
receiver_index = (unsigned short*) receiver_index_ptr; // Index for associating receiver to corresponding coordinates
|
||||
emitter_list = (float3*) emitter_list_ptr; // Lookuptable for emitter coordinates
|
||||
receiver_list = (float3*) receiver_list_ptr; // Lookuptable for receiver coordinates
|
||||
output = (double*) output_ptr; // Output-Data
|
||||
// Read out GPU-Device Properties
|
||||
// ----------------------------------------------------------
|
||||
// List of all deviceProperties: http://developer.download.nvidia.com/compute/cuda/4_1/rel/toolkit/docs/online/group__CUDART__DEVICE_g5aa4f47938af8276f08074d09b7d520c.html
|
||||
|
||||
speedOfSoundField = (float*) speed_vec_ptr; // For SOS Correction
|
||||
//SoSData = (float*) speed_vec_ptr; // Fuer Blockmode
|
||||
attenuationField = (float*) att_vec_ptr; // For Attenuation Correction
|
||||
// Determine the number of GPU-Devices in System
|
||||
int deviceCount;
|
||||
CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
|
||||
deviceProperties.reserve(static_cast<std::size_t>(deviceCount)); // Request Vector for all GPU Devices with the size deviceCount
|
||||
|
||||
// Determine the number of GPU-Devices in System
|
||||
cudaDeviceProp &device = deviceProperties[deviceId];
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&device, deviceId)); // Read out Properties of current used GPU-Device in this thread
|
||||
|
||||
#ifdef debug_OutputInfo // Name des Device mit ID ausgeben
|
||||
printf( "Device ID: %i\n", deviceId);
|
||||
#endif
|
||||
CUDA_CHECK(cudaSetDevice(deviceId));
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> loadDevices - Start\n");
|
||||
#endif
|
||||
deviceProperties.push_back(device); // Add element at the end of the vector outputProb
|
||||
// Determine minimum supported Surface size. Dependent on device.maxTexture3D[2] and device.maxSurface3D[2]
|
||||
maxSurfaceTexture3DDimension = (device.maxTexture3D[2] < device.maxSurface3D[2]) ? device.maxTexture3D[2] : device.maxSurface3D[2];
|
||||
// printf("DEVICE => maxSurfaceTexture3DDimension = %d (device.maxTexture3D[2] = %d - device.maxSurface3D[2] = %d)\n", maxSurfaceTexture3DDimension, device.maxTexture3D[2],
|
||||
// device.maxSurface3D[2]); // Set maximum Size of Texture
|
||||
|
||||
// Read out GPU-Device Properties
|
||||
// ----------------------------------------------------------
|
||||
// List of all deviceProperties: http://developer.download.nvidia.com/compute/cuda/4_1/rel/toolkit/docs/online/group__CUDART__DEVICE_g5aa4f47938af8276f08074d09b7d520c.html
|
||||
|
||||
// Determine the number of GPU-Devices in System
|
||||
int deviceCount;
|
||||
CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
|
||||
deviceProperties.reserve(static_cast<std::size_t>(deviceCount)); // Request Vector for all GPU Devices with the size deviceCount
|
||||
|
||||
// Determine the number of GPU-Devices in System
|
||||
cudaDeviceProp & device = deviceProperties[deviceId];
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&device, deviceId)); // Read out Properties of current used GPU-Device in this thread
|
||||
//printf("%i. %s\n", deviceId, device.name);
|
||||
//#ifdef debug_OutputInfo
|
||||
//printf( "Device used: %18s (HW-ID %i) (Idx %i)\n", device.name , deviceId, deviceIndex); // Name des Device mit ID ausgeben
|
||||
//#endif
|
||||
CUDA_CHECK(cudaSetDevice(deviceId));
|
||||
|
||||
#ifdef debug_OutputInfo
|
||||
printf("Reset Device\n"); // Reset Device
|
||||
#endif
|
||||
|
||||
// CUDA_CHECK(cudaDeviceReset()); 2019: commented to remove re-initialization when called, avoids blocked threads later on
|
||||
|
||||
//printf("%i. %s\n", deviceId, deviceProperties[deviceId].name);
|
||||
//printf("DEVICE => Maximum 3D texture dimensions: [%d %d %d]\n", device.maxTexture3D[0], device.maxTexture3D[1], device.maxTexture3D[2]);
|
||||
//printf("DEVICE => Maximum width, height, and depth for a 3D surface reference bound to a CUDA array: [%d %d %d]\n", device.maxSurface3D[0], device.maxSurface3D[1], device.maxSurface3D[2]);
|
||||
|
||||
#ifdef debug_OutputInfo
|
||||
printf("%i. %s\n", deviceId, device.name);
|
||||
printf(" Byte Total Global Mem: %lld \n", device.totalGlobalMem);
|
||||
printf(" Compute Capability: %i.%i\n", device.major,device.minor);
|
||||
|
||||
printf(" Name: %s\n", device.name);
|
||||
printf(" Major revision number: %d\n", device.major);
|
||||
printf(" Minor revision number: %d\n", device.minor);
|
||||
printf(" Total global memory: %lld\n", device.totalGlobalMem);
|
||||
printf(" Total shared memory per block: %u\n", device.sharedMemPerBlock);
|
||||
printf(" Total registers per block: %d\n", device.regsPerBlock);
|
||||
printf(" Warp size: %d\n", device.warpSize);
|
||||
printf(" Maximum memory pitch: %lld\n", device.memPitch);
|
||||
printf(" Maximum threads per block: %d\n", device.maxThreadsPerBlock);
|
||||
printf(" Maximum 3D texture dimensions: [%d %d %d]\n", device.maxTexture3D[0], device.maxTexture3D[1], device.maxTexture3D[2]);
|
||||
for (int i = 0; i < 3; ++i)
|
||||
printf(" Maximum dimension %d of block: %lld\n", i, device.maxThreadsDim[i]);
|
||||
for (int i = 0; i < 3; ++i)
|
||||
printf(" Maximum dimension %d of grid: %lld\n", i, device.maxGridSize[i]);
|
||||
printf(" Clock rate: %d\n", device.clockRate);
|
||||
printf(" Total constant memory: %u\n", device.totalConstMem);
|
||||
printf(" Texture alignment: %u\n", device.textureAlignment);
|
||||
printf(" Concurrent copy and execution: %s\n", (device.deviceOverlap ? "Yes" : "No"));
|
||||
printf(" Number of multiprocessors: %d\n", device.multiProcessorCount);
|
||||
printf(" Kernel execution timeout: %s\n\n", (device.kernelExecTimeoutEnabled ? "Yes" : "No"));
|
||||
printf(" Maximum 3D texture dimensions: [%d %d %d]\n", device.maxTexture3D[0], device.maxTexture3D[1], device.maxTexture3D[2]);
|
||||
printf(" Maximum width, height, and depth for a 3D surface reference bound to a CUDA array: [%d %d %d]\n", device.maxSurface3D[0], device.maxSurface3D[1], device.maxSurface3D[2]);
|
||||
#endif
|
||||
|
||||
deviceProperties.push_back(device); // Add element at the end of the vector outputProb
|
||||
|
||||
// printf(" Maximum memory pitch: %lld\n", device.memPitch);
|
||||
// printf(" Texture alignment: %u\n", device.textureAlignment);
|
||||
// printf(" Texture Pitch alignment: %u\n", device.texturePitchAlignment);
|
||||
|
||||
// Determine minimum supported Surface size. Dependent on device.maxTexture3D[2] and device.maxSurface3D[2]
|
||||
maxSurfaceTexture3DDimension = (device.maxTexture3D[2]<device.maxSurface3D[2]) ? device.maxTexture3D[2]:device.maxSurface3D[2];
|
||||
//printf("DEVICE => maxSurfaceTexture3DDimension = %d (device.maxTexture3D[2] = %d - device.maxSurface3D[2] = %d)\n", maxSurfaceTexture3DDimension, device.maxTexture3D[2], device.maxSurface3D[2]); // Set maximum Size of Texture
|
||||
|
||||
//Set the maximal used number of SOS-ZLayers, dependend on SAFT_VARIANT-Parameter 3DVolumeInterpolationAtReconstruction (=3)
|
||||
// Set the maximal used number of SOS-ZLayers, dependend on SAFT_VARIANT-Parameter 3DVolumeInterpolationAtReconstruction (=3)
|
||||
|
||||
//按照目前配置必定 maxFeasibleSosZLayerCount = 2;!!!
|
||||
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
{
|
||||
case 0: // Mit Textur -> 1ne SOS-ZLayer
|
||||
maxFeasibleSosZLayerCount = 1;
|
||||
break;
|
||||
case 1: // Mit Textur & Interpolation -> 2 SOS-ZLayer
|
||||
maxFeasibleSosZLayerCount = 2;
|
||||
break;
|
||||
}
|
||||
#ifdef debug_OutputVariables
|
||||
printf( "Set maxFeasibleSosZLayerCount = %u\n", maxFeasibleSosZLayerCount);
|
||||
#endif
|
||||
{
|
||||
case 0: // Mit Textur -> 1ne SOS-ZLayer
|
||||
maxFeasibleSosZLayerCount = 1;
|
||||
break;
|
||||
case 1: // Mit Textur & Interpolation -> 2 SOS-ZLayer
|
||||
maxFeasibleSosZLayerCount = 2;
|
||||
break;
|
||||
}
|
||||
|
||||
//printf( "AScan Blockgroesse (aScanCount)= %i\n", aScanCount);
|
||||
//printf( "maxSurfaceTexture3DDimension= %i\n", maxSurfaceTexture3DDimension);
|
||||
maxAscanIndexArraysInTexture = maxSurfaceTexture3DDimension / maxFeasibleSosZLayerCount; // Max Anzahl der Ascans in einer Teiltabelle (1024)
|
||||
|
||||
// Fuer Ascan-Index-Varainte von SAFT werden mehrere Texturen benoetigt, da die Anzahl der Z_layer limitiert ist.
|
||||
// Um 3D-Interpolation zu ermoeglichen muessen jeweils 2 Z-Layer pro A-Scan vorhanden sein.
|
||||
// --> 2*nAscans < maxSurfaceTexture3DDimension(Fermi & Kepler: 2048) ==> maximal 1024 Em/Rec - Kombinationen koennen in einem Surface/Textur gespeichert werden
|
||||
// maxSurfaceTexture3DDimension = maximale Groesse die erlaubt ist (2048)
|
||||
// TableAscanIndexAllocationCount = Anzahl der TeilSurfaces ==> auch Anzahl der benoetigten Durchlaeufe (aktuell 4 Texturen)
|
||||
// maxFeasibleSosZLayerCount = Anzahl der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden (1 oder 2 bei Interpolierten Variante)
|
||||
// maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle (1024)
|
||||
// maxSupportedTexturesForAscanIndex = MAX_SUPPORTEDTEXTURES_FORASCANINDEX (=4) // Definiert die aktuell maximal unterstuetzen Texturen im Code fuer AscanIndex
|
||||
// neededAscanBatchCount = Anzahl an benoetigten Durchlaeufe des SAFTs um alle Ascans abarbeiten zu koennen
|
||||
// memoryCheck(); // Freier Speicher am Anfang ausgeben
|
||||
|
||||
maxAscanIndexArraysInTexture = maxSurfaceTexture3DDimension/maxFeasibleSosZLayerCount; // Max Anzahl der Ascans in einer Teiltabelle (1024)
|
||||
// if ((strcmp(device.name, "GeForce GTX 690") == 0)||(strcmp(device.name, "GeForce GTX 590") == 0)){
|
||||
if (memoryGPUfree() <= 2500000000)
|
||||
{ // IF GPUMemory < 2.5GB only 1 Surface can be used
|
||||
maxSupportedTexturesForAscanIndex = 1;
|
||||
}
|
||||
|
||||
neededAscanBatchCount = ceil((float)aScanCount / (maxSurfaceTexture3DDimension / maxFeasibleSosZLayerCount) / maxSupportedTexturesForAscanIndex);
|
||||
// Determine amount of PartSurfaces
|
||||
if (neededAscanBatchCount > 1)
|
||||
{
|
||||
TableAscanIndexAllocationCount = maxSupportedTexturesForAscanIndex; // Wenn mehr als ein Durlauf nötig --> so viele wie möglich nutzen
|
||||
}
|
||||
else
|
||||
{
|
||||
TableAscanIndexAllocationCount = (int)ceil((float)aScanCount / (maxAscanIndexArraysInTexture)); // Wenn nur ein Durlauf nötig --> so wenige wie nötig nutzen
|
||||
}
|
||||
|
||||
#ifdef debug_OutputAScanIndexMemoryDivision
|
||||
printf("%s :\n", device.name);
|
||||
printf(" Total memory %lld Bytes\n", memoryGPUtotal() );
|
||||
printf(" Free memory %lld Bytes\n", memoryGPUfree() );
|
||||
printf(" => Used memory %lld Bytes\n", (memoryGPUtotal()-memoryGPUfree()));
|
||||
#endif
|
||||
//memoryCheck(); // Freier Speicher am Anfang ausgeben
|
||||
// Set Block and Grid-Dimensions for GPU Threads
|
||||
genericSAFTBlockDimensions = fixedBlockDimensions; // fixedBlockDimensions = Parameter BlockDim_XYZ
|
||||
genericSAFTGridDimensions =
|
||||
dim3((IMAGE_SIZE_XYZ.x + genericSAFTBlockDimensions.x - 1) / genericSAFTBlockDimensions.x, // hier wird aufgerundet! Wenn ungerade Aufloesung nicht genau
|
||||
(IMAGE_SIZE_XYZ.y + genericSAFTBlockDimensions.y - 1) / genericSAFTBlockDimensions.y, // in Blockgroesse geteilt werden kann, muss ein weiterer
|
||||
(IMAGE_SIZE_XYZ.z + genericSAFTBlockDimensions.z - 1) / genericSAFTBlockDimensions.z // Block berechnet werden. Wenn insgesamt zu viele werden sie im Kernel aussortiert.
|
||||
);
|
||||
|
||||
// if ((strcmp(device.name, "GeForce GTX 690") == 0)||(strcmp(device.name, "GeForce GTX 590") == 0)){
|
||||
if (memoryGPUfree() <= 2500000000){ // IF GPUMemory < 2.5GB only 1 Surface can be used
|
||||
maxSupportedTexturesForAscanIndex = 1;
|
||||
#ifdef debug_OutputAScanIndexMemoryDivision
|
||||
printf("Free GPU Memory: %lld < 2.5GB\n --> reduce maxSupportedTexturesForAscanIndex 4 -> %i \n", memoryGPUfree(), maxSupportedTexturesForAscanIndex );
|
||||
//printf("GeForce GTX 690/590 \n --> reduce maxSupportedTexturesForAscanIndex 4 -> %i \n", maxSupportedTexturesForAscanIndex );
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef debug_OutputAScanIndexMemoryDivision
|
||||
printf( "--> maxSupportedTexturesForAscanIndex %i \n", maxSupportedTexturesForAscanIndex);
|
||||
//printf( "--> TableAscanIndexAllocationCount %i \n", TableAscanIndexAllocationCount);
|
||||
#endif
|
||||
|
||||
neededAscanBatchCount = ceil((float)aScanCount/(maxSurfaceTexture3DDimension/maxFeasibleSosZLayerCount)/maxSupportedTexturesForAscanIndex);
|
||||
#ifdef debug_OutputAScanIndexMemoryDivision
|
||||
printf("aScanCount %i -> neededAscanBatchCount = %i!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", aScanCount, neededAscanBatchCount);
|
||||
|
||||
//printf("totalGlobalMem = %lld Byte\n", device.totalGlobalMem);
|
||||
//printf("multiProcessorCount = %i MultiProcessors\n", device.multiProcessorCount);
|
||||
#endif
|
||||
|
||||
// Determine amount of PartSurfaces
|
||||
if (neededAscanBatchCount > 1){
|
||||
TableAscanIndexAllocationCount = maxSupportedTexturesForAscanIndex; // Wenn mehr als ein Durlauf nötig --> so viele wie möglich nutzen
|
||||
} else {
|
||||
TableAscanIndexAllocationCount = (int)ceil((float)aScanCount/(maxAscanIndexArraysInTexture)); // Wenn nur ein Durlauf nötig --> so wenige wie nötig nutzen
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Set Block and Grid-Dimensions for GPU Threads
|
||||
genericSAFTBlockDimensions = fixedBlockDimensions; // fixedBlockDimensions = Parameter BlockDim_XYZ
|
||||
genericSAFTGridDimensions = dim3(
|
||||
(IMAGE_SIZE_XYZ.x + genericSAFTBlockDimensions.x-1)/ genericSAFTBlockDimensions.x, // hier wird aufgerundet! Wenn ungerade Aufloesung nicht genau
|
||||
(IMAGE_SIZE_XYZ.y + genericSAFTBlockDimensions.y-1)/ genericSAFTBlockDimensions.y, // in Blockgroesse geteilt werden kann, muss ein weiterer
|
||||
(IMAGE_SIZE_XYZ.z + genericSAFTBlockDimensions.z-1)/ genericSAFTBlockDimensions.z // Block berechnet werden. Wenn insgesamt zu viele werden sie im Kernel aussortiert.
|
||||
);
|
||||
#ifdef debug_OutputVariables
|
||||
printf( "genericSAFTBlockDimensions X,Y,Z = (%i %i %i)\n",genericSAFTBlockDimensions.x, genericSAFTBlockDimensions.y, genericSAFTBlockDimensions.z);
|
||||
printf( "genericSAFTGridDimensions X,Y,Z = (%i %i %i)\n",genericSAFTGridDimensions.x, genericSAFTGridDimensions.y, genericSAFTGridDimensions.z);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Outputsize of SOS Volume
|
||||
// Outputsize of SOS Volume
|
||||
SOSVolume_VoxelCount = SOSGrid_XYZ.x * SOSGrid_XYZ.y * SOSGrid_XYZ.z;
|
||||
SOSVolume_Bytes = SOSVolume_VoxelCount * sizeof(float);
|
||||
#ifdef debug_OutputVariables
|
||||
printf(" SOSVolume_VoxelCount [%ix%ix%i] = %i\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z, SOSVolume_VoxelCount);
|
||||
printf(" SOSVolume_Bytes = SOSVolumeVoxelCount(%i) x sizeof(float = 4) = %i\n", SOSVolume_VoxelCount, SOSVolume_Bytes);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Warn if Outputsize of Volume is too big for 32Bit Sytems
|
||||
outputVolume_VoxelCount = (uint64_t)IMAGE_SIZE_XYZ.x * (uint64_t)IMAGE_SIZE_XYZ.y * (uint64_t)IMAGE_SIZE_XYZ.z; // Anzahl der Voxel im Volumen
|
||||
outputVolume_Bytes = outputVolume_VoxelCount * sizeof(double); // Speicherbedarf fuer alle Voxel im Volumen
|
||||
outputVolume_VoxelCount = (uint64_t)IMAGE_SIZE_XYZ.x * (uint64_t)IMAGE_SIZE_XYZ.y * (uint64_t)IMAGE_SIZE_XYZ.z; // Anzahl der Voxel im Volumen
|
||||
outputVolume_Bytes = outputVolume_VoxelCount * sizeof(double); // Speicherbedarf fuer alle Voxel im Volumen
|
||||
|
||||
#ifdef debug_OutputVariables
|
||||
printf(" outputVolume_VoxelCount [%ix%ix%i]= %lld\n",IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z, outputVolume_VoxelCount);
|
||||
printf(" outputVolume_Bytes [%lld x sizeof(double = 8)] = %lld\n", outputVolume_VoxelCount, outputVolume_Bytes);
|
||||
#endif
|
||||
aScan_Bytes = aScanLength * sizeof(float);
|
||||
aScanBatch_Bytes = aScanCount * aScan_Bytes;
|
||||
|
||||
//Hier auf maximale Outputgroesse von 32-BitSystem ueberpruefen --> falls Probleme mit 32-Bitsystemen hier noch Abfrage und Abbruch implementieren
|
||||
//if (outputVolume_VoxelCount > 536870912) // 536870912 = 2^32 / sizeof(double)
|
||||
// std::cout << "outputVolume_Bytes > 2^32 the upper limit of unsigned integer!!!\n => Reconstruction only in 64-Bit Systems";
|
||||
|
||||
//Groesse der Datenbloecke fuer die Blockverarbeitung wird mit aScanCount angegeben
|
||||
#ifdef debug_OutputVariables
|
||||
printf( "AScan Blockgroesse (aScanCount)= %i\n", aScanCount);
|
||||
#endif
|
||||
aScan_Bytes = aScanLength * sizeof(float);
|
||||
aScanBatch_Bytes = aScanCount * aScan_Bytes;
|
||||
#ifdef debug_OutputVariables
|
||||
printf( "aScan_Bytes = aScanLength(%i) * sizeof(float=4) = %i\n", aScanLength, aScan_Bytes);
|
||||
printf( "aScanCount = %i\n", aScanCount);
|
||||
printf( "aScanBatch_Bytes = aScanCount * aScan_Bytes ( = %i * sizeof(float)) = %i\n", aScanLength, aScanBatch_Bytes);
|
||||
#endif
|
||||
|
||||
#ifdef debug_OutputInfo
|
||||
printf("\nParameter for Image Reconstruction\n");
|
||||
printf( "========================================================================\n");
|
||||
printf( "IMAGE_SIZE_XYZ: [%i x %i x %i]\n", IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z);
|
||||
printf( "outputVolume_VoxelCount: %lld\n", outputVolume_VoxelCount);
|
||||
printf( "Increment vector/Resolution: %f\n", IMAGE_RESOLUTION);
|
||||
printf( "IMAGE_STARTPOINT in m: [%f x %f x %f]\n", regionOfInterestOffset.x, regionOfInterestOffset.y, regionOfInterestOffset.z);
|
||||
outputVolume_size_m.x = IMAGE_SIZE_XYZ.x * IMAGE_RESOLUTION;
|
||||
outputVolume_size_m.y = IMAGE_SIZE_XYZ.y * IMAGE_RESOLUTION;
|
||||
outputVolume_size_m.z = IMAGE_SIZE_XYZ.z * IMAGE_RESOLUTION;
|
||||
printf( "Volume Size in m: [%f x %f x %f]\n", outputVolume_size_m.x, outputVolume_size_m.y, outputVolume_size_m.z);
|
||||
printf( "aScanCount: %i\n", aScanCount);
|
||||
printf( "========================================================================\n\n");
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef debug_OutputPerformance
|
||||
struct timeval startProcessAscans, stopProcessAscans;
|
||||
gettimeofday(&startProcessAscans, NULL);
|
||||
#endif
|
||||
|
||||
//perform processing with AScan-Data
|
||||
//===========================================================================================================
|
||||
//===========================================================================================================
|
||||
ullong duration;
|
||||
processAScans(duration);
|
||||
//===========================================================================================================
|
||||
//===========================================================================================================
|
||||
|
||||
#ifdef debug_OutputPerformance
|
||||
diff_time = (double)((stopProcessAscans.tv_sec * 1000000.0 + stopProcessAscans.tv_usec) - (startProcessAscans.tv_sec * 1000000.0 + startProcessAscans.tv_usec));
|
||||
printf ("########################################################################\n");
|
||||
printf ("### GPU (%18s: HW-ID %i, Idx %i) ### Free Memory = %4.0f µs\n", deviceProperties[deviceId].name, deviceId, deviceIndex, diff_time);
|
||||
printf ("########################################################################\n");
|
||||
#endif
|
||||
|
||||
Duration_ptr[(deviceIndex+1)] = (double)duration; // Für jede GPU einen Laufzeitwert in µs übermitteln, Angabe von Reihenfolge der angegebenen GPU-IDs abhaengig
|
||||
|
||||
#ifdef debug_OutputVariables
|
||||
printf( " GPU (%s:ID %i,Index %i): => Duration_ptr[%i] = duration(%i µs) = %.2f s\n", device.name, deviceId, deviceIndex, (deviceIndex+1), duration, Duration_ptr[(deviceIndex+1)]/1000/1000);
|
||||
#endif
|
||||
|
||||
#ifdef debug_OutputInfo
|
||||
printf("Reset Device\n"); // Reset Device
|
||||
#endif
|
||||
// CUDA_CHECK(cudaDeviceReset()); // news 2019 commented, see above reason.
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== SAFTHandler::performReconstruction - End\n");
|
||||
#endif
|
||||
// perform processing with AScan-Data
|
||||
//===========================================================================================================
|
||||
//===========================================================================================================
|
||||
ullong duration;
|
||||
processAScans(duration);
|
||||
//===========================================================================================================
|
||||
//===========================================================================================================
|
||||
|
||||
Duration_ptr[(deviceIndex + 1)] = (double)duration; // Für jede GPU einen Laufzeitwert in µs übermitteln, Angabe von Reihenfolge der angegebenen GPU-IDs abhaengig
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -402,96 +196,42 @@ void SAFTHandler::performReconstruction()
|
||||
- Der SAFT Kernel erwartet Argumente in den die Grid Dimension auf drei Dimensionen reduziert wurde und die Block-Dimensionen auf nur eine Dimension reduziert ist.
|
||||
- Das haengt auch von den Eigenschaften der verfuegbaren HW ab (shader model)
|
||||
*/
|
||||
void SAFTHandler::reduceKernelDimensions(
|
||||
dim3 const & gridDimensions, ///< Input grid dimensions.
|
||||
dim3 const & blockDimensions, ///< Input block dimensions.
|
||||
dim3 & reducedGridDimensions, ///< Reduced output grid dimensions.
|
||||
dim3 & reducedBlockDimensions ///< Reduced output block dimensions.
|
||||
)
|
||||
void SAFTHandler::reduceKernelDimensions(dim3 const &gridDimensions, ///< Input grid dimensions.
|
||||
dim3 const &blockDimensions, ///< Input block dimensions.
|
||||
dim3 &reducedGridDimensions, ///< Reduced output grid dimensions.
|
||||
dim3 &reducedBlockDimensions ///< Reduced output block dimensions.
|
||||
)
|
||||
{
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> SAFTHandler::reduceKernelDimensions - Start\n");
|
||||
#endif
|
||||
|
||||
|
||||
if(deviceProperties[deviceId].maxGridSize[2] > 1)
|
||||
if (deviceProperties[deviceId].maxGridSize[2] > 1)
|
||||
{
|
||||
reducedGridDimensions = gridDimensions;
|
||||
#ifdef debug_OutputParameter
|
||||
printf( "reducedGridDimensions X,Y,Z = (%i %i %i)\n",reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
reducedGridDimensions = dim3(
|
||||
gridDimensions.x * gridDimensions.y,
|
||||
gridDimensions.z,
|
||||
1
|
||||
);
|
||||
#ifdef debug_OutputParameter
|
||||
printf( "reducedGridDimensions X,Y,Z = (%i %i %i)\n",reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z);
|
||||
#endif
|
||||
reducedGridDimensions = dim3(gridDimensions.x * gridDimensions.y, gridDimensions.z, 1);
|
||||
}
|
||||
|
||||
reducedBlockDimensions = dim3(blockDimensions.x * blockDimensions.y * blockDimensions.z);
|
||||
#ifdef debug_OutputParameter
|
||||
printf( "reducedBlockDimensions X,Y,Z = (%i %i %i)\n", reducedBlockDimensions.x, reducedBlockDimensions.y, reducedBlockDimensions.z);
|
||||
#endif
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== SAFTHandler::reduceKernelDimensions - End\n");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Determine free memory available on the current device.
|
||||
*/
|
||||
std::size_t memoryGPUfree()
|
||||
{
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> memoryGPUfree - Start\n");
|
||||
#endif
|
||||
|
||||
std::size_t
|
||||
totalMemory,
|
||||
freeMemory;
|
||||
std::size_t totalMemory, freeMemory;
|
||||
CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
|
||||
|
||||
// printf(" Total memory %lld Bytes\n", totalMemory);
|
||||
// printf(" Free memory %lld Bytes\n", freeMemory);
|
||||
// printf(" => Used memory %lld Bytes\n", (totalMemory-freeMemory));
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== memoryGPUfree - End\n");
|
||||
#endif
|
||||
return freeMemory;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Determine free memory available on the current device.
|
||||
*/
|
||||
std::size_t memoryGPUtotal()
|
||||
{
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> current - Start\n");
|
||||
#endif
|
||||
|
||||
std::size_t
|
||||
totalMemory,
|
||||
freeMemory;
|
||||
std::size_t totalMemory, freeMemory;
|
||||
CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
|
||||
|
||||
// printf(" Total memory %lld Bytes\n", totalMemory);
|
||||
// printf(" Free memory %lld Bytes\n", freeMemory);
|
||||
// printf(" => Used memory %lld Bytes\n", (totalMemory-freeMemory));
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== current - End\n");
|
||||
#endif
|
||||
return totalMemory;
|
||||
}
|
||||
|
||||
@@ -501,111 +241,125 @@ std::size_t memoryGPUtotal()
|
||||
*/
|
||||
void memoryCheck()
|
||||
{
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "==> memoryCheck - Start\n");
|
||||
#endif
|
||||
#ifdef debug_OutputFunctions
|
||||
printf("==> memoryCheck - Start\n");
|
||||
#endif
|
||||
|
||||
std::size_t
|
||||
totalMemory,
|
||||
freeMemory;
|
||||
float check;
|
||||
std::size_t totalMemory, freeMemory;
|
||||
float check;
|
||||
CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
|
||||
// totalMemory
|
||||
check = 1024.0f * 1024.0f * 1024.0f * 1024.0f;
|
||||
if (totalMemory >= check)
|
||||
{
|
||||
printf(" Total memory %.3f TB\n", totalMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check)
|
||||
{
|
||||
printf(" Total memory %.3f GB\n", totalMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check)
|
||||
{
|
||||
printf(" Total memory %.3f MB\n", totalMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check)
|
||||
{
|
||||
printf(" Total memory %.3f kB\n", totalMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check)
|
||||
printf(" Total memory %.3f Bytes\n", totalMemory / check);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//#if defined(debug_OutputInfo) || defined(debug_OutputMaxMemory)
|
||||
// printf(" Total memory %lld Bytes\n", totalMemory);
|
||||
// printf(" Free memory %lld Bytes\n", freeMemory);
|
||||
// printf(" => Used memory %lld Bytes\n", (totalMemory-freeMemory));
|
||||
|
||||
|
||||
// totalMemory
|
||||
check = 1024.0f*1024.0f*1024.0f*1024.0f;
|
||||
if (totalMemory >= check){
|
||||
printf(" Total memory %.3f TB\n", totalMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check){
|
||||
printf(" Total memory %.3f GB\n", totalMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check){
|
||||
printf(" Total memory %.3f MB\n", totalMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check){
|
||||
printf(" Total memory %.3f kB\n", totalMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (totalMemory >= check)
|
||||
printf(" Total memory %.3f Bytes\n", totalMemory/check);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// freeMemory
|
||||
check = 1024.0f*1024.0f*1024.0f*1024.0f;
|
||||
if (freeMemory >= check){
|
||||
printf(" Free memory %.3f TB\n", freeMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check){
|
||||
printf(" Free memory %.3f GB\n", freeMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check){
|
||||
printf(" Free memory %.3f MB\n", freeMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check){
|
||||
printf(" Free memory %.3f kB\n", freeMemory/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check)
|
||||
printf(" Free memory %.3f Bytes\n", freeMemory/check);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used Memory
|
||||
check = 1024.0f*1024.0f*1024.0f*1024.0f;
|
||||
if ((totalMemory-freeMemory) >= check){
|
||||
printf(" Used memory %.3f TB\n", (totalMemory-freeMemory)/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory-freeMemory) >= check){
|
||||
printf(" Used memory %.3f GB\n", (totalMemory-freeMemory)/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory-freeMemory) >= check){
|
||||
printf(" Used memory %.3f MB\n", (totalMemory-freeMemory)/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory-freeMemory) >= check){
|
||||
printf(" Used memory %.3f kB\n", (totalMemory-freeMemory)/check);
|
||||
} else {
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory-freeMemory) >= check)
|
||||
printf(" Used memory %.3f Bytes\n", (totalMemory-freeMemory)/check);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//#endif
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== memoryCheck - End\n");
|
||||
#endif
|
||||
// freeMemory
|
||||
check = 1024.0f * 1024.0f * 1024.0f * 1024.0f;
|
||||
if (freeMemory >= check)
|
||||
{
|
||||
printf(" Free memory %.3f TB\n", freeMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check)
|
||||
{
|
||||
printf(" Free memory %.3f GB\n", freeMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check)
|
||||
{
|
||||
printf(" Free memory %.3f MB\n", freeMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check)
|
||||
{
|
||||
printf(" Free memory %.3f kB\n", freeMemory / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if (freeMemory >= check)
|
||||
printf(" Free memory %.3f Bytes\n", freeMemory / check);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used Memory
|
||||
check = 1024.0f * 1024.0f * 1024.0f * 1024.0f;
|
||||
if ((totalMemory - freeMemory) >= check)
|
||||
{
|
||||
printf(" Used memory %.3f TB\n", (totalMemory - freeMemory) / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory - freeMemory) >= check)
|
||||
{
|
||||
printf(" Used memory %.3f GB\n", (totalMemory - freeMemory) / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory - freeMemory) >= check)
|
||||
{
|
||||
printf(" Used memory %.3f MB\n", (totalMemory - freeMemory) / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory - freeMemory) >= check)
|
||||
{
|
||||
printf(" Used memory %.3f kB\n", (totalMemory - freeMemory) / check);
|
||||
}
|
||||
else
|
||||
{
|
||||
check /= 1024.0f;
|
||||
if ((totalMemory - freeMemory) >= check)
|
||||
printf(" Used memory %.3f Bytes\n", (totalMemory - freeMemory) / check);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Generic CUDA call wrapper.
|
||||
Check the result of a CUDA operation and throw an exception if an error occurred.
|
||||
@@ -614,21 +368,19 @@ void memoryCheck()
|
||||
- <20>berpr<70>ft die Ergebnisse einer CUDA Operation und wirft eine Exception wenn ein Fehler auftritt
|
||||
- Das wird wird mit einer Kombination mit einem Makro in saft.hpp genutzt.
|
||||
*/
|
||||
//inline // Da performCUDAResultCheck in allen Files genutzt werden soll funktioniert inline und etern nicht zusammen
|
||||
void performCUDAResultCheck(
|
||||
cudaError_t result, ///< Result of the CUDA operation.
|
||||
std::string const & file, ///< Path to the source code file.
|
||||
int line ///< Line within the source code
|
||||
)
|
||||
// inline // Da performCUDAResultCheck in allen Files genutzt werden soll funktioniert inline und etern nicht zusammen
|
||||
void performCUDAResultCheck(cudaError_t result, ///< Result of the CUDA operation.
|
||||
std::string const &file, ///< Path to the source code file.
|
||||
int line ///< Line within the source code
|
||||
)
|
||||
{
|
||||
if(result != cudaSuccess)
|
||||
if (result != cudaSuccess)
|
||||
{
|
||||
//printf("A CUDA operation failed in file \"%s\" (line %i): %s \n", file, line, cudaGetErrorString(result).c_str() );
|
||||
printf("%s\n", cudaGetErrorString( cudaGetLastError() ) );
|
||||
// printf("A CUDA operation failed in file \"%s\" (line %i): %s \n", file, line, cudaGetErrorString(result).c_str() );
|
||||
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
|
||||
|
||||
//std::string errorMessage = "A CUDA operation failed in file \"" + file + "\" (line " + ail::number_to_string(line) + "): " + std::string(cudaGetErrorString(result));
|
||||
//std::cout << errorMessage << std::endl;
|
||||
// std::string errorMessage = "A CUDA operation failed in file \"" + file + "\" (line " + ail::number_to_string(line) + "): " + std::string(cudaGetErrorString(result));
|
||||
// std::cout << errorMessage << std::endl;
|
||||
printf("-> Error occurred");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "saft.hpp"
|
||||
|
||||
/*!
|
||||
This is the central CUDA file which really just includes the other modules.
|
||||
This is done because CUDA does not support external references for referencing data from other compilation units.
|
||||
- Dies ist das zentrale CUDA-File welches nur die anderen Module einbindet
|
||||
- Das wird gemacht, weil CUDA keine externen Referenzen unterst<73>tzt, um Daten von anderen Compilierungs Einheiten zu referenzieren.
|
||||
*/
|
||||
|
||||
#include "kernel/constantMemory.cuh" // Deklaration der Daten, die sich im Constant-Memory befinden Geometriedaten
|
||||
#include "kernel/rayTracing.cuh" // GPU-Code für Bresenham
|
||||
#include "kernel/precalculateSpeedOfSoundKernel.cuh" // GPU-Code Partitionierung für Bresenham. Ruft den Bresenham auf.
|
||||
#include "kernel/saftKernel.cuh" // GPU-Kernel für SAFT
|
||||
|
||||
@@ -1,154 +1,88 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h> // standard input/output
|
||||
#include <vector> // stl vector header
|
||||
#include <stdio.h> // standard input/output
|
||||
|
||||
#include <string>
|
||||
#include <vector> // stl vector header
|
||||
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned long long ullong;
|
||||
|
||||
|
||||
//Define Outputs for Debugmode
|
||||
//============================
|
||||
//#define debug_OutputFormat_German // German Format , instead . for numbers
|
||||
//#define debug_OutputFunctions // Funktionenaufrufe ausgeben
|
||||
//#define debug_OutputVariables // Werte der Variablen ausgeben
|
||||
//#define debug_OutputParameter // Uebersicht der Eingabedaten anzeigen sowie Infobloeke in den einzelnen Schritten
|
||||
//#define debug_OutputMemory // Speicherverwaltung, Malloc, Free, Groessen
|
||||
//#define debug_OutputMaxMemory // Gibt aktuellen Speicherverbrauch an, wenn memoryCheck aufgerufen wird
|
||||
//#define debug_OutputInfo // Gibt Infos zu Schritten, Variablen,... aus
|
||||
//#define debug_OutputPerformance // Gibt die Laufzeiten und die eizelnen Multi-GPU Performanzwerte von ProcessAscans aus
|
||||
//#define debug_OutputStepsPerformance // Gibt die Laufzeiten und für die einzelnen Schritte in performCoreReconstruction aus (Copy Ascans, Precalc, PerfCoreReconstruction, copy back)
|
||||
//#define debug_OutputStepsPrecalculation // Gibt Infos ueber die einzelnen Schritte der Precalculation Steps an
|
||||
//#define debug_OutputHostStepsPerformance // Gibt die Laufzeiten für die eizelnen Schritte auf dem HOST aus (Preintegrated Ascans)
|
||||
//#define debug_OutputSAFTHandlerThreadPerformance // Gibt die Gesamt-Laufzeiten der einzelnen Multi-GPU Threads aus
|
||||
//#define debug_OutputMultiGpu // Einteilung des Volumens auf mehrerer GPUs ausgeben
|
||||
//#define debug_OutputStreams // Gibt die Schritte der Berechnung der Streams aus
|
||||
//#define debug_OutputSOSPaths // Gibt die Schritte und Werte der SOSPfadberechnung aus
|
||||
//#define debug_OutputSOSStepsParameter // Einteilung der ZLayer in SOSZlayer
|
||||
//#define debug_OutputLookUpGeometryMemoryList // Debugausgabe fuer die LookUpGeometryMemoryList (Constant Memory)
|
||||
//#define debug_OutputAScanIndexMemoryDivision // Debugausgabe für die Einteilung in Surfaces da mehrere Surfaces benoetigt werden
|
||||
|
||||
//#define OutputVolume // Ausgabe des Volumens
|
||||
|
||||
// Debugging CUDA Kernels
|
||||
//================================================
|
||||
//#define debug_CudaSAFTKernelModes // Use variable debugMode for different calulations methods and output
|
||||
//#define debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation // Fuer Fehlerberchnungen
|
||||
//#define debug_CudaSAFTKernel
|
||||
//#define debug_CudaPrecalculateKernel
|
||||
//#define debug_CudaPrecalculateAscanIndexKernel // Kernel function for PrecalculateAscanIndex
|
||||
//#define debug_CudaPrecalculateAscanIndexKernelProxy // Proxy function for PrecalculateAscanIndex
|
||||
//#define debug_CudaFillCuArrayKernelProxy
|
||||
//#define debug_CudaSAFTAscanIndexKernel // Kernel function for SAFTAscanIndex
|
||||
//#define debug_CudaSAFTAscanIndexKernelDataAccess // Access and sum up values from Ascans
|
||||
//#define debug_CudaRayTraceKernel
|
||||
//#define debug_CudaRayTraceKernelLive
|
||||
|
||||
|
||||
//#define DebugSetMemoryToZero // Set SOSPathMemory to Zero as Initialisation
|
||||
|
||||
|
||||
// Define specific Hardware-Versions
|
||||
#define GTX_590
|
||||
//#define GTX_690
|
||||
//#define GTX_TITAN
|
||||
|
||||
#if defined(GTX_590)
|
||||
#define GTX_Fermi
|
||||
#endif
|
||||
#if defined(GTX_690) || defined(GTX_TITAN)
|
||||
#define GTX_Kepler
|
||||
#endif
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned long long ullong;
|
||||
|
||||
// Memory management of GPU and Errordetection
|
||||
//================================================
|
||||
//#define SaftNoTexture // Instead of TextureMemory use Memory access on GPU Device Memory // out-of-date
|
||||
//#define SaftCorrectSumAllAscan // Recalculation if too big values occur
|
||||
|
||||
|
||||
// SAFT-Implementierung 2-stufig mit AscanIndexInterpolation
|
||||
//============================================================
|
||||
#define SaftUseAscanIndexInterpolation
|
||||
#define noGeometryLoading
|
||||
//#define SaftUseAscanIndexInterpolation_PartWise // Kernel mit nur einem Durchlauf durchfuehren, sonst ueber Ascans im Kernel laufen
|
||||
#define AscanTextureUse1Float // Textur mit nur Float1 für SOS berechnen
|
||||
// #define SaftNoTexture // Instead of TextureMemory use Memory access on GPU Device Memory // out-of-date
|
||||
// #define SaftCorrectSumAllAscan // Recalculation if too big values occur
|
||||
|
||||
// #define SaftUseAscanIndexInterpolation_PartWise // Kernel mit nur einem Durchlauf durchfuehren, sonst ueber Ascans im Kernel laufen
|
||||
#define AscanTextureUse1Float // Textur mit nur Float1 für SOS berechnen
|
||||
|
||||
// Integration der A-scans im Vornherein durchfuehren um Samplebreite an zu rekonstruierende Aufloesung anzupassen
|
||||
//=======================================================================================================================
|
||||
#define preAscanIntegrationToMatchSamplerateToResolution // Integration der Ascans ueber Fensterbreite durchfuehren
|
||||
//#define debug_preAscanIntegration
|
||||
#define DebugSammleMin 2990 // Grenzen feeru Degbugausgabe der Werte
|
||||
#define DebugSammleMax 3000
|
||||
//#define preAscanIntegrationVersion1Michael // direkt übernommene Version von Michael
|
||||
#define preAscanIntegrationVersion2Ernst // korrigierte Variante mit genauerer Fensterbreite
|
||||
|
||||
#define preAscanIntegrationToMatchSamplerateToResolution // Integration der Ascans ueber Fensterbreite durchfuehren
|
||||
// #define debug_preAscanIntegration
|
||||
#define DebugSammleMin 2990 // Grenzen feeru Degbugausgabe der Werte
|
||||
#define DebugSammleMax 3000
|
||||
// #define preAscanIntegrationVersion1Michael // direkt übernommene Version von Michael
|
||||
#define preAscanIntegrationVersion2Ernst // korrigierte Variante mit genauerer Fensterbreite
|
||||
|
||||
// Parameter fuer SAFT-Kernel
|
||||
//=======================================================================================================================
|
||||
#define USED_ASCANSMEMORYREGIONS 1 // Anzahl der A-Scan-Speicherbereiche die alloziert werden
|
||||
// Hier reicht einer statt zwei! 2 nur nötig wenn A-scans Spückweise mit Streams fuer kopiert werden sollen.
|
||||
#define MAX_SUPPORTEDTEXTURES_FORASCANINDEX 4 // Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
|
||||
#define MAX_SUPPORTEDRECEIVER_FORSOSPATHTEXTURE 710 // Definiert maximale #Receiver in einem SOSPATH-Textur
|
||||
#define MATLABSAVETY_MB 25 // in MB. Matlab belegt zusaetzlich GPU-Speicher, der bei Grenzfällen zum absturz fuehren kann, daher zur Sicherheit Speicher freihalten
|
||||
#define USED_ASCANSMEMORYREGIONS \
|
||||
1 // Anzahl der A-Scan-Speicherbereiche die alloziert werden
|
||||
// Hier reicht einer statt zwei! 2 nur nötig wenn A-scans Spückweise mit Streams fuer kopiert werden sollen.
|
||||
#define MAX_SUPPORTEDTEXTURES_FORASCANINDEX 4 // Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
|
||||
#define MAX_SUPPORTEDRECEIVER_FORSOSPATHTEXTURE 710 // Definiert maximale #Receiver in einem SOSPATH-Textur
|
||||
#define MATLABSAVETY_MB 25 // in MB. Matlab belegt zusaetzlich GPU-Speicher, der bei Grenzfällen zum absturz fuehren kann, daher zur Sicherheit Speicher freihalten
|
||||
|
||||
#define SaftLinearInterpolation // Lineare Interpolation beim Zugriff auf A-scans durchführen
|
||||
|
||||
#define SaftLinearInterpolation // Lineare Interpolation beim Zugriff auf A-scans durchführen
|
||||
#define SaftUseConstantMemforGeometry // Geometriedaten im Constantmemory nutzen
|
||||
|
||||
#define SaftUseConstantMemforGeometry // Geometriedaten im Constantmemory nutzen
|
||||
// #define SaftTextureForEmRecSosPathsTablesFloat1 // Use Float1-Textur for loading SOS-Paths -> Sum, Count separated
|
||||
// #define SaftTextureForEmRecSosPathsTablesFloat2 // Use Float2-Textur for loading SOS-Paths -> Sum + Count for SOS for one position
|
||||
#define SaftTextureForEmRecSosPathsTablesFloat4 // Use Float4-Textur for loading SOS-Paths -> Sum as well Count for SOS and ATT for one position
|
||||
|
||||
//#define SaftTextureForEmRecSosPathsTablesFloat1 // Use Float1-Textur for loading SOS-Paths -> Sum, Count separated
|
||||
//#define SaftTextureForEmRecSosPathsTablesFloat2 // Use Float2-Textur for loading SOS-Paths -> Sum + Count for SOS for one position
|
||||
#define SaftTextureForEmRecSosPathsTablesFloat4 // Use Float4-Textur for loading SOS-Paths -> Sum as well Count for SOS and ATT for one position
|
||||
|
||||
#if defined(SaftTextureForEmRecSosPathsTablesFloat1) || defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
|
||||
#define SaftTextureForEmRecSosPathsTables // Use Textur for loading SOS-Paths, -> Interpolation between SoSVoxelnPaths is possible
|
||||
#endif
|
||||
|
||||
// Several SAFT_VARIANTs
|
||||
#define SAFT_VARIANT_AscanPreintegration 0
|
||||
#define SAFT_VARIANT_AscanInterpolation 1
|
||||
#define SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing 2 // Use interpolation while Preprocessing
|
||||
#define SAFT_VARIANT_3DVolumeInterpolationAtReconstruction 3 // Use interpolation while Reconstruction
|
||||
#define SAFT_VARIANT_CalcStandardDeviation 4 // Not yet implemented
|
||||
#define SAFT_VARIANT_SumUpOverBoarderIndices 5 // Not yet implemented
|
||||
#if defined(SaftTextureForEmRecSosPathsTablesFloat1) || defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
|
||||
#define SaftTextureForEmRecSosPathsTables // Use Textur for loading SOS-Paths, -> Interpolation between SoSVoxelnPaths is possible
|
||||
#endif
|
||||
|
||||
// Several SAFT_VARIANTs
|
||||
#define SAFT_VARIANT_AscanPreintegration 0
|
||||
#define SAFT_VARIANT_AscanInterpolation 1
|
||||
#define SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing 2 // Use interpolation while Preprocessing
|
||||
#define SAFT_VARIANT_3DVolumeInterpolationAtReconstruction 3 // Use interpolation while Reconstruction
|
||||
#define SAFT_VARIANT_CalcStandardDeviation 4 // Not yet implemented
|
||||
#define SAFT_VARIANT_SumUpOverBoarderIndices 5 // Not yet implemented
|
||||
|
||||
// Cache <-> shared Memory
|
||||
//#define SaftPreferSharedMem // cudaFuncCachePreferShared: shared memory is 48 KB
|
||||
#define SaftPreferL1SharedMem // cudaFuncCachePreferL1: shared memory is 16
|
||||
//#define SaftPreferNone // cudaFuncCachePreferNone: no preference
|
||||
// #define SaftPreferSharedMem // cudaFuncCachePreferShared: shared memory is 48 KB
|
||||
#define SaftPreferL1SharedMem // cudaFuncCachePreferL1: shared memory is 16
|
||||
// #define SaftPreferNone // cudaFuncCachePreferNone: no preference
|
||||
|
||||
// Receiver Cache mit shared Memory (nur bei kleinen Blockgroeßen)
|
||||
//#define SaftReceiverSharedMemCacheReceiverDistance
|
||||
//#define SaftCacheReceiverSOS
|
||||
//#define SaftReceiverSharedMemCacheReceiverSOS // Use Shared Memory for Caching
|
||||
//#define SaftRegisterCacheReceiverSOS // Use Register for Caching
|
||||
|
||||
// #define SaftReceiverSharedMemCacheReceiverDistance
|
||||
// #define SaftCacheReceiverSOS
|
||||
// #define SaftReceiverSharedMemCacheReceiverSOS // Use Shared Memory for Caching
|
||||
// #define SaftRegisterCacheReceiverSOS // Use Register for Caching
|
||||
|
||||
// Berechnung der mittleren Schallgeschwindigkeit
|
||||
//================================================
|
||||
#define SaftUseHarmonicMean // harmonic Mean
|
||||
#define SaftUseHarmonicMean // harmonic Mean
|
||||
|
||||
#define SaftTextureForBresenhamSosPaths // Texturmemory für SOS-Volumen nutzen (Version without not full implemented)
|
||||
//#define SaftTextureForBresenhamInterpolated //iSOS-Version --> wird nun ueber Parameter uebergeben
|
||||
//#define SaftUseFastMath //FastMath fuer schnellere Berechnung aber Fehler am Rand. Dafuer ist Korrektur noetig.
|
||||
#define SaftTextureForBresenhamSosPaths // Texturmemory für SOS-Volumen nutzen (Version without not full implemented)
|
||||
// #define SaftTextureForBresenhamInterpolated //iSOS-Version --> wird nun ueber Parameter uebergeben
|
||||
// #define SaftUseFastMath //FastMath fuer schnellere Berechnung aber Fehler am Rand. Dafuer ist Korrektur noetig.
|
||||
|
||||
//#define SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) // Aktuell nicht implementiert
|
||||
#define SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
|
||||
#define SOS_Version2 // korrekte Version mit Definitionen im Mittelpunkt
|
||||
// #define SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) // Aktuell nicht implementiert
|
||||
#define SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
|
||||
#define SOS_Version2 // korrekte Version mit Definitionen im Mittelpunkt
|
||||
|
||||
// MultiGPU
|
||||
//================================================
|
||||
@@ -163,18 +97,16 @@ typedef unsigned long long ullong;
|
||||
// constant such that 64kB of constant is fully blocked by emitter/receiver combinations
|
||||
const int MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY = 2340;
|
||||
|
||||
//Macro used to perform CUDA calls. Throws an exception in case of a CUDA error. Also shows on which line it occurred.
|
||||
// Macro used to perform CUDA calls. Throws an exception in case of a CUDA error. Also shows on which line it occurred.
|
||||
#define CUDA_CHECK(operation) performCUDAResultCheck(operation, __FILE__, __LINE__);
|
||||
|
||||
//Macro used to see when a particular line of code is executed on the host.
|
||||
// Macro used to see when a particular line of code is executed on the host.
|
||||
#define DEBUG_MARK std::cout << "[DEBUG] file " << __FILE__ << ", line " << __LINE__ << std::endl
|
||||
|
||||
|
||||
//Convenient typedefs for containers
|
||||
// Convenient typedefs for containers
|
||||
typedef std::vector<cudaDeviceProp> DeviceProperties;
|
||||
typedef std::vector<dim3> Dimensions;
|
||||
|
||||
|
||||
/**
|
||||
Most important class in the application.
|
||||
- Haupt-Klasse der Applikation
|
||||
@@ -183,399 +115,288 @@ typedef std::vector<dim3> Dimensions;
|
||||
*/
|
||||
class SAFTHandler
|
||||
{
|
||||
public:
|
||||
SAFTHandler(int deviceId,
|
||||
int deviceIndex,
|
||||
float *aScan_ptr, ///< Zeiger zu den AScandaten //std::string const & aScanSamplesPath,
|
||||
double *output_ptr, ///< Zeiger zu den Outputdaten //std::string const & outputPath,
|
||||
double *Duration_ptr, ///< Zeiger auf Ausgabewert f<>r benoetigte Laufzeit des SAFT-Kernels
|
||||
unsigned short *receiver_index_ptr, ///<
|
||||
unsigned short *emitter_index_ptr, ///<
|
||||
float *receiver_list_ptr, ///<
|
||||
int receiver_list_Size, ///<
|
||||
float *emitter_list_ptr, ///<
|
||||
int emitter_list_Size, ///<
|
||||
float *speed_vec_ptr,
|
||||
int3 SOSGrid_XYZ,
|
||||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||||
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||||
float *att_vec_ptr, //att_vec_ptr
|
||||
public:
|
||||
SAFTHandler(int deviceId, int deviceIndex,
|
||||
float *aScan_ptr, ///< Zeiger zu den AScandaten //std::string const & aScanSamplesPath,
|
||||
double *output_ptr, ///< Zeiger zu den Outputdaten //std::string const & outputPath,
|
||||
double *Duration_ptr, ///< Zeiger auf Ausgabewert f<>r benoetigte Laufzeit des SAFT-Kernels
|
||||
unsigned short *receiver_index_ptr, ///<
|
||||
unsigned short *emitter_index_ptr, ///<
|
||||
float *receiver_list_ptr, ///<
|
||||
int receiver_list_Size, ///<
|
||||
float *emitter_list_ptr, ///<
|
||||
int emitter_list_Size, ///<
|
||||
float *speed_vec_ptr, int3 SOSGrid_XYZ,
|
||||
float3 sosOffset, ///< Startpoint of SoSGrid
|
||||
float SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||||
float *att_vec_ptr, // att_vec_ptr
|
||||
|
||||
int aScanCount,
|
||||
int aScanLength,
|
||||
int3 IMAGE_SIZE_XYZ,
|
||||
float sampleRate,
|
||||
float3 regionOfInterestOffset,
|
||||
float IMAGE_RESOLUTION,
|
||||
dim3 const & fixedBlockDimensions,
|
||||
float debugMode,
|
||||
float debugModeParameter,
|
||||
//bool useFixedPartialOutputWindow,
|
||||
int aScanCount, int aScanLength, int3 IMAGE_SIZE_XYZ, float sampleRate, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, dim3 const &fixedBlockDimensions, float debugMode,
|
||||
float debugModeParameter,
|
||||
// bool useFixedPartialOutputWindow,
|
||||
|
||||
bool SOSMode_3DVolume,
|
||||
bool ATTMode_3DVolume,
|
||||
bool SOSMode_3DVolume, bool ATTMode_3DVolume,
|
||||
|
||||
int SAFT_MODE,
|
||||
int *SAFT_VARIANT,
|
||||
int SAFT_VARIANT_Size,
|
||||
int SAFT_MODE, int *SAFT_VARIANT, int SAFT_VARIANT_Size,
|
||||
|
||||
int *Abort_ptr
|
||||
);
|
||||
int *Abort_ptr);
|
||||
|
||||
void performReconstruction();
|
||||
|
||||
private:
|
||||
private:
|
||||
int *Abort_ptr; // Ist ein Fehler aufgetreten der zum Abburch geführt hat
|
||||
// int Abort;
|
||||
|
||||
int *Abort_ptr; // Ist ein Fehler aufgetreten der zum Abburch geführt hat
|
||||
//int Abort;
|
||||
bool SOSMode_3DVolume, ATTMode_3DVolume;
|
||||
|
||||
bool SOSMode_3DVolume,
|
||||
ATTMode_3DVolume;
|
||||
|
||||
int SAFT_MODE;
|
||||
int *SAFT_VARIANT;
|
||||
int *deviceSAFT_VARIANT;
|
||||
int SAFT_VARIANT_Size;
|
||||
int SAFT_MODE;
|
||||
int *SAFT_VARIANT;
|
||||
int *deviceSAFT_VARIANT;
|
||||
int SAFT_VARIANT_Size;
|
||||
|
||||
int deviceId;
|
||||
int deviceIndex;
|
||||
float debugMode;
|
||||
float debugModeParameter;
|
||||
float debugMode;
|
||||
float debugModeParameter;
|
||||
|
||||
DeviceProperties deviceProperties;
|
||||
|
||||
float
|
||||
*aScan_ptr;
|
||||
float *aScan_ptr;
|
||||
|
||||
// float
|
||||
// *rec_vec_ptr,
|
||||
// *send_vec_ptr;
|
||||
// float
|
||||
// *rec_vec_ptr,
|
||||
// *send_vec_ptr;
|
||||
|
||||
// Zuordnungslisten in der geschaut wird welcher Emitter/Receiver genutzt wird (65535 = nicht genutzt, alles andere ist dann der Index)
|
||||
unsigned short* hostLookUpGeometryMemoryListEmitterPtr; // Memory of hostLookUpGeometryMemoryListEmitter
|
||||
unsigned short* hostLookUpGeometryMemoryListReceiverPtr; // Memory of hostLookUpGeometryMemoryListReceiver
|
||||
int lookUpGeometryMemoryListEmitterSize; // Size of hostLookUpGeometryMemoryListEmitterPtr
|
||||
int lookUpGeometryMemoryListReceiverSize;// Size of hostLookUpGeometryMemoryListReceiverPtr
|
||||
unsigned short *hostLookUpGeometryMemoryListEmitterPtr; // Memory of hostLookUpGeometryMemoryListEmitter
|
||||
unsigned short *hostLookUpGeometryMemoryListReceiverPtr; // Memory of hostLookUpGeometryMemoryListReceiver
|
||||
int lookUpGeometryMemoryListEmitterSize; // Size of hostLookUpGeometryMemoryListEmitterPtr
|
||||
int lookUpGeometryMemoryListReceiverSize; // Size of hostLookUpGeometryMemoryListReceiverPtr
|
||||
|
||||
unsigned short
|
||||
*emitter_index_ptr,
|
||||
*receiver_index_ptr;
|
||||
unsigned short *emitter_index_ptr, *receiver_index_ptr;
|
||||
|
||||
float
|
||||
*emitter_list_ptr,
|
||||
*receiver_list_ptr;
|
||||
float *emitter_list_ptr, *receiver_list_ptr;
|
||||
|
||||
int
|
||||
receiver_list_Size,
|
||||
emitter_list_Size;
|
||||
int receiver_list_Size, emitter_list_Size;
|
||||
|
||||
double
|
||||
*output_ptr;
|
||||
double *output_ptr;
|
||||
|
||||
double
|
||||
*Duration_ptr;
|
||||
double *Duration_ptr;
|
||||
|
||||
float
|
||||
Sos,
|
||||
*speed_vec_ptr,
|
||||
*att_vec_ptr;
|
||||
float Sos, *speed_vec_ptr, *att_vec_ptr;
|
||||
|
||||
int3
|
||||
SOSGrid_XYZ;
|
||||
int3 SOSGrid_XYZ;
|
||||
|
||||
float3
|
||||
sosOffset; ///< Startpoint of SoSGrid
|
||||
float3 sosOffset; ///< Startpoint of SoSGrid
|
||||
|
||||
int
|
||||
aScanCount,
|
||||
aScanLength;
|
||||
int aScanCount, aScanLength;
|
||||
|
||||
int3
|
||||
IMAGE_SIZE_XYZ;
|
||||
int3 IMAGE_SIZE_XYZ;
|
||||
|
||||
float3
|
||||
regionOfInterestOffset; //imageStartpoint; TODO: umbenennen!
|
||||
float3 regionOfInterestOffset; // imageStartpoint; TODO: umbenennen!
|
||||
|
||||
float
|
||||
IMAGE_RESOLUTION, ///< Aufl<66>sung im OutputVolumen
|
||||
IMAGE_RESOLUTION_FACTOR, ///< 1/Aufl<EFBFBD>sung im OutputVolumen
|
||||
SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||||
SOS_RESOLUTION_FACTOR; ///< 1/Aufl<66>sung im SoS-Grid
|
||||
float IMAGE_RESOLUTION, ///< Aufl<66>sung im OutputVolumen
|
||||
IMAGE_RESOLUTION_FACTOR, ///< 1/Aufl<EFBFBD>sung im OutputVolumen
|
||||
SOS_RESOLUTION, ///< Aufloesung des SoSGrid
|
||||
SOS_RESOLUTION_FACTOR; ///< 1/Aufl<EFBFBD>sung im SoS-Grid
|
||||
|
||||
std::string
|
||||
emitterGeometryPath,
|
||||
receiverGeometryPath,
|
||||
aScanSamplesPath,
|
||||
outputPath;
|
||||
std::string emitterGeometryPath, receiverGeometryPath, aScanSamplesPath, outputPath;
|
||||
|
||||
float *aScanSamples;
|
||||
double *output;
|
||||
//int aScanCount;
|
||||
// int aScanCount;
|
||||
int
|
||||
//aScanSize,
|
||||
aScan_Bytes,
|
||||
//batchSize, // --> aScanCount
|
||||
//aScanBatchSize;
|
||||
aScanBatch_Bytes;
|
||||
// aScanSize,
|
||||
aScan_Bytes,
|
||||
// batchSize, // --> aScanCount
|
||||
// aScanBatchSize;
|
||||
aScanBatch_Bytes;
|
||||
|
||||
float voxelSize;
|
||||
|
||||
float sampleRate;
|
||||
|
||||
//size_t
|
||||
// size_t
|
||||
uint64_t
|
||||
//regionOfInterestVoxelCount,
|
||||
outputVolume_VoxelCount,
|
||||
//outputSize;
|
||||
outputVolume_Bytes;
|
||||
// regionOfInterestVoxelCount,
|
||||
outputVolume_VoxelCount,
|
||||
// outputSize;
|
||||
outputVolume_Bytes;
|
||||
|
||||
float3 outputVolume_size_m; // ROI-Groesse in meter
|
||||
float3 outputVolume_size_m; // ROI-Groesse in meter
|
||||
|
||||
uint64_t
|
||||
partialOutputZLayerOffset;
|
||||
uint64_t partialOutputZLayerOffset;
|
||||
|
||||
int
|
||||
partialOutputZLayerOffsetCount,
|
||||
partialOutputSoSZLayerCount,
|
||||
currentZLayerCount,
|
||||
partialSoSZLayerCount;
|
||||
int partialOutputZLayerOffsetCount, partialOutputSoSZLayerCount, currentZLayerCount, partialSoSZLayerCount;
|
||||
|
||||
// Fuer AscanIndexInterpolation
|
||||
// ------------------------------------------------------
|
||||
int currentEmIndexUsedForAscanIndexCalculation;
|
||||
float *deviceTextureAscanIndexFloat; // Texture adresses for precalculated AscanIndex data
|
||||
//std::size_t deviceTextureAscanIndexFloatSize;
|
||||
int currentEmIndexUsedForAscanIndexCalculation;
|
||||
float *deviceTextureAscanIndexFloat; // Texture adresses for precalculated AscanIndex data
|
||||
// std::size_t deviceTextureAscanIndexFloatSize;
|
||||
|
||||
cudaArray **deviceTextureAscanIndexFloatCuArray; // CudaArray for AscanIndex data
|
||||
int maxSurfaceTexture3DDimension; // max Dimension in 3D --> Max size for Texture
|
||||
int maxAscanIndexArraysInTexture; // = maxSurfaceTexture3DDimension/2;
|
||||
int TableAscanIndexAllocationCount; // Anzahl der benoetigten AscanBlocks der Groesse 2048/4096
|
||||
int maxSupportedTexturesForAscanIndex; // Definiert die maximal unterstuetzen Texturen fuer AscanIndex
|
||||
int neededAscanBatchCount; // Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
|
||||
cudaArray **deviceTextureAscanIndexFloatCuArray; // CudaArray for AscanIndex data
|
||||
int maxSurfaceTexture3DDimension; // max Dimension in 3D --> Max size for Texture
|
||||
int maxAscanIndexArraysInTexture; // = maxSurfaceTexture3DDimension/2;
|
||||
int TableAscanIndexAllocationCount; // Anzahl der benoetigten AscanBlocks der Groesse 2048/4096
|
||||
int maxSupportedTexturesForAscanIndex; // Definiert die maximal unterstuetzen Texturen fuer AscanIndex
|
||||
int neededAscanBatchCount; // Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
|
||||
// ------------------------------------------------------
|
||||
|
||||
double *currentHostOutputAdress;
|
||||
|
||||
// Pointer of Inputdata in memory of Ascanblock
|
||||
float3
|
||||
*receiver_list, // LookUpTable receiverNr -> coordinates
|
||||
*emitter_list; // LookUpTable emitterNr -> coordinates
|
||||
float3 *receiver_list, // LookUpTable receiverNr -> coordinates
|
||||
*emitter_list; // LookUpTable emitterNr -> coordinates
|
||||
|
||||
unsigned short
|
||||
*receiver_index, // Input Ascanblockdata: corresponding receiverNr
|
||||
*emitter_index; // Input Ascanblockdata: corresponding emitterNr
|
||||
unsigned short *receiver_index, // Input Ascanblockdata: corresponding receiverNr
|
||||
*emitter_index; // Input Ascanblockdata: corresponding emitterNr
|
||||
|
||||
//float
|
||||
// float
|
||||
// *SoSData; // Input Ascanblockdata: Corresponding SOS value
|
||||
|
||||
float *speedOfSoundField; // Input Ascanblockdata: Corresponding SOS value as volume TODO: ==> in speedOfSoundGrid umbenennen
|
||||
float *attenuationField; // Input Ascanblockdata: Corresponding ATT value as volume TODO: ==> in attenuationGrid umbenennen
|
||||
float *speedOfSoundField; // Input Ascanblockdata: Corresponding SOS value as volume TODO: ==> in speedOfSoundGrid umbenennen
|
||||
float *attenuationField; // Input Ascanblockdata: Corresponding ATT value as volume TODO: ==> in attenuationGrid umbenennen
|
||||
|
||||
#ifdef SaftUseSosAttFloat2
|
||||
float2 *hostSosAttField;
|
||||
#endif
|
||||
float2 *hostSosAttField;
|
||||
|
||||
// Memorysizes
|
||||
int
|
||||
//speedOfSoundFieldVoxelCount, //
|
||||
SOSVolume_VoxelCount, // Amount of Voxels of SOSVolume
|
||||
//speedOfSoundFieldBytes, //
|
||||
SOSVolume_Bytes, // Size of SOSVolume in Byte
|
||||
speedOfSoundEmitterVoxelPathCountByteSize, // Speichergroesse fuer die Anzahl der Voxel, die auf einem Pfad liegen
|
||||
speedOfSoundEmitterVoxelPathSumByteSize; // Speichergroesse fuer die Summe der Schallgeschwindigkeiten auf dem Pfad zu einem Voxel
|
||||
// speedOfSoundFieldVoxelCount, //
|
||||
SOSVolume_VoxelCount, // Amount of Voxels of SOSVolume
|
||||
// speedOfSoundFieldBytes, //
|
||||
SOSVolume_Bytes, // Size of SOSVolume in Byte
|
||||
speedOfSoundEmitterVoxelPathCountByteSize, // Speichergroesse fuer die Anzahl der Voxel, die auf einem Pfad liegen
|
||||
speedOfSoundEmitterVoxelPathSumByteSize; // Speichergroesse fuer die Summe der Schallgeschwindigkeiten auf dem Pfad zu einem Voxel
|
||||
|
||||
dim3
|
||||
fixedBlockDimensions, // kann ws durch genericSAFTBlockDimensions ersetzt
|
||||
genericSAFTBlockDimensions,
|
||||
genericSAFTGridDimensions,
|
||||
windowGridDimensions;
|
||||
dim3 fixedBlockDimensions, // kann ws durch genericSAFTBlockDimensions ersetzt
|
||||
genericSAFTBlockDimensions, genericSAFTGridDimensions, windowGridDimensions;
|
||||
|
||||
cudaArray **deviceAScansCuArray;
|
||||
cudaArray **deviceAScansCuArray;
|
||||
|
||||
#ifdef SaftTextureForBresenhamSosPaths
|
||||
cudaArray *deviceSosAttFieldCuArray;
|
||||
|
||||
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
|
||||
cudaArray *deviceSpeedOfSoundFieldCuArray; // SOS volume
|
||||
cudaArray *deviceAttenuationFieldCuArray; // ATT volume
|
||||
#endif
|
||||
int maxSoSReceiverArrayForTexture;
|
||||
int TableVoxelToReceiverPathSosAllocationCount;
|
||||
std::size_t receiver_list_Size_deviceMemory;
|
||||
|
||||
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
|
||||
cudaArray *deviceSosAttFieldCuArray;
|
||||
#endif
|
||||
#endif
|
||||
// Für Emitter ----- normal definieren
|
||||
cudaArray *deviceTableVoxelToEmitterPathSosSumCuArray; // SoSSum
|
||||
cudaArray *deviceTableVoxelToEmitterPathCountCuArray; // Count
|
||||
|
||||
// Für Receiver ----- als Arrays definieren da zwei benoetigt
|
||||
cudaArray **deviceTableVoxelToReceiverPathSosSumCuArray; // SoSSum
|
||||
cudaArray **deviceTableVoxelToReceiverPathCountCuArray; // Count
|
||||
|
||||
cudaArray *deviceTableVoxelToEmPathSosBothCuArray; // Emitter SoSSum + Count
|
||||
cudaArray **deviceTableVoxelToRecPathSosBothCuArray; // Receiver SoSSum + Count
|
||||
|
||||
int maxSoSReceiverArrayForTexture;
|
||||
int TableVoxelToReceiverPathSosAllocationCount;
|
||||
std::size_t receiver_list_Size_deviceMemory;
|
||||
|
||||
#ifdef SaftTextureForEmRecSosPathsTables
|
||||
// Für Emitter ----- normal definieren
|
||||
cudaArray *deviceTableVoxelToEmitterPathSosSumCuArray; //SoSSum
|
||||
cudaArray *deviceTableVoxelToEmitterPathCountCuArray; //Count
|
||||
|
||||
// Für Receiver ----- als Arrays definieren da zwei benoetigt
|
||||
cudaArray **deviceTableVoxelToReceiverPathSosSumCuArray; //SoSSum
|
||||
cudaArray **deviceTableVoxelToReceiverPathCountCuArray; //Count
|
||||
#endif
|
||||
|
||||
#if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
|
||||
cudaArray *deviceTableVoxelToEmPathSosBothCuArray; //Emitter SoSSum + Count
|
||||
cudaArray **deviceTableVoxelToRecPathSosBothCuArray; //Receiver SoSSum + Count
|
||||
#endif
|
||||
|
||||
// Schallgeschwindigkeitskorrektur-Mode
|
||||
float *deviceSpeedOfSoundField; // Adressen fuer Speicherfuer Schallgeschwindigkeitsgrid auf der GPU
|
||||
// Schallgeschwindigkeitskorrektur-Mode
|
||||
float *deviceSpeedOfSoundField; // Adressen fuer Speicherfuer Schallgeschwindigkeitsgrid auf der GPU
|
||||
|
||||
// Block-Mode
|
||||
unsigned short *deviceEmitterIndex_block; // Adressen fuer Speicher fuer Index der Geometriedaten auf der GPU
|
||||
unsigned short *deviceEmitterIndex_block; // Adressen fuer Speicher fuer Index der Geometriedaten auf der GPU
|
||||
unsigned short *deviceReceiverIndex_block;
|
||||
float3 *deviceListEmitterGeometry; // Adressen fuer Speicher fuer Zuordnung Index <-> Geometriedaten auf der GPU
|
||||
float3 *deviceListEmitterGeometry; // Adressen fuer Speicher fuer Zuordnung Index <-> Geometriedaten auf der GPU
|
||||
float3 *deviceListReceiverGeometry;
|
||||
|
||||
float *deviceSoSData_block; // Adressen fuer Speicher fuer Schallgeschwindigkeitsdaten auf der GPU
|
||||
|
||||
// VoxelCountType // Adressen fuer Speicher der SoS-Pfade auf der GPU
|
||||
// * deviceTableVoxelToEmitterPathCount,
|
||||
// * deviceTableVoxelToReceiverPathCount;
|
||||
float
|
||||
*deviceTableVoxelToEmitterPathCountFloat, // Texture adresses for precalculated SOS data
|
||||
*deviceTableVoxelToReceiverPathCountFloat,
|
||||
*deviceTableVoxelToEmitterPathSosSum,
|
||||
*deviceTableVoxelToReceiverPathSosSum;
|
||||
float *deviceSoSData_block; // Adressen fuer Speicher fuer Schallgeschwindigkeitsdaten auf der GPU
|
||||
|
||||
float *deviceTableVoxelToEmitterPathCountFloat, // Texture adresses for precalculated SOS data
|
||||
*deviceTableVoxelToReceiverPathCountFloat, *deviceTableVoxelToEmitterPathSosSum, *deviceTableVoxelToReceiverPathSosSum;
|
||||
|
||||
bool *deviceValidEmitterReceiverCombinations;
|
||||
|
||||
int *deviceTransducerVectorAnalysisDistributionCounters;
|
||||
|
||||
// float3
|
||||
// * deviceEmitterGeometry,
|
||||
// * deviceReceiverGeometry;
|
||||
|
||||
int usedAmountOfEmitter, // amount of used emitter
|
||||
usedAmountOfReceiver; // amount of used receiver
|
||||
int usedAmountOfEmitter, // amount of used emitter
|
||||
usedAmountOfReceiver; // amount of used receiver
|
||||
|
||||
// Output volume
|
||||
double *deviceOutput;
|
||||
double *deviceOutput;
|
||||
|
||||
//Streams used for synchronisation
|
||||
cudaStream_t
|
||||
copyStream,
|
||||
calculationStream;
|
||||
// Streams used for synchronisation
|
||||
cudaStream_t copyStream, calculationStream;
|
||||
|
||||
//This variable describes the number of allocations used by the current SAFT mode
|
||||
std::size_t aScanAllocationCount; // Anzahl der Speicher die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer Copy genutzt werden sollen.
|
||||
// This variable describes the number of allocations used by the current SAFT mode
|
||||
std::size_t aScanAllocationCount; // Anzahl der Speicher die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer Copy genutzt werden sollen.
|
||||
|
||||
int
|
||||
invalidEmitterReceiverCombinationsCount,
|
||||
validEmitterReceiverCombinationsCount;
|
||||
int invalidEmitterReceiverCombinationsCount, validEmitterReceiverCombinationsCount;
|
||||
|
||||
Dimensions validBlockDimensions;
|
||||
bool useAutoTuning;
|
||||
// AutoTuningConfiguration autoTuningConfiguration;
|
||||
// AutoTuningConfiguration autoTuningConfiguration;
|
||||
|
||||
size_t
|
||||
partialOutputSize,
|
||||
partialVolumeSize, // Speicher(OutputVolumen), der fuer die entsprechende Anzahl an Z-Layern benoetigt wuerde
|
||||
partialSosPathSize, // Speicher(SOSATTPaths) , der fuer die entsprechende Anzahl an SoS-Z-Layer benoetigt wuerde
|
||||
partialAscanIndexSize, // Speicher(AscanIndex) , der fuer die entsprechende Anzahl an SoS-Z-Layer & Ascans benoetigt wuerde
|
||||
maxFeasibleZLayerCount, // Maximal moegliche Anzahl an Z-Layern wird zu Beginn auf # die in eine SOS Z-layer passt gesetzt.
|
||||
maxFeasibleSosZLayerCount; // Maximal moegliche Anzahl an Sos-Z-Layern wird zu Beginn auf Anzahl der noetigen SoS-Z-Layern für die OutputDaten gesetzt.
|
||||
size_t partialOutputSize,
|
||||
partialVolumeSize, // Speicher(OutputVolumen), der fuer die entsprechende Anzahl an Z-Layern benoetigt wuerde
|
||||
partialSosPathSize, // Speicher(SOSATTPaths) , der fuer die entsprechende Anzahl an SoS-Z-Layer benoetigt wuerde
|
||||
partialAscanIndexSize, // Speicher(AscanIndex) , der fuer die entsprechende Anzahl an SoS-Z-Layer & Ascans benoetigt wuerde
|
||||
maxFeasibleZLayerCount, // Maximal moegliche Anzahl an Z-Layern wird zu Beginn auf # die in eine SOS Z-layer passt gesetzt.
|
||||
maxFeasibleSosZLayerCount; // Maximal moegliche Anzahl an Sos-Z-Layern wird zu Beginn auf Anzahl der noetigen SoS-Z-Layern für die OutputDaten gesetzt.
|
||||
|
||||
int
|
||||
minimumAutoTuningThreadCount,
|
||||
maximumAutoTuningThreadCount;
|
||||
int minimumAutoTuningThreadCount, maximumAutoTuningThreadCount;
|
||||
|
||||
|
||||
//New partial reconstruction data
|
||||
// New partial reconstruction data
|
||||
|
||||
std::size_t partialSpeedOfSoundVoxelCount;
|
||||
std::size_t partialOutputZLayerCount;
|
||||
std::size_t zLayerVoxelCount;
|
||||
std::size_t sosZLayerVoxelCount; // Anzahl der X-Y-SOSVoxel in einer SoS-Layer. //saft.hpp
|
||||
std::size_t sosZLayerVoxelCount; // Anzahl der X-Y-SOSVoxel in einer SoS-Layer. //saft.hpp
|
||||
std::size_t partialOutputVoxelCount;
|
||||
|
||||
double diff_time; // For Time Measurement
|
||||
float transferRate; // For DataTransferrate Measurement
|
||||
float performRate; // For PerformSAFTrate Measurement
|
||||
cudaDeviceProp deviceProp; // Ausgabe der Frequenz
|
||||
|
||||
double diff_time; // For Time Measurement
|
||||
float transferRate; // For DataTransferrate Measurement
|
||||
float performRate; // For PerformSAFTrate Measurement
|
||||
cudaDeviceProp deviceProp; // Ausgabe der Frequenz
|
||||
// Core reconstruction
|
||||
|
||||
|
||||
//Core reconstruction
|
||||
|
||||
void processAScans(ullong & duration);
|
||||
void processAScans(ullong &duration);
|
||||
void performCoreReconstruction();
|
||||
|
||||
//Pre-calculation
|
||||
// Pre-calculation
|
||||
|
||||
//void precalculateAverageSpeedOfSound(int zLayer, int zLayerCount); // TODO: Funktion die nicht mehr benutzt wird?
|
||||
// void analysisOfTransducerVectors();
|
||||
|
||||
// void normalisePerformanceStatisticsOutput();
|
||||
// void printTransducerVectorStatistics();
|
||||
|
||||
//Auto-tuning
|
||||
bool determineGridDimensions(dim3 const & blockDimensions, dim3 & gridDimensions);
|
||||
// Auto-tuning
|
||||
bool determineGridDimensions(dim3 const &blockDimensions, dim3 &gridDimensions);
|
||||
void determineValidBlockDimensions();
|
||||
|
||||
void reduceKernelDimensions(dim3 const &gridDimensions, dim3 const &blockDimensions, dim3 &reducedGridDimensions, dim3 &reducedBlockDimensions);
|
||||
|
||||
void reduceKernelDimensions(dim3 const & gridDimensions, dim3 const & blockDimensions, dim3 & reducedGridDimensions, dim3 & reducedBlockDimensions);
|
||||
|
||||
//Pre-calculation kernels
|
||||
// Pre-calculation kernels
|
||||
//------------------------------------------------------------------------
|
||||
#ifdef SaftUseConstantMemforGeometry
|
||||
//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
|
||||
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
|
||||
#else
|
||||
//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceSpeedOfSoundSumOutput);
|
||||
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, float * deviceSpeedOfSoundSumOutput);
|
||||
#endif
|
||||
void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, float *deviceVoxelCountOutputFloat, float *deviceSpeedOfSoundSumOutput);
|
||||
|
||||
#ifdef SaftUseAscanIndexInterpolation
|
||||
void precalculateAscanIndex
|
||||
(
|
||||
int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for.
|
||||
int maxFeasibleSosZLayerCount ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
|
||||
//int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
|
||||
//int emitter_list_Size, ///< Number of emitter_array got from Matlab
|
||||
//int receiver_list_Size, ///< Number of receiver_array got from Matlab
|
||||
//float * deviceTextureAscanIndexFloatCuArray ///< Out: AscanIndex for the path from Emitter to voxel to Receiver.
|
||||
);
|
||||
void precalculateAscanIndex(int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for.
|
||||
int maxFeasibleSosZLayerCount ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
|
||||
// int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
|
||||
// int emitter_list_Size, ///< Number of emitter_array got from Matlab
|
||||
// int receiver_list_Size, ///< Number of receiver_array got from Matlab
|
||||
// float * deviceTextureAscanIndexFloatCuArray ///< Out: AscanIndex for the path from Emitter to voxel to Receiver.
|
||||
);
|
||||
|
||||
void precalculateAscanIndex_usePaths
|
||||
(
|
||||
int ascanIndex_i, ///< Offset of AscanIndex batch.
|
||||
int aScanWindowSize, ///< Amount of Ascans in AscanIndex batch to process.
|
||||
int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for.
|
||||
int maxFeasibleSosZLayerCount ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
|
||||
// int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated -> No more necessary due to all Combinations-should be Calculated
|
||||
// float * deviceTextureAscanIndexFloatCuArray ///< Out: AscanIndex for the path from Emitter to voxel to Receiver.
|
||||
);
|
||||
void precalculateAscanIndex_usePaths(
|
||||
int ascanIndex_i, ///< Offset of AscanIndex batch.
|
||||
int aScanWindowSize, ///< Amount of Ascans in AscanIndex batch to process.
|
||||
int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for.
|
||||
int maxFeasibleSosZLayerCount ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
|
||||
// int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated -> No more necessary due to all
|
||||
// Combinations-should be Calculated float * deviceTextureAscanIndexFloatCuArray ///< Out: AscanIndex for the path from Emitter to voxel to
|
||||
// Receiver.
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
// Initialize AScanIndexSurface
|
||||
void fillCuArray
|
||||
(
|
||||
float useValue,
|
||||
cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
|
||||
int TableAscanIndexAllocationCount
|
||||
);
|
||||
|
||||
//SAFT Kernel
|
||||
void performSAFT(int aScanIndex, size_t aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, int maxFeasibleSosZLayerCount, int currentEmIndexUsedForAscanIndexCalculation, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, cudaArray * deviceAScansCuArray); //Ascans in CuArray f<>r Texturmemory
|
||||
// Initialize AScanIndexSurface
|
||||
void fillCuArray(float useValue,
|
||||
cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
|
||||
int TableAscanIndexAllocationCount);
|
||||
|
||||
// SAFT Kernel
|
||||
void performSAFT(int aScanIndex, size_t aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer,
|
||||
int speedOfSoundVoxelsWithinZLayers, int maxFeasibleSosZLayerCount, int currentEmIndexUsedForAscanIndexCalculation, dim3 const &windowGridDimensions, dim3 const &gridDimensions,
|
||||
dim3 const &blockDimensions, float *deviceSpeedOfSoundField, cudaArray *deviceAScansCuArray); // Ascans in CuArray f<>r Texturmemory
|
||||
};
|
||||
|
||||
//std::string vectorToString(float3 const & vector);
|
||||
//std::string voxelToString(dim3 const & voxel);
|
||||
extern void memoryCheck();
|
||||
extern std::size_t memoryGPUfree();
|
||||
extern std::size_t memoryGPUtotal();
|
||||
|
||||
extern void performCUDAResultCheck(cudaError_t result, std::string const & file, int line);
|
||||
|
||||
|
||||
extern void performCUDAResultCheck(cudaError_t result, std::string const &file, int line);
|
||||
|
||||
Reference in New Issue
Block a user