feat: refactor & clean kernerl dir in SAFT_TOFI

This commit is contained in:
kradchen
2024-11-19 16:49:21 +08:00
parent d470f12dc1
commit 0e29f139af
10 changed files with 2487 additions and 5499 deletions

View File

@@ -3,7 +3,9 @@ project(SaftTofi)
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
enable_language(CUDA)
find_package (OpenMP REQUIRED)
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp )
file(GLOB_RECURSE cu_files ./src/*.cu)
file(GLOB_RECURSE cuh_files ./src/*.cuh)
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/processAScans.cpp ./src/saft.cpp ${cu_files} ${cuh_files})
target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include )
set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:

View File

@@ -1,33 +0,0 @@
#include "saft.hpp"
/*!
Emitter and receiver geometry held in constant memory, available across all functions in saft.cu because all of it is held in the same compilation unit.
- Emitter und Receiver Geometrie werden im Constant Memory gehalten, erreichbar f<>r alle Funktionen in Saft.cu weil alle von ihnen in der selben Kompilierungs-Einheit gehalten werden.
*/
#ifdef SaftUseConstantMemforGeometry
#ifdef SaftUseHarmonicMean
__constant__ float3 emitterPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
__constant__ float3 receiverPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
// __constant__ float3 emitterPOSharmon[157 * 4];
// __constant__ float3 receiverPOSharmon[157 * 9];
float3* constEmitterPtr = &emitterPOSharmon[0];
float3* constReceiverPtr = &receiverPOSharmon[0];
#endif
// LookUpTable for GeometryList and Memory Position
__constant__ unsigned short lookUpGeometryMemoryListEmitter [MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
__constant__ unsigned short lookUpGeometryMemoryListReceiver[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY];
// __constant__ unsigned short lookUpGeometryMemoryListEmitter [157 * 4];
// __constant__ unsigned short lookUpGeometryMemoryListReceiver[157 * 9];
unsigned short* constLookUpGeometryMemoryListEmitterPtr = &lookUpGeometryMemoryListEmitter[0];
unsigned short* constLookUpGeometryMemoryListReceiverPtr = &lookUpGeometryMemoryListReceiver[0];
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
#include "rayTracing.cuh"
texture<float, cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen
texture<float2, cudaTextureType3D, cudaReadModeElementType> texRefSosAttField; // Schritt 1. Textur anlegen

View File

@@ -1,676 +1,167 @@
#include <stdio.h>
#include "saft.hpp"
// Structure of File
//-------------------------------------------------------
//
// __device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat // Teilpfade (genutzt)// Bestimme den SOSVoxel der zu einer Position gehoert, mit Nachkommastelle
//
// __device__ __forceinline__ void processRayTracedVoxelTexture // Für den Fall SaftUseSosAttFloat1 erstmal lassen
// __device__ __forceinline__ void processRayTracedVoxelTextureSosAtt // AscanIndex // Addiere lokale SOS&ATT-Werte und # Voxel im Pfad
//
// Bresenham
// __device__ __forceinline__ void performRayTracedSpeedAdditionTexture // Teilpfade // Dreidimensionale Version des Bresenham Line Algorithmus im Float-Format
#define SQR(X) ((X) * (X))
// -------------------------------------------------------
extern texture<float, cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen
extern texture<float2, cudaTextureType3D, cudaReadModeElementType> texRefSosAttField; // Schritt 1. Textur anlegen
// printf() is only supported
// for devices of compute capability 2.0 and above
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
#define printf(f, ...) ((void)(f, __VA_ARGS__),0)
#endif
#define SQR(X) ((X)*(X))
#define OutputSOSPositionX 20 // Volumen Voxel im SOS-Grid!
#define OutputSOSPositionY 20
#define OutputSOSPositionZ 20 // Echte Z-SoS-Layer ohne Offset
//#ifdef debug_CudaRayTraceKernel
#define OutputSOSPositionX 20 // Volumen Voxel im SOS-Grid!
#define OutputSOSPositionY 20
#define OutputSOSPositionZ 20 // Echte Z-SoS-Layer ohne Offset
// #define ER_PositionX 4 // Emitter Receiver Position im SOS-Grid!
// #define ER_PositionY 37
// #define ER_PositionZ 9
//#endif
#ifdef debug_CudaRayTraceKernelLive
#define OutputPositionX 250 // Volumen Voxel im Volumen!
#define OutputPositionY 250
#define OutputPositionZ 0
#define DebugSosVoxelX 10
#define DebugSosVoxelY 10
#define DebugSosVoxelZ 10
#define ER_PositionX 0 // Emitter Receiver Position im SOS-Grid!
#define ER_PositionY 32
#define ER_PositionZ 0
#endif
//Textur fuer SoSField anlegen
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
texture<float, cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen
#endif
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
texture<float , cudaTextureType3D, cudaReadModeElementType> texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen
texture<float2, cudaTextureType3D, cudaReadModeElementType> texRefSosAttField; // Schritt 1. Textur anlegen
#endif
/**
Determines the voxel within the speed of sound field associated with a position.
- Bestimme den Voxel in dem Schallgeschwindigkeitsfeld der zu einer Position gehoert, mit Nachkommastelle
*/
__device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat( // __forceinline__ zwingt den Compiler diesen Code bei jeden Aufruf direkt einzubinden (nicht als Funktion).
float3 const & position, ///< Position within the scanner.
float3 & voxel, ///< This argument is written to. Output voxel.
float3 & sosOffset,
float & SOS_RESOLUTION_FACTOR
)
__device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat( // __forceinline__ zwingt den Compiler diesen Code bei jeden Aufruf direkt einzubinden (nicht als Funktion).
float3 const &position, ///< Position within the scanner.
float3 &voxel, ///< This argument is written to. Output voxel.
float3 &sosOffset, float &SOS_RESOLUTION_FACTOR)
{
#ifndef SOS_Version2
voxel.x = (position.x - sosOffset.x ) * SOS_RESOLUTION_FACTOR + 0.5f; // SoSVoxel aus Positionsdaten bestimmen
voxel.y = (position.y - sosOffset.y ) * SOS_RESOLUTION_FACTOR + 0.5f;
voxel.z = (position.z - sosOffset.z ) * SOS_RESOLUTION_FACTOR + 0.5f;
#else
voxel.x = (position.x - sosOffset.x ) * SOS_RESOLUTION_FACTOR; // SoSVoxel aus Positionsdaten bestimmen
voxel.y = (position.y - sosOffset.y ) * SOS_RESOLUTION_FACTOR;
voxel.z = (position.z - sosOffset.z ) * SOS_RESOLUTION_FACTOR;
#endif
voxel.x = (position.x - sosOffset.x) * SOS_RESOLUTION_FACTOR; // SoSVoxel aus Positionsdaten bestimmen
voxel.y = (position.y - sosOffset.y) * SOS_RESOLUTION_FACTOR;
voxel.z = (position.z - sosOffset.z) * SOS_RESOLUTION_FACTOR;
}
/**
Process a voxel in the Bresenham algorithm. Float Format
Accumulate speed of sound samples and keep track of the number of voxels in the path.
- Verarbeite einen Voxel in dem Bresenham Algorithmus
- Addiere Schallgeschwindigkeits-Sample
*/
__device__ __forceinline__ void processRayTracedVoxelTexture(
float const * currentVoxelFloat, ///< Bresenham speed of sound voxel float.
int & voxelCount, ///< This argument is written to. Number of voxels in the path so far.
float & totalSpeed, ///< This argument is written to. Speed of sound sample accumulator.
//float const * speedOfSoundField,///< Speed of sound field data containing samples.
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< Pointer to cudaArray for SoSFieldData
int3 const & SOSGrid_XYZ ///< Size of SOS-Grid in XYZ
)
__device__ __forceinline__ void processRayTracedVoxelTexture(float const *currentVoxelFloat, ///< Bresenham speed of sound voxel float.
int &voxelCount, ///< This argument is written to. Number of voxels in the path so far.
float &totalSpeed, ///< This argument is written to. Speed of sound sample accumulator.
// float const * speedOfSoundField,///< Speed of sound field data containing samples.
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< Pointer to cudaArray for SoSFieldData
int3 const &SOSGrid_XYZ ///< Size of SOS-Grid in XYZ
)
{
#ifdef SaftUseHarmonicMean
totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
#endif
#ifdef debug_CudaRayTraceKernel
if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
{
printf("pSOS currentVoxel [%d %d %d]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
//printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
printf(" totalSpeed = %2.10f\n", totalSpeed);
//printf(" totalAttenuation = %2.10f\n", totalAttenuation);
printf(" voxelCount = %d\n", voxelCount);
}
#endif
totalSpeed += 1 / (float)tex3D(texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
}
///////////////////////////////////////////////////////////////////////////////////////////
// AscanIndex - Variante
// kernel version Z.316
///////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
/**
Process a voxel in the Bresenham algorithm. Float Format
Accumulate speed of sound samples and keep track of the number of voxels in the path.
- Addiere lokale Schallgeschwindigkeits-Werte und Anzahl besuchter Voxel im Pfad
*/
// Aufruf: processRayTracedVoxelTextureSosAtt(currentVoxelFloat, voxelCount, totalSpeed, totalAttenuation, deviceSosAttFieldCuArray, SOSGrid_XYZ);
__device__ __forceinline__ void processRayTracedVoxelTextureSosAtt(
float const * currentVoxelFloat, ///< SOS-coordinates of current Voxel for Bresenham in float[3].
float & voxelCount, ///< This argument is written to. Number of voxels in the path so far. // TODO: not necessary here to sum up because it is already known from Bresenham length
float & totalSpeed, ///< This argument is written to. Sum Up total SOS
float & totalAttenuation, ///< This argument is written to. Sum Up total Attenuation
//float const * speedOfSoundField, ///< Speed of sound field data containing samples.
cudaArray *deviceSosAttFieldCuArray, ///< Pointer to cudaArray for SoSATTFieldData // TODO: not necessary if access is used with texture memory
int3 const & SOSGrid_XYZ ///< Size of SOS-Grid in XYZ // TODO: only necessary for boundary-check.
)
float const *currentVoxelFloat, ///< SOS-coordinates of current Voxel for Bresenham in float[3].
float &voxelCount, ///< This argument is written to. Number of voxels in the path so far. // TODO: not necessary here to sum up because it is already known from Bresenham length
float &totalSpeed, ///< This argument is written to. Sum Up total SOS
float &totalAttenuation, ///< This argument is written to. Sum Up total Attenuation
cudaArray *deviceSosAttFieldCuArray, ///< Pointer to cudaArray for SoSATTFieldData // TODO: not necessary if access is used with texture memory
int3 const &SOSGrid_XYZ ///< Size of SOS-Grid in XYZ // TODO: only necessary for boundary-check.
)
{
float2 SosAttValue;
#ifdef SaftUseHarmonicMean
//totalSpeed += 1/speedOfSoundSample; // harmonisches Mittel
// #ifdef SaftTextureForBresenhamInterpolated
// #ifndef SOS_Version2
// totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f );
// #else
// totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f );
// #endif
// #else
#ifndef SOS_Version2
totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2] );
#else
//totalSpeed += 1/tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
//totalAttenuation += tex3D( texRefAbsorptionField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
totalSpeed += 1/tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
//totalAttenuation += tex3D( texRefAbsorptionField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
#endif
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
// if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
// {
// //printf("pSOSATT pathPoint [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
// //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
// printf(". totalSpeed = %2.10f\n", totalSpeed);
// printf(". totalAttenuation = %2.10f\n", totalAttenuation);
// printf(". voxelCount = %d\n", voxelCount);
// }
SosAttValue = tex3D( texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
totalSpeed += 1/SosAttValue.x;
totalAttenuation += SosAttValue.y;
//if (SosAttValue.y > 20)
// printf("!!!!!!!!!!!!!!!!! pSOSATT pathPoint [%f %f %f] = %f\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2], SosAttValue.y);
// if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
// {
// //printf("pSOSATT pathPoint [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
// //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
// printf(".. totalSpeed = %2.10f\n", totalSpeed);
// printf(".. totalAttenuation = %2.10f\n", totalAttenuation);
// printf(".. voxelCount = %d\n", voxelCount);
// }
#endif
#endif
// #endif
#endif
#ifdef debug_CudaRayTraceKernel
if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ))
{
printf("pSOSATT currentVoxel [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]);
//printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
printf(" totalSpeed = %2.10f\n", totalSpeed);
printf(" totalAttenuation = %2.10f\n", totalAttenuation);
printf(" voxelCount = %d\n", voxelCount);
}
#endif
float2 SosAttValue;
SosAttValue = tex3D(texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
totalSpeed += 1 / SosAttValue.x;
totalAttenuation += SosAttValue.y;
}
__device__ __forceinline__ void performRayTracedSpeedAdditionTexture(float &voxelCount, ///< This argument is written to. Number of voxels within the path traced.
float &totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced.
float &totalAttenuation,
float3 const &point1f, ///< Vector array describing the Voxelcoordinates of emitters or receivers.
dim3 const &point2, ///< Vector array describing the Voxelcoordinates of Voxels.
///////////////////////////////////////////////////////////////////////////////////////////
// aktuell genutze Variante fuer Teilpfade
///////////////////////////////////////////////////////////////////////////////////////////
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur
/**
- Dreidimensionale Version des Bresenham Line Algorithmus im Float-Format
*/
__device__ __forceinline__ void performRayTracedSpeedAdditionTexture(
float & voxelCount, ///< This argument is written to. Number of voxels within the path traced.
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
float & totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced.
#endif
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
float & totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced.
float & totalAttenuation,
#endif
float3 const & point1f, ///< Vector array describing the Voxelcoordinates of emitters or receivers.
dim3 const & point2, ///< Vector array describing the Voxelcoordinates of Voxels.
#ifndef SaftTextureForBresenhamSosPaths
float const * deviceSpeedOfSoundField, ///< Array of speed of sound samples. Dimensions ordered by speed of indices, commencing with the fastest moving one: 1. x 2. y 3. z
#else
cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur
#endif
int3 const & SOSGrid_XYZ, ///< Size of SOS-Grid in XYZ
float3 & sosOffset,
float & SOS_RESOLUTION,
float & IMAGE_RESOLUTION,
float3 regionOfInterestOffset,
#ifdef SaftUseConstantMemforGeometry
int geometry ///< emitters=0 or receivers=1.
#else
float3 const * geometry ///< Vector array describing the positions of emitters or receivers.
#endif
)
int3 const &SOSGrid_XYZ, ///< Size of SOS-Grid in XYZ
float3 &sosOffset, float &SOS_RESOLUTION, float &IMAGE_RESOLUTION, float3 regionOfInterestOffset,
int geometry ///< emitters=0 or receivers=1.
)
{
voxelCount = 0.0f;
totalSpeed = 0.0f;
totalAttenuation = 0.0f;
voxelCount = 0.0f;
totalSpeed = 0.0f;
totalAttenuation = 0.0f;
// Voxel-Koordinaten ebenfalls in float umwandeln
float voxel1f[3] = {point1f.x, point1f.y, point1f.z}; // Point1 liegt im float3 Format vor enthaellt schon +0.5
// Voxel-Koordinaten ebenfalls in float umwandeln
float voxel1f[3]= {point1f.x,point1f.y,point1f.z}; //Point1 liegt im float3 Format vor enthaellt schon +0.5
// float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION;
float voxel2f[3];
//float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION;
float voxel2f[3];
voxel2f[0] = (float)point2.x; // + 0.5f;
voxel2f[1] = (float)point2.y; // + 0.5f;
voxel2f[2] = (float)point2.z; // + 0.5f;
#ifndef SOS_Version2
voxel2f[0] = (regionOfInterestOffset.x - sosOffset.x ) / SOS_RESOLUTION + 0.5f; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen
voxel2f[1] = (regionOfInterestOffset.y - sosOffset.y ) / SOS_RESOLUTION + 0.5f;
voxel2f[2] = (regionOfInterestOffset.z - sosOffset.z ) / SOS_RESOLUTION + 0.5f;
#endif
int greatestDistanceDim = 0; // Gibt die Richtung(X,Y oder Z) an mit der grueueten Entfernung
int slowDim1 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
int slowDim2 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
float greatestDistance_XYZ = 0.0; // Grueuete Distanz in die Richtung mit der grueueten Entfernung
int fastDirectionSteps = 0; // Schritte die gegangen werden.
float m_XYZ[3];
float pathPoint[3];
if ((abs(voxel1f[0] - voxel2f[0]) <= abs(voxel1f[2] - voxel2f[2]))) // X<Z
if ((abs(voxel1f[1] - voxel2f[1]) <= abs(voxel1f[0] - voxel2f[0]))) // Y<Z
{
greatestDistanceDim = 2; // => Z
slowDim1 = 0;
slowDim2 = 1;
}
else if ((abs(voxel1f[2] - voxel2f[2]) <= abs(voxel1f[1] - voxel2f[1]))) // Z<Y
{
greatestDistanceDim = 1; // => Y
slowDim1 = 0;
slowDim2 = 2;
}
else // Z=Y
{
greatestDistanceDim = 2; // => Z
slowDim1 = 0;
slowDim2 = 1;
}
else // X>Z
if ((abs(voxel1f[1] - voxel2f[1]) <= abs(voxel1f[0] - voxel2f[0]))) // Y<X
{
greatestDistanceDim = 0; // => X
slowDim1 = 1;
slowDim2 = 2;
}
else // Y>X
{
greatestDistanceDim = 1; // => Y
slowDim1 = 0;
slowDim2 = 2;
}
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
// printf("-> performRayTracedSpeedAdditionTexture\n");
// printf(" point2 (Vol Pos im SoS) [%d %d %d]\n", point2.x , point2.y , point2.z);
// printf(" voxel1 (E/R Pos im SoS) [%13.6f %13.6f %13.6f]\n", voxel1f[0] , voxel1f[1] , voxel1f[2] ); // Voxel 1 = ER Position
printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]);
printf(" voxel1f = [%3i %3i %3i]\n", (int)voxel1f[0],(int)voxel1f[1],(int)voxel1f[2]);
// printf(" regionOfInterestOffset [%f %f %f]\n", regionOfInterestOffset.x , regionOfInterestOffset.y , regionOfInterestOffset.z ); // Voxel 2 = Volumen Position
// printf(" sosOffset [%f %f %f]\n", sosOffset.x , sosOffset.y , sosOffset.z );
// printf(" SOS_RESOLUTION = %12.8f\n", SOS_RESOLUTION );
// printf(" VoxelIncrement = %12.8f\n", VoxelIncrement );
// //printf(" -> voxel2Start (Vol Pos SoS) [%f %f %f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 Start -> wird noch angepasst
}
// ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel
if (voxel1f[greatestDistanceDim] >= voxel2f[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv
{
fastDirectionSteps = floor(voxel1f[greatestDistanceDim] + 0.5f) - floor(voxel2f[greatestDistanceDim] + 0.5f) + 1;
pathPoint[greatestDistanceDim] = voxel2f[greatestDistanceDim];
pathPoint[slowDim1] = voxel2f[slowDim1];
pathPoint[slowDim2] = voxel2f[slowDim2];
}
else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ
{
fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f) - floor(voxel1f[greatestDistanceDim] + 0.5f) + 1;
pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim];
pathPoint[slowDim1] = voxel1f[slowDim1];
pathPoint[slowDim2] = voxel1f[slowDim2];
}
#endif
// Steigung bestimmen
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
greatestDistance_XYZ = ((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
m_XYZ[greatestDistanceDim] = 1.0f; // Wegen Rundungsfehler bei der Division, die auftreten koennen.
// m_XYZ[greatestDistanceDim] = (voxel2f[greatestDistanceDim] - voxel1f[greatestDistanceDim]) / greatestDistance_XYZ;
m_XYZ[slowDim1] = (voxel2f[slowDim1] - voxel1f[slowDim1]) / greatestDistance_XYZ;
m_XYZ[slowDim2] = (voxel2f[slowDim2] - voxel1f[slowDim2]) / greatestDistance_XYZ;
#ifndef SOS_Version2
// Herausfinden wo genau der Voxel anfaengt/die Koordinaten des Voxels
voxel2f[0] = (float)point2.x + VoxelIncrement - fmod(((float)point2.x-voxel2f[0]), VoxelIncrement);
voxel2f[1] = (float)point2.y + VoxelIncrement - fmod(((float)point2.y-voxel2f[1]), VoxelIncrement);
voxel2f[2] = (float)point2.z + VoxelIncrement - fmod(((float)point2.z-voxel2f[2]), VoxelIncrement);
#else
//Vom Mittelpunkt ausgehen
voxel2f[0] = (float)point2.x;// + 0.5f;
voxel2f[1] = (float)point2.y;// + 0.5f;
voxel2f[2] = (float)point2.z;// + 0.5f;
#endif
int j = 0;
for (j = fastDirectionSteps; j > 0; j--) //(Alle Punkte innerhalb der Schleife berechnen)
{
processRayTracedVoxelTextureSosAtt(pathPoint, voxelCount, totalSpeed, totalAttenuation, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] + m_XYZ[greatestDistanceDim];
pathPoint[slowDim1] = pathPoint[slowDim1] + m_XYZ[slowDim1];
pathPoint[slowDim2] = pathPoint[slowDim2] + m_XYZ[slowDim2];
}
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
printf(" -> voxel2Angepasst (Vol Pos SoS) [%13.6f %13.6f %13.6f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 nach anpassung
//printf("-> performRayTracedSpeedAdditionTexture\n");
// printf(" voxel1 (E/R Pos im SoS) [%f %f %f]\n", voxel1f[0] , voxel1f[1] , voxel1f[2] ); // Voxel 1 = ER Position
// printf(" voxel2 (Vol Pos im SoS) [%f %f %f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 = Volumen Position
// printf(" regionOfInterestOffset [%f %f %f]\n", regionOfInterestOffset.x , regionOfInterestOffset.y , regionOfInterestOffset.z ); // Voxel 2 = Volumen Position
// printf(" sosOffset [%f %f %f]\n", sosOffset.x , sosOffset.y , sosOffset.z );
// printf(" SOS_RESOLUTION = %f\n", SOS_RESOLUTION );
// printf(" VoxelIncrement = %f\n", VoxelIncrement );
// printf(" point2 (Vol Pos im SoS) [%f %f %f]\n", point2.x , point2.y , point2.z);
}
#endif
int greatestDistanceDim = 0; // Gibt die Richtung(X,Y oder Z) an mit der grueueten Entfernung
int slowDim1 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
int slowDim2 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung;
float greatestDistance_XYZ = 0.0; // Grueuete Distanz in die Richtung mit der grueueten Entfernung
#ifdef debug_CudaRayTraceKernel
bool m_positiv = 0; // Steigung positiv?
#endif
int fastDirectionSteps = 0; // Schritte die gegangen werden.
#if defined(debug_CudaRayTraceKernel) || (! defined(SOS_Version2))
float distance_XYZ[3];
float mFastDirectionSteps_XYZ[3];
float mDistance_XYZ[3];
#endif
float m_XYZ[3];
float pathPoint[3];
#ifdef SOS_Version3
float endPoint[3];
#endif
// Welcher Abstand in den Dimensionen XYZ ist der groesste? Um Richtung zu bestimmen
#ifndef SOS_Version2
if ( (abs(voxel1f[0]-voxel2f[0]) <= abs(voxel1f[2]-voxel2f[2]))) // X<Z
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<Z
greatestDistanceDim = 2; // => Z
else if ( (abs(voxel1f[2]-voxel2f[2]) <= abs(voxel1f[1]-voxel2f[1]))) // Z<Y
greatestDistanceDim = 1; // => Y
else // Z=Y
greatestDistanceDim = 2; // => Z
else // X>Z
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<X
greatestDistanceDim = 0; // => X
else // Y>X
greatestDistanceDim = 1; // => Y
#else
if ( (abs(voxel1f[0]-voxel2f[0]) <= abs(voxel1f[2]-voxel2f[2]))) // X<Z
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<Z
{
greatestDistanceDim = 2; // => Z
slowDim1 = 0;
slowDim2 = 1;
}
else if ( (abs(voxel1f[2]-voxel2f[2]) <= abs(voxel1f[1]-voxel2f[1]))) // Z<Y
{
greatestDistanceDim = 1; // => Y
slowDim1 = 0;
slowDim2 = 2;
}
else // Z=Y
{
greatestDistanceDim = 2; // => Z
slowDim1 = 0;
slowDim2 = 1;
}
else // X>Z
if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y<X
{
greatestDistanceDim = 0; // => X
slowDim1 = 1;
slowDim2 = 2;
}
else // Y>X
{
greatestDistanceDim = 1; // => Y
slowDim1 = 0;
slowDim2 = 2;
}
#endif
#ifndef SOS_Version2
#ifdef debug_CudaRayTraceKernel
greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
distance_XYZ[0] = (voxel2f[0] - voxel1f[0]); // Abstand in X-Richtung
distance_XYZ[1] = (voxel2f[1] - voxel1f[1]); // Abstand in Y-Richtung
distance_XYZ[2] = (voxel2f[2] - voxel1f[2]); // Abstand in Z-Richtung
#endif
pathPoint[0] = voxel1f[0]; // Voxel1f bzw. Emitter als Startpunkt nutzen
pathPoint[1] = voxel1f[1];
pathPoint[2] = voxel1f[2];
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
// Ausgabe der berechneten Werte
printf(" \n");
printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]);
printf(" voxel2f = [%3f %3f %3f]\n", voxel2f[0],voxel2f[1],voxel2f[2]);
printf(" greatestDistanceDim(X0 Y1 Z2) = %i\n", greatestDistanceDim);
printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ);
printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]);
//printf(" m_positiv = %i\n", m_positiv);
//printf(" fastDirectionSteps = %i\n", fastDirectionSteps); // Fehler wenn negativ!
printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]);
}
#endif
#ifdef debug_CudaRayTraceKernel
// Steigung bestimmen
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
mDistance_XYZ[0] = distance_XYZ[0] / greatestDistance_XYZ;
mDistance_XYZ[1] = distance_XYZ[1] / greatestDistance_XYZ;
mDistance_XYZ[2] = distance_XYZ[2] / greatestDistance_XYZ;
#endif
// ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel
if (voxel2f[greatestDistanceDim] >= pathPoint[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv
{
#ifdef debug_CudaRayTraceKernel
m_positiv = 1;
#endif
fastDirectionSteps = floor(voxel2f[greatestDistanceDim] ) - floor(pathPoint[greatestDistanceDim] ) + 1;
}
else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ
{
#ifdef debug_CudaRayTraceKernel
m_positiv = 0;
#endif
fastDirectionSteps = floor(pathPoint[greatestDistanceDim] ) - floor(voxel2f[greatestDistanceDim] ) + 1;
}
// Steigung bezogen auf die Anzahl der noetigen ganzen Schritte
mFastDirectionSteps_XYZ[0] = distance_XYZ[0]/(float)(fastDirectionSteps-1);
mFastDirectionSteps_XYZ[1] = distance_XYZ[1]/(float)(fastDirectionSteps-1);
mFastDirectionSteps_XYZ[2] = distance_XYZ[2]/(float)(fastDirectionSteps-1);
m_XYZ[0] = mFastDirectionSteps_XYZ[0] ; // Steigung bezogen auf die Anzahl der nuetigen ganzen Schritte in jede Richtung nutzen
m_XYZ[1] = mFastDirectionSteps_XYZ[1] ;
m_XYZ[2] = mFastDirectionSteps_XYZ[2] ;
#else
// ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel
if (voxel1f[greatestDistanceDim] >= voxel2f[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv
{
fastDirectionSteps = floor(voxel1f[greatestDistanceDim] + 0.5f ) - floor(voxel2f[greatestDistanceDim] + 0.5f ) + 1;
pathPoint[greatestDistanceDim] = voxel2f[greatestDistanceDim];
pathPoint[slowDim1] = voxel2f[slowDim1];
pathPoint[slowDim2] = voxel2f[slowDim2];
#ifdef SOS_Version3
endPoint[0] = voxel2f[0];
endPoint[1] = voxel1f[1];
endPoint[2] = voxel1f[2];
#endif
}
else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ
{
fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f ) - floor(voxel1f[greatestDistanceDim] + 0.5f ) + 1;
pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim];
pathPoint[slowDim1] = voxel1f[slowDim1];
pathPoint[slowDim2] = voxel1f[slowDim2];
#ifdef SOS_Version3
endPoint[0] = voxel2f[0];
endPoint[1] = voxel2f[1];
endPoint[2] = voxel2f[2];
#endif
}
#ifdef debug_CudaRayTraceKernel
greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
distance_XYZ[0] = (voxel2f[0] - voxel1f[0]); // Abstand in X-Richtung
distance_XYZ[1] = (voxel2f[1] - voxel1f[1]); // Abstand in Y-Richtung
distance_XYZ[2] = (voxel2f[2] - voxel1f[2]); // Abstand in Z-Richtung
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
// Ausgabe der berechneten Werte
printf(" \n");
printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]);
printf(" voxel2f = [%3f %3f %3f]\n", voxel2f[0],voxel2f[1],voxel2f[2]);
printf(" greatestDistanceDim(X0 Y1 Z2) = %i\n", greatestDistanceDim);
printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ);
printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]);
//printf(" m_positiv = %i\n", m_positiv);
//printf(" fastDirectionSteps = %i\n", fastDirectionSteps); // Fehler wenn negativ!
printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]);
}
// Steigung bestimmen
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
mDistance_XYZ[0] = distance_XYZ[0] / greatestDistance_XYZ;
mDistance_XYZ[1] = distance_XYZ[1] / greatestDistance_XYZ;
mDistance_XYZ[2] = distance_XYZ[2] / greatestDistance_XYZ;
#endif
// Steigung bestimmen
// Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float)
greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float)
m_XYZ[greatestDistanceDim] = 1.0f; // Wegen Rundungsfehler bei der Division, die auftreten koennen.
//m_XYZ[greatestDistanceDim] = (voxel2f[greatestDistanceDim] - voxel1f[greatestDistanceDim]) / greatestDistance_XYZ;
m_XYZ[slowDim1] = (voxel2f[slowDim1] - voxel1f[slowDim1]) / greatestDistance_XYZ;
m_XYZ[slowDim2] = (voxel2f[slowDim2] - voxel1f[slowDim2]) / greatestDistance_XYZ;
#endif
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
// Ausgabe der berechneten Werte
//printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ);
//printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]);
//printf(" m_positiv = %i\n", m_positiv);
printf(" fastDirectionSteps -1 = %i\n", (fastDirectionSteps-1));
printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]);
printf(" mFastDirectionSteps_XYZ = [%3f %3f %3f]\n", mFastDirectionSteps_XYZ[0],mFastDirectionSteps_XYZ[1],mFastDirectionSteps_XYZ[2]);
printf(" m_XYZ = [%3f %3f %3f]\n", m_XYZ[0],m_XYZ[1],m_XYZ[2]);
// Punkte auf Pfad ablaufen
// printf(" \n");
// printf(" Float Bresenham mit Anfangspkt- und Endpktkorrektur Texture\n");
// printf(" =========================================================================================================\n");
// printf(" Step | currentVoxel | X | Y | Z || SoSvalue | delta_SoSvalue\n");
// printf(" ---------------------------------------------------------------------------------------------------------\n");
}
#endif
#ifndef SOS_Version2
// Startpunkt (voxel1f) festlegen
pathPoint[0] = voxel1f[0];
pathPoint[1] = voxel1f[1];
pathPoint[2] = voxel1f[2];
#endif
// Innere schleife fuer den Bresenham ohne Endpunkt
#ifndef SOS_Version2
int j = 1;
for (j=1; j<=(fastDirectionSteps-1); j++) //(Start und Endpunkt werden ausserhalb der Schleife festgelegt)
#else
#ifndef SOS_Version3
int j=0;
for (j=fastDirectionSteps; j>0; j--) //(Alle Punkte innerhalb der Schleife berechnen)
#else
int j = 1;
for (j=1; j<=(fastDirectionSteps-1); j++) //(Endpunkt wird ausserhalb der Schleife festgelegt)
#endif
#endif
{
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
#ifndef SaftTextureForBresenhamSosPaths
// AuskommentiertprocessRayTracedVoxelFloat(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundField, SOSGrid_XYZ);
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, SOSGrid_XYZ);
#else
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
#endif
#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
processRayTracedVoxelTextureSosAtt(pathPoint, voxelCount, totalSpeed, totalAttenuation, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhaengig
//if ((pathPoint[0] == DebugSosVoxelX) && (pathPoint[1] == DebugSosVoxelY) && (pathPoint[2] == DebugSosVoxelZ) && ((int)voxel1f[0] == ER_PositionX) && ((int)voxel1f[1] == ER_PositionY) && ((int)voxel1f[2] == ER_PositionZ)) // Auch von ER-Position
//if ((pathPoint[0] == DebugSosVoxelX) && (pathPoint[1] == DebugSosVoxelY) && (pathPoint[2] == DebugSosVoxelZ)) // Nur von SOS_XYZ-Position
//if ( ((int)voxel1f[0] == ER_PositionX) && ((int)voxel1f[1] == ER_PositionY) && ((int)voxel1f[2] == ER_PositionZ) ) // Nur von ER-Position
{
printf("pSOSATT currentVoxel [%f %f %f] - j(%i)\n", pathPoint[0], pathPoint[1], pathPoint[2], j);
//printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample);
float2 SosAttValue = tex3D( texRefSosAttField, pathPoint[0] + 0.5f, pathPoint[1] + 0.5f, pathPoint[2] + 0.5f);
//totalSpeed += SosAttValue.x;
//totalAttenuation += SosAttValue.y;
printf(" totalSpeed = %2.10f\n", SosAttValue.x, totalSpeed);
printf(" totalAttenuation + %2.10f = %2.10f\n", SosAttValue.y, totalAttenuation);
printf(" voxelCount = %d\n", voxelCount);
}
#endif
// SosAttValue = tex3D( texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f);
// totalSpeed += SosAttValue.x;
// totalAttenuation += SosAttValue.y;
#endif
#endif
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhaengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
//printf(" Step %3i| [%3i %3i %3i] | % 11.6f | % 11.6f | % 11.6f || % 13.6f | % 13.6f \n", j, (int)floor(pathPoint[0]), (int)floor(pathPoint[1]), (int)floor(pathPoint[2]), pathPoint[0], pathPoint[1], pathPoint[2], totalSpeed, speedOfSoundField[((int)floor(pathPoint[2]) * SOSGrid_XYZ.y + (int)floor(pathPoint[1])) * SOSGrid_XYZ.x + (int)floor(pathPoint[0])]);
printf(" Step %3i| [%3i %3i %3i] | % 17.12f % 17.12f % 17.12f || % 13.6f | % 13.6f \n", j, (int)floor(pathPoint[0]+0.5f), (int)floor(pathPoint[1]+0.5f), (int)floor(pathPoint[2]+0.5f), pathPoint[0], pathPoint[1], pathPoint[2], voxelCount/totalSpeed, (float)tex3D( texRefSpeedOfSoundField, pathPoint[0] + 0.5f, pathPoint[1] + 0.5f, pathPoint[2] + 0.5f ) );
}
#endif
#ifndef SOS_Version2
pathPoint[0] = pathPoint[0] + m_XYZ[0];
pathPoint[1] = pathPoint[1] + m_XYZ[1];
pathPoint[2] = pathPoint[2] + m_XYZ[2];
#else
pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] + m_XYZ[greatestDistanceDim];
pathPoint[slowDim1] = pathPoint[slowDim1] + m_XYZ[slowDim1];
pathPoint[slowDim2] = pathPoint[slowDim2] + m_XYZ[slowDim2];
#endif
}
#if (!defined (SOS_Version2)) || defined(SOS_Version3)
// Endpunkt
#ifndef SaftTextureForBresenhamSosPaths
// AuskommentiertprocessRayTracedVoxelFloat(endPoint, voxelCount, totalSpeed, deviceSpeedOfSoundField, SOSGrid_XYZ);
//processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, SOSGrid_XYZ);
#else
processRayTracedVoxelTexture(endPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ);
// TODO: Wieder einen ganzen Schritt entfernen und nur Distanz des Voxels dazufuegen --> dazu erst voxelCount auf float umstellen
voxelCount--;
pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] - m_XYZ[greatestDistanceDim];
pathPoint[slowDim1] = pathPoint[slowDim1] - m_XYZ[slowDim1];
pathPoint[slowDim2] = pathPoint[slowDim2] - m_XYZ[slowDim2];
voxelCount += sqrtf( SQR(pathPoint[1]-endPoint[1]) + SQR(pathPoint[2]-endPoint[2]) + SQR(pathPoint[3]-endPoint[3]) );
#endif
#ifdef debug_CudaRayTraceKernel
if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig
// if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position
{
//printf(" Step %3i| [%3i %3i %3i] | % 11.6f | % 11.6f | % 11.6f || % 13.6f | % 13.6f \n", j, (int)floor(voxel2f[0]), (int)floor(voxel2f[1]), (int)floor(voxel2f[2]), voxel2f[0], voxel2f[1], voxel2f[2], totalSpeed, speedOfSoundField[((int)floor(voxel2f[2]) * SOSGrid_XYZ.y + (int)floor(voxel2f[1])) * SOSGrid_XYZ.x + (int)floor(voxel2f[0])]);
printf("endPkt Step %3i| [%3i %3i %3i] | % 17.12f % 17.12f % 17.12f || % 13.6f | % 13.6f \n", j, (int)floor(endPoint[0]+0.5f), (int)floor(endPoint[1]+0.5f), (int)floor(endPoint[2]+0.5f), endPoint[0], endPoint[1], endPoint[2], voxelCount/totalSpeed, (float)tex3D( texRefSpeedOfSoundField, endPoint[0] + 0.5f, endPoint[1] + 0.5f, endPoint[2] + 0.5f ) );
// printf("<- performRayTracedSpeedAdditionTexture\n");
}
#endif
#else
// keinen Endpunkt extra dazurechnen.
#endif
// Anzahl der Besuchten Voxel liegt von Anfang an fest, daher nicht noetig einzeln aufzuaddieren!
voxelCount=fastDirectionSteps;
// Anzahl der Besuchten Voxel liegt von Anfang an fest, daher nicht noetig einzeln aufzuaddieren!
voxelCount = fastDirectionSteps;
}

View File

@@ -0,0 +1,561 @@
#include "saftKernel.cuh"
texture<float, cudaTextureType2D, cudaReadModeElementType> texRefAscans; // Schritt 1. Textur anlegen
// Texture for loading AscanIndexes without ATT-Correction (float)
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_0;
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_1;
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_2;
texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_3;
// Texture for loading AscanIndexes with ATT-Correction (float2)
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_0;
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_1;
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_2;
texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_3;
__global__ void saftKernelAscanIndex_SOS_ATT( // Version SoSATT-Korrektur
int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction
float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction
int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture
int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures)
int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens
float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids
float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume
float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates
int const blockIndexOffset, ///<
int const speedOfSoundZLayer, ///<
dim3 const gridDimensions, ///<
dim3 const blockDimensions, ///<
float const debugMode, ///<
float const debugModeParameter, ///<
int const *deviceSAFT_VARIANT, ///<
double *output
)
{
/////////////////////////////////////////////////////////////////////////////////////////
// 1. Determine which Voxel is to be calculated in this Kernel
/////////////////////////////////////////////////////////////////////////////////////////
dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread?
(threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y));
// Index of Block for this Thread
unsigned long long int blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0.
((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index
(blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index
(blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index
);
// If Voxel is outside the reconstructed Image leave Kernel
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
return;
/////////////////////////////////////////////////////////////////////////////////////////
// 2. Determine
/////////////////////////////////////////////////////////////////////////////////////////
// - SOSvoxel in which voxel is located
// - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths
// - Variable declarations
/////////////////////////////////////////////////////////////////////////////////////////
// Memory-Index for this Thread for Output-Array of this Voxel
unsigned long long int memoryIndex =
(((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) *
(unsigned long long int)IMAGE_SIZE_XYZ.x +
(unsigned long long int)regionOfInterestVoxel.x);
float3 SosVoxelf; // SoS-Voxel Koordinates in float
// Determine SOS-Voxel-Position
SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
// TexturIndex for access on Texturmemory depending of Voxel
float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory
float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f.
float const TexturIndexY = SosVoxelf.y + 0.5f;
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
__syncthreads();
if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0
{
voxelValue = 0.0;
}
else
{
voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig
}
__syncthreads();
if (TableAscanIndexAllocationCount == 1)
{
float2 currentSOSVoxel_AscanIndexAttValues;
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
// #pragma unroll 2
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
{
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
__syncthreads();
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
}
}
else if (TableAscanIndexAllocationCount == 2)
{
float2 currentSOSVoxel_AscanIndexAttValues;
// #pragma unroll 3
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
{
// 1ten Teil mit selben Index laden
TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y *
tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y *
tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
}
}
else if (TableAscanIndexAllocationCount == 3)
{
float2 currentSOSVoxel_AscanIndexAttValues_0;
float2 currentSOSVoxel_AscanIndexAttValues_1;
float2 currentSOSVoxel_AscanIndexAttValues_2;
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
// #pragma unroll 2
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
{
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexAttValues_0 = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_1 = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_2 = tex3D(texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
__syncthreads();
voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
}
}
else if (TableAscanIndexAllocationCount == 4)
{
float2 currentSOSVoxel_AscanIndexAttValues_0;
float2 currentSOSVoxel_AscanIndexAttValues_1;
float2 currentSOSVoxel_AscanIndexAttValues_2;
float2 currentSOSVoxel_AscanIndexAttValues_3;
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f;
// #pragma unroll 2
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
{
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// syncthreads();
// load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexAttValues_0 = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_1 = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_2 = tex3D(texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_3 = tex3D(texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
__syncthreads();
voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i);
}
}
__syncthreads();
output[memoryIndex] = (double)voxelValue;
}
__global__ void saftKernelAscanIndex_SOS( // Version SoS-Korrektur
int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction
float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction
int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture
int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures)
int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens
float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids
float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume
float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates
int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output
)
{
/////////////////////////////////////////////////////////////////////////////////////////
// 1. Determine which Voxel is to be calculated in this Kernel
/////////////////////////////////////////////////////////////////////////////////////////
dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread?
(threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y));
// Index of Block for this Thread
long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0.
((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index
(blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index
(blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index
);
// If Voxel is outside the reconstructed Image leave Kernel
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
return;
/////////////////////////////////////////////////////////////////////////////////////////
// 2. Determine
/////////////////////////////////////////////////////////////////////////////////////////
// - SOSvoxel in which voxel is located
// - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths
// - Variable declarations
/////////////////////////////////////////////////////////////////////////////////////////
// Memory-Index for this Thread for Output-Array of this Voxel
long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x);
float3 SosVoxelf; // SoS-Voxel Koordinates in float
// Determine SOS-Voxel-Position
SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
// TexturIndex for access on Texturmemory depending of Voxel
float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory
float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f.
float const TexturIndexY = SosVoxelf.y + 0.5f;
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
float currentSOSVoxel_AscanIndexValues;
/////////////////////////////////////////////////////////////////////////////////////////
// 3. SAFT-Algorithmus
/////////////////////////////////////////////////////////////////////////////////////////
// - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen
/////////////////////////////////////////////////////////////////////////////////////////
// Vorgehen Ascanindexvariante
// 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver
// Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z).
// Xmax = 128
// Ymax = 128
// Zmax = 2
// => X = x
// => Y = y
// => Z = Zmax*RecNr+z
// 2. Lade an dieser Stelle den Interpolierten Index.
// 2.a Über Texturmemory
// 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D
// 3. Lade Ascan-Sample an diesem Index und summiere auf
__syncthreads();
if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0
{
voxelValue = 0.0f;
}
else
{
voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig
}
__syncthreads();
// float VoxelAscanIndex2 = 0.0f;
// float voxelValue2 = 0.0f;
if (TableAscanIndexAllocationCount == 1)
{
// #pragma unroll 3
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
// for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
{
TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// nutze immer nur 1tes Surface
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan a
}
}
else if (TableAscanIndexAllocationCount == 2)
{
// #pragma unroll 3
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
{
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// 1ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
}
}
else if (TableAscanIndexAllocationCount == 3)
{
// #pragma unroll 3
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
{
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// 1ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
// 3ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2 * maxAscanIndexArraysInTexture + 0.5f);
}
}
else if (TableAscanIndexAllocationCount == 4)
{
float currentSOSVoxel_AscanIndexValues_0;
float currentSOSVoxel_AscanIndexValues_1;
float currentSOSVoxel_AscanIndexValues_2;
float currentSOSVoxel_AscanIndexValues_3;
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f;
// #pragma unroll 4
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
{
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// syncthreads();
// load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexValues_0 = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexValues_1 = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexValues_2 = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexValues_3 = tex3D(texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i);
// if (((int)ascanIndex_i & 31) == 0) __syncthreads();
}
}
else
{
// Do nothing due to only 4 are defined
}
__syncthreads();
output[memoryIndex] = (double)voxelValue;
}
__global__ void saftKernelAscanIndex( // Version ohne-Korrektur
int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction
float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction
int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture
int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures)
int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens
float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids
float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume
float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates
int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output
)
{
/////////////////////////////////////////////////////////////////////////////////////////
// 1. Determine which Voxel is to be calculated in this Kernel
/////////////////////////////////////////////////////////////////////////////////////////
dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread?
(threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y));
// Index of Block for this Thread
long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0.
((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index
(blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index
(blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index
);
// If Voxel is outside the reconstructed Image leave Kernel
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
return;
/////////////////////////////////////////////////////////////////////////////////////////
// 2. Determine
/////////////////////////////////////////////////////////////////////////////////////////
// - SOSvoxel in which voxel is located
// - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths
// - Variable declarations
/////////////////////////////////////////////////////////////////////////////////////////
// Memory-Index for this Thread for Output-Array of this Voxel
long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x);
float3 SosVoxelf; // SoS-Voxel Koordinates in float
// Determine SOS-Voxel-Position
SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
// TexturIndex for access on Texturmemory depending of Voxel
float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory
float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f.
float const TexturIndexY = SosVoxelf.y + 0.5f;
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
float currentSOSVoxel_AscanIndexValues;
/////////////////////////////////////////////////////////////////////////////////////////
// 3. SAFT-Algorithmus
/////////////////////////////////////////////////////////////////////////////////////////
// - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen
/////////////////////////////////////////////////////////////////////////////////////////
// Vorgehen Ascanindexvariante
// 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver
// Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z).
// Xmax = 128
// Ymax = 128
// Zmax = 2
// => X = x
// => Y = y
// => Z = Zmax*RecNr+z
// 2. Lade an dieser Stelle den Interpolierten Index.
// 2.a Über Texturmemory
// 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D
// 3. Lade Ascan-Sample an diesem Index und summiere auf
__syncthreads();
if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0
{
voxelValue = 0.0f;
}
else
{
voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig
}
__syncthreads();
// float VoxelAscanIndex2 = 0.0f;
// float voxelValue2 = 0.0f;
if (TableAscanIndexAllocationCount == 1)
{
// #pragma unroll 3
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
{
TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// nutze immer nur 1tes Surface
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
}
}
else if (TableAscanIndexAllocationCount == 2)
{
// #pragma unroll 3
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
{
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// 1ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
}
}
else if (TableAscanIndexAllocationCount == 3)
{
// #pragma unroll 3
// for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s
{
TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// 1ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f);
// 3ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2 * maxAscanIndexArraysInTexture + 0.5f);
}
}
else if (TableAscanIndexAllocationCount == 4)
{
float currentSOSVoxel_AscanIndexValues_0;
float currentSOSVoxel_AscanIndexValues_1;
float currentSOSVoxel_AscanIndexValues_2;
float currentSOSVoxel_AscanIndexValues_3;
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f;
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f;
// #pragma unroll 2
for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s
{
TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
//__syncthreads();
// load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexValues_0 = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexValues_1 = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexValues_2 = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexValues_3 = tex3D(texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i);
voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i);
}
}
__syncthreads();
output[memoryIndex] = (double)voxelValue;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,624 @@
#include "precalculateSpeedOfSoundKernel.cuh"
#include "rayTracing.cuh"
#include "saftKernel.cuh"
#include "saft.hpp"
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
{
cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 32, 32,
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und
// beschreiben - Float4
// Both Emitter Path Tables
// --------------------------------------------------------
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth));
// Texturmemory fuer Receiver - SosPathsTables
// ===================================================================================================================
// Both Receiver Path Tables
// ------------------------------------------------------
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToReceiverPathSosBoth0_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth0_preprocess, deviceTableVoxelToRecPathSosBothCuArray[0], &texChannelDescTableVoxelToEmRecPathSosBoth));
if (TableVoxelToReceiverPathSosAllocationCount > 1)
{ // TODO: mit Arrays flexibel programmieren, wenn moeglich!!!
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToReceiverPathSosBoth1_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth1_preprocess, deviceTableVoxelToRecPathSosBothCuArray[1], &texChannelDescTableVoxelToEmRecPathSosBoth));
}
if (TableVoxelToReceiverPathSosAllocationCount > 2)
{
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToReceiverPathSosBoth2_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth2_preprocess, deviceTableVoxelToRecPathSosBothCuArray[2], &texChannelDescTableVoxelToEmRecPathSosBoth));
}
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1);
dim3 blocksPerGrid(1, 1, 1);
blocksPerGrid.x = SOSGrid_XYZ.y;
blocksPerGrid.y = maxFeasibleSosZLayerCount;
blocksPerGrid.z = 1;
// Step 2. Bereite Output-Textur fuer AscanIndex vor
if (TableAscanIndexAllocationCount > 0)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
}
if (TableAscanIndexAllocationCount > 1)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
}
if (TableAscanIndexAllocationCount > 2)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
}
if (TableAscanIndexAllocationCount > 3)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
}
// Step 3. Fuehre Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden
// immer 1024/2048 A-Scans durchlaufen und in AscanIndex-Textur geschrieben
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
{ // ====================================================
// Blockmode with SOS-value per Ascan
precalculateAscanIndex_usePathsKernel<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
// vorberechnet werden können
deviceSosAttFieldCuArray,
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
///< grid the pre-calculation is performed
///< for.
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
///< sound grid the pre-calculation is
///< performed for.
// currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em
// for which the AscanIndex is calculated
maxSoSReceiverArrayForTexture,
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
///< der Groesse 2048/4096
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
///< CUDA Array (fest definiert fuer
///< bestimmung welche Textur genutzt
///< wird)
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
///< the path from transducer to
///< voxel.
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
deviceSAFT_VARIANT);
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
{ // ====================================================
// 3DVolume Mode with SOS-Correction no ATT-Correction
precalculateAscanIndex_usePathsKernel_SOS<<<blocksPerGrid,
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
// vorberechnet werden können
deviceSosAttFieldCuArray,
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
///< grid the pre-calculation is performed
///< for.
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
///< sound grid the pre-calculation is
///< performed for.
// currentEmIndexUsedForAscanIndexCalculation, ///< current
// Index of Em for which the AscanIndex is calculated
maxSoSReceiverArrayForTexture,
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
///< der Groesse 2048/4096
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
///< CUDA Array (fest definiert fuer
///< bestimmung welche Textur genutzt
///< wird)
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
///< the path from transducer to
///< voxel.
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
deviceSAFT_VARIANT);
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
{ // ====================================================
// 3DVolume Mode with SOS- and ATT-Correction
precalculateAscanIndex_usePathsKernel_SOS_ATT<<<blocksPerGrid,
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
// vorberechnet werden können
deviceSosAttFieldCuArray,
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
///< grid the pre-calculation is performed
///< for.
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
///< sound grid the pre-calculation is
///< performed for.
// currentEmIndexUsedForAscanIndexCalculation, ///< current
// Index of Em for which the AscanIndex is calculated
maxSoSReceiverArrayForTexture,
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
///< der Groesse 2048/4096
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
///< CUDA Array (fest definiert fuer
///< bestimmung welche Textur genutzt
///< wird)
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
///< the path from transducer to
///< voxel.
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
deviceSAFT_VARIANT);
}
CUDA_CHECK(cudaGetLastError());
// ==================================================== cudaUnbindTexture
// Texturmemory fuer Emitter - SosPathsTables entbinden
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToEmitterPathSosBoth_preprocess));
// Texturmemory fuer Receiver - SosPathsTables entbinden
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth0_preprocess));
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth1_preprocess));
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth2_preprocess));
}
void SAFTHandler::precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceListGeometry, int geometryElementCount, float *deviceVoxelCountOutputFloat,
float *deviceSpeedOfSoundSumOutput)
{
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
1); // max. 512 oder 1024 Threads werden vorgegeben und
// dim3 threadsPerBlock (SOSGrid_XYZ.x,SOSGrid_XYZ.y,1); // max. 512 oder
// 1024 Threads werden vorgegeben und
dim3 blocksPerGrid(1, 1, 1); // max. 65.535 Bloecke im Grid
// berechnet. Initialisierung
blocksPerGrid.x = SOSGrid_XYZ.y;
blocksPerGrid.y = sosZLayerCount;
blocksPerGrid.z = 1;
cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0,
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal
// anlegen und beschreiben
texRefSosAttField.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texRefSosAttField.addressMode[1] = cudaAddressModeClamp;
texRefSosAttField.addressMode[2] = cudaAddressModeClamp;
if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1)
{
texRefSosAttField.filterMode = cudaFilterModeLinear; // Lineare Interpolation
}
else
{
texRefSosAttField.filterMode = cudaFilterModePoint; // Nearest Neighbor
}
texRefSosAttField.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texRefSosAttField, deviceSosAttFieldCuArray,
&texChannelDescSosAttField)); // Schritt 4.1 3DArray an Texturmemory
// binden
if (deviceListGeometry == 0)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray);
}
if (deviceListGeometry == 1)
{
if (TableVoxelToReceiverPathSosAllocationCount > 0)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]);
}
if (TableVoxelToReceiverPathSosAllocationCount > 1)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]);
}
if (TableVoxelToReceiverPathSosAllocationCount > 2)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]);
}
}
precalculateAverageSpeedOfSoundKernel<<<blocksPerGrid, threadsPerBlock>>>(deviceSosAttFieldCuArray, firstZLayer, sosZLayerCount, deviceListGeometry, geometryElementCount,
maxSoSReceiverArrayForTexture, // maximale Anzahl an Receivern in einem
// CUDA Array
// deviceVoxelCountOutput,
deviceVoxelCountOutputFloat, deviceSpeedOfSoundSumOutput,
// regionOfInterestOffset,
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaUnbindTexture(&texRefSosAttField));
}
void SAFTHandler::fillCuArray(float useValue,
cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
int TableAscanIndexAllocationCount)
{
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
1); // determine neccessary amount of threads
// // max. 512 oder 1024
dim3 blocksPerGrid(1, 1,
1); // determine neccessary amount of blocks in grid // max. 65.535
blocksPerGrid.x = SOSGrid_XYZ.y;
blocksPerGrid.y = maxFeasibleSosZLayerCount;
blocksPerGrid.z = 1;
// Step 1. Bereite Output-Textur fuer AscanIndex vor
if (TableAscanIndexAllocationCount > 0)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
}
if (TableAscanIndexAllocationCount > 1)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
}
if (TableAscanIndexAllocationCount > 2)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
}
if (TableAscanIndexAllocationCount > 3)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
}
// Step 2. Fuere Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden immer
// 1024/2048 A-Scans durchgegangen und in AscanIndex-Textur geschrieben
fillCuArrayKernel<<<blocksPerGrid, threadsPerBlock>>>(useValue,
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS
///< samples in the path
///< from transducer to
///< voxel.
maxAscanIndexArraysInTexture,
TableAscanIndexAllocationCount, ///< Amount of Surfaces in
///< the Array of cuArrays
maxFeasibleSosZLayerCount, ATTMode_3DVolume, debugMode, debugModeParameter);
CUDA_CHECK(cudaGetLastError());
}
void SAFTHandler::performSAFT(
int aScanIndex, ///< The A-scan index is increased by the A-scan batch size in every iteration. It describes the offset into the A-scan samples the SAFT kernel is operating with.
size_t aScanWindowSize, ///< A-scan batch size in terms of number of samples within one window.
int3 IMAGE_SIZE_XYZ, ///< Bildbereichsgroesse/ROI in Voxel
int3 SOSGrid_XYZ, ///< SoSGridgroesse in Voxel
int blockIndexOffset, ///< Additional offset added to the z component of the block index, required because of the adjustments for partial reconstruction in different z-layers.
int outputWindowVoxelCount, ///< Number of Voxels in the output window.
int speedOfSoundZLayer, ///< current SoS z-layer Offset in the speed of sound grid.
int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the z-layers of the active zone of reconstruction in the region of interest.
int maxFeasibleSosZLayerCount,
int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
dim3 const &windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than the full grid dimensions and only represents the current reconstruction window.
dim3 const &gridDimensions, ///< Full grid dimensions of the reconstruction.
dim3 const &blockDimensions, ///< Block dimensions to be used with the SAFT kernel.
float *deviceSpeedOfSoundField, ///< Pointer to SoSGrid.
cudaArray *deviceAScansCuArray
// cudaStream_t stream ///< Stream to execute the SAFT kernel on.
)
{
dim3 reducedGridDimensions, reducedBlockDimensions;
reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions);
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT, cudaFuncCachePreferL1));
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS, cudaFuncCachePreferL1));
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex, cudaFuncCachePreferL1));
// Texture Memory Adressing-mode // http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf -> 3.2.11.1. Texture Memory S. 42
// cudaAddressModeClamp - Return values at the boarders if out-of range - default
// cudaAddressModeBorder - Return 0 if out-of range
// cudaAddressModeMirror - Mirror the values - For normalized coordinates
// cudaAddressModeWrap - Repeating the values - For normalized coordinates
// Texturmemory fuer Ascans
cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur
texRefAscans.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben
texRefAscans.addressMode[1] = cudaAddressModeBorder;
if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1)
{
texRefAscans.filterMode = cudaFilterModeLinear; // Lineare Interpolation
}
else
{
texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor
}
texRefAscans.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texRefAscans, deviceAScansCuArray, &texChannelDesc));
if (ATTMode_3DVolume == false)
{ // ========= 3DVolume Mode without ATT-Correction
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float>(); // Should do the same
// AscanIndex Path Tables ------------------------------------------------------
texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_0.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_0.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_0.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
if (TableAscanIndexAllocationCount > 1)
{ // TODO: mit Arrays flexibel programmieren!!!
texTableAscanIndexFloat1_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_1.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_1.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_1.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_1.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_1.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 2)
{
texTableAscanIndexFloat1_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_2.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_2.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_2.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_2.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_2.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 3)
{
texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_3.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_3.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_3.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
}
}
else if (ATTMode_3DVolume == true)
{ // ========= 3DVolume Mode with ATT-Correction
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float2>(); // Should do the same
// AscanIndex Path Tables ------------------------------------------------------
texTableAscanIndexFloat2_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_0.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_0.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_0.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_0.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_0.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
if (TableAscanIndexAllocationCount > 1)
{ // TODO: mit Arrays flexibel programmieren!!!
texTableAscanIndexFloat2_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_1.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_1.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_1.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_1.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_1.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 2)
{
texTableAscanIndexFloat2_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_2.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_2.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_2.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_2.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_2.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 3)
{
texTableAscanIndexFloat2_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_3.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_3.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_3.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_3.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_3.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
}
}
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
float3 SosVoxelStartPosition;
SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x) / SOS_RESOLUTION; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen
SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y) / SOS_RESOLUTION;
SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z) / SOS_RESOLUTION;
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
// Anzahl der Teiltabellen, bereits hier berechnen oder übergeben
int TableAscanIndexAllocationCount = ceil((float)aScanWindowSize / (float)maxAscanIndexArraysInTexture); // float is important due to ceiling
// printf( "TableAscanIndexAllocationCount = (%i/%i) = %i = %i\n", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount,
// ceil(aScanWindowSize/maxAscanIndexArraysInTexture));
// Call of 3 SAFT Versions - AscanIndex-Varainten
// #####################################################################################################################################################################
// ==================================================== Blockmode with SOS-value per Ascan
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
{ // ==================================================== Blockmode with SOS-value per Ascan
printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n");
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
{
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
aScanIndex,
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
aScanIndex,
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement,
blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions,
debugMode, debugModeParameter, deviceSAFT_VARIANT,
deviceOutput
);
}
// Unbind Textures
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
if (ATTMode_3DVolume == false)
{ // ========= 3DVolume Mode without ATT-Correction
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_0));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_1));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_2));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_3));
}
else if (ATTMode_3DVolume == true)
{ // ========= 3DVolume Mode with ATT-Correction
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_0));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_1));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_2));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
}
CUDA_CHECK(cudaGetLastError());
}

View File

@@ -9,8 +9,7 @@
- Das wird gemacht, weil CUDA keine externen Referenzen unterst<73>tzt, um Daten von anderen Compilierungs Einheiten zu referenzieren.
*/
#include "kernel/constantMemory.cuh" // Deklaration der Daten, die sich im Constant-Memory befinden Geometriedaten
#include "kernel/rayTracing.cuh" // GPU-Code für Bresenham
#include "kernel/precalculateSpeedOfSoundKernel.cuh" // GPU-Code Partitionierung für Bresenham. Ruft den Bresenham auf.
#include "kernel/saftKernel.cuh" // GPU-Kernel für SAFT
// #include "kernel/rayTracing.cuh" // GPU-Code für Bresenham
// #include "kernel/precalculateSpeedOfSoundKernel.cuh" // GPU-Code Partitionierung für Bresenham. Ruft den Bresenham auf.
// #include "kernel/saftKernel.cuh" // GPU-Kernel für SAFT