From 0e29f139af6771dac0dc29ca02ddf81c5021a7ec Mon Sep 17 00:00:00 2001 From: kradchen Date: Tue, 19 Nov 2024 16:49:21 +0800 Subject: [PATCH] feat: refactor & clean kernel dir in SAFT_TOFI --- SAFT_TOFI/CMakeLists.txt | 4 +- SAFT_TOFI/src/kernel/constantMemory.cuh | 33 - .../kernel/precalculateSpeedOfSoundKernel.cu | 1030 ++++++ .../kernel/precalculateSpeedOfSoundKernel.cuh | 2890 +---------------- SAFT_TOFI/src/kernel/rayTracing.cu | 3 + SAFT_TOFI/src/kernel/rayTracing.cuh | 755 +---- SAFT_TOFI/src/kernel/saftKernel.cu | 561 ++++ SAFT_TOFI/src/kernel/saftKernel.cuh | 2079 +----------- SAFT_TOFI/src/kernel/saftPrivate.cu | 624 ++++ SAFT_TOFI/src/saft.cu | 7 +- 10 files changed, 2487 insertions(+), 5499 deletions(-) delete mode 100644 SAFT_TOFI/src/kernel/constantMemory.cuh create mode 100644 SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cu create mode 100644 SAFT_TOFI/src/kernel/rayTracing.cu create mode 100644 SAFT_TOFI/src/kernel/saftKernel.cu create mode 100644 SAFT_TOFI/src/kernel/saftPrivate.cu diff --git a/SAFT_TOFI/CMakeLists.txt b/SAFT_TOFI/CMakeLists.txt index 3a0050d..aed4a1a 100644 --- a/SAFT_TOFI/CMakeLists.txt +++ b/SAFT_TOFI/CMakeLists.txt @@ -3,7 +3,9 @@ project(SaftTofi) set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) enable_language(CUDA) find_package (OpenMP REQUIRED) -add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp ) +file(GLOB_RECURSE cu_files ./src/*.cu) +file(GLOB_RECURSE cuh_files ./src/*.cuh) +add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/processAScans.cpp ./src/saft.cpp ${cu_files} ${cuh_files}) target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include ) set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_compile_options(SaftTofi PRIVATE $<$: diff --git a/SAFT_TOFI/src/kernel/constantMemory.cuh b/SAFT_TOFI/src/kernel/constantMemory.cuh deleted file mode 100644 index 817b331..0000000 --- 
a/SAFT_TOFI/src/kernel/constantMemory.cuh +++ /dev/null @@ -1,33 +0,0 @@ -#include "saft.hpp" - -/*! - Emitter and receiver geometry held in constant memory, available across all functions in saft.cu because all of it is held in the same compilation unit. - - Emitter und Receiver Geometrie werden im Constant Memory gehalten, erreichbar f�r alle Funktionen in Saft.cu weil alle von ihnen in der selben Kompilierungs-Einheit gehalten werden. -*/ - -#ifdef SaftUseConstantMemforGeometry - - #ifdef SaftUseHarmonicMean - - __constant__ float3 emitterPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; - __constant__ float3 receiverPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; - - // __constant__ float3 emitterPOSharmon[157 * 4]; - // __constant__ float3 receiverPOSharmon[157 * 9]; - - float3* constEmitterPtr = &emitterPOSharmon[0]; - float3* constReceiverPtr = &receiverPOSharmon[0]; - #endif - - - // LookUpTable for GeometryList and Memory Position - - __constant__ unsigned short lookUpGeometryMemoryListEmitter [MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; - __constant__ unsigned short lookUpGeometryMemoryListReceiver[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; - - // __constant__ unsigned short lookUpGeometryMemoryListEmitter [157 * 4]; - // __constant__ unsigned short lookUpGeometryMemoryListReceiver[157 * 9]; - - unsigned short* constLookUpGeometryMemoryListEmitterPtr = &lookUpGeometryMemoryListEmitter[0]; - unsigned short* constLookUpGeometryMemoryListReceiverPtr = &lookUpGeometryMemoryListReceiver[0]; -#endif diff --git a/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cu b/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cu new file mode 100644 index 0000000..57703bb --- /dev/null +++ b/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cu @@ -0,0 +1,1030 @@ +#include "precalculateSpeedOfSoundKernel.cuh" +#include "rayTracing.cuh" + +__constant__ float3 emitterPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; +__constant__ float3 
receiverPOSharmon[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; + +// __constant__ float3 emitterPOSharmon[157 * 4]; +// __constant__ float3 receiverPOSharmon[157 * 9]; + +float3 *constEmitterPtr = &emitterPOSharmon[0]; +float3 *constReceiverPtr = &receiverPOSharmon[0]; + +// LookUpTable for GeometryList and Memory Position + +__constant__ unsigned short lookUpGeometryMemoryListEmitter[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; +__constant__ unsigned short lookUpGeometryMemoryListReceiver[MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY]; + +// __constant__ unsigned short lookUpGeometryMemoryListEmitter [157 * 4]; +// __constant__ unsigned short lookUpGeometryMemoryListReceiver[157 * 9]; + +unsigned short *constLookUpGeometryMemoryListEmitterPtr = &lookUpGeometryMemoryListEmitter[0]; +unsigned short *constLookUpGeometryMemoryListReceiverPtr = &lookUpGeometryMemoryListReceiver[0]; + +surface outSurfRefTableVoxelToEmPathSosBoth; + +// Surfaces fuer Emitter - SosPathsTables +// surface +// outSurfRefTableVoxelToReceiverPathSosSumTest; +surface outSurfRefTableVoxelToRecPathSosBoth0; +surface outSurfRefTableVoxelToRecPathSosBoth1; +surface outSurfRefTableVoxelToRecPathSosBoth2; + +// Surfaces fuer AscanIndex +surface outSurfRefAscanIndexFloat0; +surface outSurfRefAscanIndexFloat1; +surface outSurfRefAscanIndexFloat2; +surface outSurfRefAscanIndexFloat3; + +// Texturmemory fuer Emitter - SosPathsTables + +texture texTableVoxelToEmitterPathSosBoth_preprocess; +// Texturmemory fuer Receiver - SosPathsTables + +texture texTableVoxelToReceiverPathSosBoth0_preprocess; +texture texTableVoxelToReceiverPathSosBoth1_preprocess; +texture texTableVoxelToReceiverPathSosBoth2_preprocess; + +__global__ void precalculateAverageSpeedOfSoundKernel(cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur + int firstZLayer, ///< First z-layer in the speed of sound grid the + ///< pre-calculation is performed for. 
+ int sosZLayerCount, ///< Number of z-layers in the speed of sound grid the + ///< pre-calculation is performed for. + int geometry, ///< emitters=0 or receivers=1. + int geometryElementCount, ///< Number of elements in the geometry array. + int maxSoSReceiverArrayForTexture, ///< max amount of elements in the + ///< receiver CUDA array. + float *deviceVoxelCountOutputFloat, ///< fuer Count im Floatformat gedacht + ///< fuer Texturmemory. + float *speedOfSoundSumOutput, ///< fuer SoS im Floatformat gedacht fuer + ///< Texturmemory. + int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, float SOS_RESOLUTION, float debugMode, + float debugModeParameter) +{ + dim3 SosVoxel(threadIdx.x, // SoS-Voxel X ? Threads fangen bei 0 an + blockIdx.x, // SoS-Voxel Y + blockIdx.y + firstZLayer // SoS-Voxel Z + Offset + ); + float voxelCount; // Anzahl der Voxel auf einem SoS-Pfad + float averageSpeed = 0.0; // SoSSumme auf einem SoS-Pfad + float totalAttenuation = 0.0; // AttSumme auf einem Attenuation-Pfad + + // dim3 SosGeometryVoxel; // SoSVoxel von Emitter/Receiver + float3 SosGeometryVoxelFloat; // SoSVoxel von Emitter/Receiver in Float + float SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION; // Aufluesung im SoS-Grid + // int tableIndex; // Index innerhalb + // TableVoxelToEmitter/ReceiverPath + + int i_x = SosVoxel.x; + int i_y = SosVoxel.y; + int i_z = (SosVoxel.z - firstZLayer); // float SosVoxelTextureZ = (SosVoxelf.z + // - speedOfSoundZLayer); + // int Index; + int TexturGeometryIndexZ; + + float3 currentGeometry; + + for (int geometryIndexCounter = 0; geometryIndexCounter < geometryElementCount; geometryIndexCounter++) // Alle Emitter oder Receiver in der Liste von + // Matlab durchgehen + { + int lookUpGeometryIndex = 0; + + // Lade lookUpGeometryMemoryList-Eintrag, um Position im Memory zu bestimmen + if (geometry == 0) // => Emitter + { + lookUpGeometryIndex = lookUpGeometryMemoryListEmitter[geometryIndexCounter]; // Load from 
Constant Memory + } + else // if (geometry == 1) => Receiver + { + lookUpGeometryIndex = lookUpGeometryMemoryListReceiver[geometryIndexCounter]; // Load from Constant Memory + } + + // if (currentGeometry.x != 255) // currentGeometry.x = 255 ist + // außerhalb des Wertebereichs und zeigt an, das Geometrie nicht genutzt + // wird. Darum muss nicht berechnet werden. + if (lookUpGeometryIndex != 65535) // currentGeometry.x = 65535 ist außerhalb des Wertebereichs und + // zeigt an, das Geometrie nicht genutzt wird. Darum muss nicht + // berechnet werden. + { + // Wenn Emitter/Receiver genutzt werden Koordinaten laden + if (geometry == 0) // => Emitter + { + currentGeometry = emitterPOSharmon[geometryIndexCounter]; // Positionsdaten von + // Emitter lesen + // syncthreads(); + } + + else // if (geometry == 1) => Receiver + { + currentGeometry = receiverPOSharmon[geometryIndexCounter]; // Positionsdaten von + // Receiver lesen + // syncthreads(); + } + + // Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten + determineSpeedOfSoundFieldVoxelFloat(currentGeometry, SosGeometryVoxelFloat, sosOffset, + SOS_RESOLUTION_FACTOR); // SoSVoxel von E/R bestimmen // + // currentGeometry --> SosGeometryVoxel Float + performRayTracedSpeedAdditionTexture(voxelCount, averageSpeed, totalAttenuation, SosGeometryVoxelFloat, SosVoxel, deviceSosAttFieldCuArray, SOSGrid_XYZ, sosOffset, SOS_RESOLUTION, + IMAGE_RESOLUTION, regionOfInterestOffset, + geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als + // Integer + + if (geometry == 0) // Emitter + { + TexturGeometryIndexZ = sosZLayerCount * lookUpGeometryIndex + i_z; + float4 VoxelValues; + VoxelValues.x = averageSpeed; + VoxelValues.y = voxelCount; + VoxelValues.z = totalAttenuation; // Average Attenuation on this Path + + VoxelValues.w = 0.0f; + surf3Dwrite(VoxelValues, outSurfRefTableVoxelToEmPathSosBoth, i_x * sizeof(float4), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben + } + else + { + 
TexturGeometryIndexZ = sosZLayerCount * ((lookUpGeometryIndex) % maxSoSReceiverArrayForTexture) + i_z; + float4 VoxelValues; + VoxelValues.x = averageSpeed; // Average SoS on this Path + VoxelValues.y = voxelCount; // Amount of visited voxel + + VoxelValues.z = totalAttenuation; // Average Attenuation on this Path + VoxelValues.w = 0.0f; // Amount of visited voxel + + if ((int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 0) + { + surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth0, i_x * sizeof(float4), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben + } + else if ((int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 1) + { + surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth1, i_x * sizeof(float4), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben + } + else if ((int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 2) + { + surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth2, i_x * sizeof(float4), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben + } + } + } + } +} + +/** + Proxy function which calls the speed of sound precalculation kernel. + - Proxy-Funktion der einen Schallgeschwindigkeits-Kernel aufruft. +*/ +// precalculateAverageSpeedOfSound( +// currentSpeedOfSoundZLayer, +// maxFeasibleSosZLayerCount, +// 0, +// emitter_list_Size, +// deviceTableVoxelToEmitterPathCount, +// deviceTableVoxelToEmitterPathCountFloat, +// deviceTableVoxelToEmitterPathSosSum); + +__global__ void precalculateAscanIndex_usePathsKernel(int ascanIndexBatchOffset, ///< Offset of AscanIndex batch (bei mehreren + ///< Aufrufen) + int aScanWindowSize, ///< gibt Anzahl der Ascans ein + cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur + int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound + ///< grid the pre-calculation is performed + ///< for. 
+ int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of + ///< sound grid the pre-calculation is + ///< performed for. + // int currentEmIndexUsedForAscanIndexCalculation, ///< current + // Index of Em for which the AscanIndex is calculated + + // int geometryElementCount, ///< + // Number of elements in the geometry array. TODO: --> emitter_list_Size, + // receiver_list_Size int emitter_list_Size, ///< Number of + // emitter_array got from Matlab + // int receiver_list_Size, ///< Number of receiver_array got from + // Matlab + int maxSoSReceiverArrayForTexture, + unsigned short const *deviceEmitterIndex_block, ///< Speicheradresse fuer EmitterIndexdaten + ///< im AscanBlock + unsigned short const *deviceReceiverIndex_block, ///< Speicheradresse fuer + ///< ReceiverIndexdaten im AscanBlock + int TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks + ///< der Groesse 2048/4096 + int maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec(Ascans) in + ///< einer Teiltabelle /CUDA Array + cudaArray **deviceTextureAscanIndexFloatCuArray, int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, + float SOS_RESOLUTION, float debugMode, float debugModeParameter, int *deviceSAFT_VARIANT) +{ + float3 SosVoxelFloat; + SosVoxelFloat.x = (float)threadIdx.x; // SoS-Voxel X ? Threads fangen an bei 0 an + SosVoxelFloat.y = (float)blockIdx.x; // SoS-Voxel X ? Threads fangen an bei 0 an + SosVoxelFloat.z = (float)(blockIdx.y + currentSpeedOfSoundZLayer); // SoS-Voxel X ? 
Threads + // fangen an bei 0 an + + // Speicher in Texturformat + int i_x = (int)floor(SosVoxelFloat.x); + int i_y = (int)floor(SosVoxelFloat.y); + int i_z = (int)(SosVoxelFloat.z - currentSpeedOfSoundZLayer); + // int Index; + int TexturGeometryIndexZ; + + float3 voxelPosition; + // voxelPosition.x = (float)SosVoxel.x; //Unrechnung von SOS in m + voxelPosition.x = (float)SosVoxelFloat.x * SOS_RESOLUTION + sosOffset.x; // Umrechnung von SOS in m + voxelPosition.y = (float)SosVoxelFloat.y * SOS_RESOLUTION + sosOffset.y; + voxelPosition.z = (float)SosVoxelFloat.z * SOS_RESOLUTION + sosOffset.z; + + float currentAscanIndex = 0.0; // aktueller Ascan, fuer den der AscanIndex + // abhaengig vom SOS-Voxel berechnet wird + float2 currentSOSVoxel_AscanIndexAttValues; + int currentEmitterIndex_minus1 = 0.0; // aktueller Emitter + int oldEmitterIndex = 65535; // letzter Emitter + int currentReceiverIndex_minus1 = 0.0; // aktueller Receiver + + float3 currentEmitterGeometry; // Coordinates for current Emitter + float3 currentReceiverGeometry; // Coordinates for current Receiver + float emitterDistance; + float receiverDistance; + float totalDistance; + + unsigned short lookUpEmitterIndex, // Index of Emitter in lookUp table gives + // the position in texture memory + lookUpReceiverIndex; // Index of Receiver in lookUp table gives the + // position in texture memory + int currentRecTextureIndex; // Index of ReceiverTexture due to division in + // several textures due to HW limitation + + float TexturIndexX = SosVoxelFloat.x + 0.5f; // Index for access to Texturmemory + float TexturIndexY = SosVoxelFloat.y + 0.5f; // Index for access to Texturmemory + float TexturIndexZEmitter = 0.0f; // Index for access to Texturmemory + float TexturIndexZReceiver = 0.0f; // Index for access to Texturmemory + + // Z offset inside precalculated SOS paths + float SosVoxelTextureZnotInterpolated = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer); // + 0.5f + + float4 VoxelSosAttValues; // SOS 
and ATT Values for one Voxel for Texture + // reading + // SOS-Correction and ATT-Correction + // float voxelCount = 0.0; // Anzahl der Voxel auf + // einem SoS-Pfad + float emitterVoxelVoxelCount = 0.0; + float receiverVoxelVoxelCount = 0.0; + float emitterReceiverTotalVoxelCount = 0.0; // Wird zwei mal genutz, Eventuell + // wieder rausnehmen. SOS + float emitterVoxelaverageSpeedSum = 0.0; + float receiverVoxelaverageSpeedSum = 0.0; + float averageSpeed = 0.0; // SoSSumme auf einem SoS-Pfad + // float sumAverageSpeed = 0.0; + // ATT + float emitterVoxelTotalAttenuationSum = 0.0; + float receiverVoxelTotalAttenuationSum = 0.0; + float totalAttenuation_dB = 0.0; // AttSumme auf einem Attenuation-Pfad + float totalAttenuation_multFactor = 0.0; // AttFaktor auf einem Attenuation-Pfad + + for (int ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i++) // Alle Emitter oder Receiver in der + // Liste von Matlab durchgehen + { + // 1. Ascans durchgehen - DONE + // 1.1 Emitter und Receiver PfadeID laden, Position laden + // und Abstand berechnen - DONE + // 1.2 Use Emitter-Cache + //// + // TODO + // 1.3 SOS und Attenuation aus Pfadvorberechnung laden - + // 2. Ascan-Index mit SAFT berechnen - DONE + // 2.1 Ascan-Index in Surface speichern - DONE + + // In Index-Listen stehen die Indexe von Em/Rec pro Ascan + // In blockedReceiver/SenderList stehen die 3D-Koordinaten in jeweiliger + // Anzahl + + // lade aktuellen Emitter&Receiver aus AscanBlockliste + // --------------------------------------------------------------- + currentEmitterIndex_minus1 = deviceEmitterIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1; // Load EmitterIndex from Constant Memory fuer Geometrie,... -1 da + // Matlab mit 1 startet + currentReceiverIndex_minus1 = deviceReceiverIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1; // Load ReceiverIndex from Constant Memory fuer Geometrie,... 
+ lookUpEmitterIndex = lookUpGeometryMemoryListEmitter[currentEmitterIndex_minus1]; // Load EmitterLookUp Index from Constant + // Memory fuer SOS Pfade. Gibt an wo SOS + // Pfade im Texturspeicher liegen + lookUpReceiverIndex = lookUpGeometryMemoryListReceiver[currentReceiverIndex_minus1]; // Load ReceiverLookUp Index from + // Constant Memory fuer SOS Pfade. Gibt + // an wo SOS Pfade im Texturspeicher + // liegen + + // Berechne die mittlere SOS und ATT fuer diesen Pfad + // Nutzen der Bresenham-Floatvariante mit Texturmemory und Interpolation + // --------------------------------------------------------------- + + if (currentEmitterIndex_minus1 != oldEmitterIndex) + { // Nur wenn neuer Emitter geladen wird neue + // Emitter-Koordinaten laden und Abstand berechnen + + currentEmitterGeometry = emitterPOSharmon[currentEmitterIndex_minus1]; // Use Constant Memory + emitterDistance = sqrtf(SQR(voxelPosition.x - currentEmitterGeometry.x) + SQR(voxelPosition.y - currentEmitterGeometry.y) + SQR(voxelPosition.z - currentEmitterGeometry.z)); + // EmitterIndex in Z-Richtung fuer Zugriff auf Textur + TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZnotInterpolated + 0.5f; + + // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln + // ==================================== + // SpeedSum + Count + Attenuation - Emitter + // ---------------------------------------- + // -------------------------------------------------------- + VoxelSosAttValues = tex3D(texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); + emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; + emitterVoxelVoxelCount = VoxelSosAttValues.y; + emitterVoxelTotalAttenuationSum = VoxelSosAttValues.z; + // emitterVoxelVoxelCount VoxelSosAttValues.w; // only float 3 needed + } + + currentReceiverGeometry = receiverPOSharmon[currentReceiverIndex_minus1]; // Use Constant Memory + // receiverDistance = sqrtf( 
SQR(voxelPosition.x-currentReceiverGeometry.x) + // + SQR(voxelPosition.y-currentReceiverGeometry.y) + + // SQR(voxelPosition.z-currentReceiverGeometry.z) ); + receiverDistance = (float)sqrtf(SQR((double)voxelPosition.x - (double)currentReceiverGeometry.x) + SQR((double)voxelPosition.y - (double)currentReceiverGeometry.y) + + SQR((double)voxelPosition.z - (double)currentReceiverGeometry.z)); + + // Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten + // determineSpeedOfSoundFieldVoxelFloat(currentReceiverGeometry, + // SosReceiverVoxelFloat, sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel + // von E/R bestimmen // currentGeometry --> SosGeometryVoxel Float + + // Distanz Emitter -> Voxel -> Receiver berechnen + // ==================================== + totalDistance = emitterDistance + receiverDistance; // Gesamt-Abstand + + // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln + // ==================================== + // Determine number of current used texture memory for this receiver + currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); + + // ReceiverIndex in Z-Richtung fuer Zugriff auf Textur + + TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZnotInterpolated + + 0.5f; // Z-Index fuer Zugriff auf Textur + // TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) + // % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // + // Z-Index fuer Zugriff auf Textur + + // mittlere Schallgeschwindigkeit fuer Voxel->Receiver Pfad ermitteln + // ==================================== + // SpeedSum + Count - Receiver ---------------------------------------- + // float4 VoxelValues; + if (currentRecTextureIndex == 0) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + else if (currentRecTextureIndex == 1) + { + 
VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + else if (currentRecTextureIndex == 2) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + + receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; + receiverVoxelVoxelCount = VoxelSosAttValues.y; + receiverVoxelTotalAttenuationSum = VoxelSosAttValues.z; + + emitterReceiverTotalVoxelCount = (emitterVoxelVoxelCount + receiverVoxelVoxelCount); // kleine Optimierung? + + // Mittlere Schallgeschwindigkeit ueber beide Pfade Emitter/Voxel/Receiver + // berechnen + // ======================================================================= + // averageSpeed = + // (emitterVoxelVoxelCount+receiverVoxelVoxelCount)/(emitterVoxelaverageSpeedSum+receiverVoxelaverageSpeedSum); + // // harmonisches Mittel + averageSpeed = emitterReceiverTotalVoxelCount / (emitterVoxelaverageSpeedSum + receiverVoxelaverageSpeedSum); // harmonisches Mittel + + //////////////////////////////////////// SAFT-Calculation to determine + /// Ascan-Index + ////////////////////////////////////////////////////////////////// + + currentAscanIndex = (double)(emitterDistance + receiverDistance) / ((double)1e-7 * (double)averageSpeed); + currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex; //- 0.12f + currentSOSVoxel_AscanIndexAttValues.y = 1; + + // Save AscanIndex in Texture + // Calculate the Z-Index for storing the AscanIndex value + TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; + + // Write in the AscanIndex value for all Receiver in corresponding memory + // adress depending on SOS-Voxel, and Z-Layer floor(ascanIndex_i / + // maxAscanIndexArraysInTexture) gives the surface if more then one is used + int ascanIndexTexture_Nr = (int)floor((float)ascanIndex_i / (float)maxAscanIndexArraysInTexture); + + if (ascanIndexTexture_Nr == 0) + 
{ + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat0, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 1) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat1, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 2) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat2, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 3) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat3, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + + } // Ascan_i-loop +} + +__global__ void precalculateAscanIndex_usePathsKernel_SOS(int ascanIndexBatchOffset, ///< Offset of AscanIndex batch (bei mehreren + ///< Aufrufen) + int aScanWindowSize, ///< gibt Anzahl der Ascans ein + cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur + int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound + ///< grid the pre-calculation is performed + ///< for. + int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of + ///< sound grid the pre-calculation is + ///< performed for. 
+ int maxSoSReceiverArrayForTexture, + unsigned short const *deviceEmitterIndex_block, ///< Speicheradresse fuer EmitterIndexdaten + ///< im AscanBlock + unsigned short const *deviceReceiverIndex_block, ///< Speicheradresse fuer + ///< ReceiverIndexdaten im AscanBlock + int TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks + ///< der Groesse 2048/4096 + int maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec(Ascans) in + ///< einer Teiltabelle /CUDA Array + cudaArray **deviceTextureAscanIndexFloatCuArray, int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, + float SOS_RESOLUTION, float debugMode, float debugModeParameter, int *deviceSAFT_VARIANT) +{ + float3 SosVoxelFloat; + SosVoxelFloat.x = (float)threadIdx.x; // SoS-Voxel X Threads fangen an bei 0 an + SosVoxelFloat.y = (float)blockIdx.x; // SoS-Voxel Y Threads fangen an bei 0 an + SosVoxelFloat.z = (float)(blockIdx.y + currentSpeedOfSoundZLayer); // SoS-Voxel Z Threads + // fangen an bei 0 an + // Speicher in Texturformat + int i_x = (int)floor(SosVoxelFloat.x); + int i_y = (int)floor(SosVoxelFloat.y); + int i_z = (int)(SosVoxelFloat.z - currentSpeedOfSoundZLayer); // float SosVoxelTextureZ = + // (SosVoxelf.z + // - speedOfSoundZLayer); + // int Index; + int TexturGeometryIndexZ; + + float3 voxelPosition; + voxelPosition.x = SosVoxelFloat.x * SOS_RESOLUTION + sosOffset.x; // Umrechnung von SOS in m + voxelPosition.y = SosVoxelFloat.y * SOS_RESOLUTION + sosOffset.y; + voxelPosition.z = SosVoxelFloat.z * SOS_RESOLUTION + sosOffset.z; + + float currentAscanIndex = 0.0; // aktueller Ascan, fuer den der AscanIndex + // abhaengig vom SOS-Voxel berechnet wird + int currentEmitterIndex_minus1 = 0.0; // aktueller Emitter + int oldEmitterIndex = 65535; // letzter Emitter + int currentReceiverIndex_minus1 = 0.0; // aktueller Receiver + + float3 currentEmitterGeometry; // Coordinates for current Emitter + float3 currentReceiverGeometry; // Coordinates 
for current Receiver + float emitterDistance; + float receiverDistance; + float totalDistance; + + unsigned short lookUpEmitterIndex, // Index of Emitter in lookUp table gives + // the position in texture memory + lookUpReceiverIndex; // Index of Receiver in lookUp table gives the + // position in texture memory + int currentRecTextureIndex; // Index of ReceiverTexture due to division in + // several textures due to HW limitation + + float TexturIndexX = SosVoxelFloat.x + 0.5f; // Index for access to Texturmemory + float TexturIndexY = SosVoxelFloat.y + 0.5f; // Index for access to Texturmemory + float TexturIndexZEmitter = 0.0f; // Index for access to Texturmemory + float TexturIndexZReceiver = 0.0f; // Index for access to Texturmemory + + // Z offset inside precalculated SOS paths // TODO: - IMAGE_RESOLUTION/4 hier + // eingefuegt, da ws Rundungsfeher und ich auf 0.0 wie auch 1.0 komme. Da 1.0 + // nicht definiert fuer optimized Fehler in Berechnung! + float SosVoxelTextureZ = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer); + float SosVoxelTextureZnotInterpolated = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer); // + 0.5f + + float4 VoxelSosAttValues; // SOS and ATT Values for one Voxel for Texture + // reading + + // SOS-Correction and ATT-Correction + float emitterVoxelVoxelCount = 0.0; + float receiverVoxelVoxelCount = 0.0; + float emitterReceiverTotalVoxelCount = 0.0; // Wird zwei mal genutz, Eventuell wieder rausnehmen. + // SOS + float emitterVoxelaverageSpeedSum = 0.0; + float receiverVoxelaverageSpeedSum = 0.0; + float averageSpeed = 0.0; + float sumAverageSpeed = 0.0; + + for (int ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i++) // Alle Emitter oder Receiver in der + // Liste von Matlab durchgehen + { + // 1. Ascans durchgehen - DONE + // 1.1 Emitter und Receiver PfadeID laden, Position laden + // und Abstand berechnen - DONE + // 1.2 Use Emitter-Cache -DONE + // 1.3 SOS und Attenuation aus Pfadvorberechnung laden - + // 2. 
Ascan-Index mit SAFT berechnen - DONE + // 2.1 Ascan-Index in Surface speichern - DONE + + // In Index-Listen stehen die Indexe von Em/Rec pro Ascan + // In blockedReceiver/SenderList stehen die 3D-Koordinaten in jeweiliger + // Anzahl + + // lade aktuellen EmitterIdx&ReceiverIdx aus AscanBlockliste + // --------------------------------------------------------------- + currentEmitterIndex_minus1 = deviceEmitterIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1; // Load EmitterIndex from Constant Memory fuer Geometrie,... -1 da + // Matlab mit 1 startet + currentReceiverIndex_minus1 = deviceReceiverIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1; // Load ReceiverIndex from Constant Memory fuer Geometrie,... + lookUpEmitterIndex = lookUpGeometryMemoryListEmitter[currentEmitterIndex_minus1]; // Load EmitterLookUp Index from Constant + // Memory fuer SOS Pfade. Gibt an wo SOS + // Pfade im Texturspeicher liegen + lookUpReceiverIndex = lookUpGeometryMemoryListReceiver[currentReceiverIndex_minus1]; // Load ReceiverLookUp Index from + // Constant Memory fuer SOS Pfade. 
Gibt + // an wo SOS Pfade im Texturspeicher + // liegen + + // Berechne die mittlere SOS und ATT fuer diesen Pfad + // Nutzen der Bresenham-Floatvariante mit Texturmemory und Interpolation + // --------------------------------------------------------------- + + if (currentEmitterIndex_minus1 != oldEmitterIndex) + { // Nur wenn neuer Emitter geladen wird neue + // Emitter-Koordinaten laden und Abstand berechnen + + currentEmitterGeometry = emitterPOSharmon[currentEmitterIndex_minus1]; // Use Constant Memory + emitterDistance = sqrtf(SQR(voxelPosition.x - currentEmitterGeometry.x) + SQR(voxelPosition.y - currentEmitterGeometry.y) + SQR(voxelPosition.z - currentEmitterGeometry.z)); + // Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten + // determineSpeedOfSoundFieldVoxelFloat(currentEmitterGeometry, + // SosEmitterVoxelFloat , sosOffset, SOS_RESOLUTION_FACTOR); // + // SoSVoxel von E/R bestimmen // currentGeometry --> SosGeometryVoxel + // Float + + // EmitterIndex in Z-Richtung fuer Zugriff auf Textur + TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZnotInterpolated + 0.5f; + + // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln + // ==================================== + // SpeedSum + Count + Attenuation - Emitter + // ---------------------------------------- + // -------------------------------------------------------- + VoxelSosAttValues = tex3D(texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); + emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; + emitterVoxelVoxelCount = VoxelSosAttValues.y; + } + + currentReceiverGeometry = receiverPOSharmon[currentReceiverIndex_minus1]; // Use Constant Memory + receiverDistance = (float)sqrtf(SQR((double)voxelPosition.x - (double)currentReceiverGeometry.x) + SQR((double)voxelPosition.y - (double)currentReceiverGeometry.y) + + SQR((double)voxelPosition.z - (double)currentReceiverGeometry.z)); + + // 
Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten + // determineSpeedOfSoundFieldVoxelFloat(currentReceiverGeometry, + // SosReceiverVoxelFloat, sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel + // von E/R bestimmen // currentGeometry --> SosGeometryVoxel Float + + // Distanz Emitter -> Voxel -> Receiver berechnen + // ==================================== + totalDistance = emitterDistance + receiverDistance; // Gesamt-Abstand + + // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln + // ==================================== + // Determine number of current used texture memory for this receiver + currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); + + // ReceiverIndex in Z-Richtung fuer Zugriff auf Textur + TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZnotInterpolated + + 0.5f; // Z-Index fuer Zugriff auf Textur + // TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) + // % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // + // Z-Index fuer Zugriff auf Textur + + // mittlere Schallgeschwindigkeit fuer Voxel->Receiver Pfad ermitteln + // ==================================== + // SpeedSum + Count - Receiver ---------------------------------------- + // float4 VoxelValues; + if (currentRecTextureIndex == 0) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + else if (currentRecTextureIndex == 1) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + else if (currentRecTextureIndex == 2) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + + receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; + receiverVoxelVoxelCount = 
VoxelSosAttValues.y; + // receiverVoxelVoxelCount VoxelSosAttValues.w; // only float 3 needed + + emitterReceiverTotalVoxelCount = (emitterVoxelVoxelCount + receiverVoxelVoxelCount); // kleine Optimierung? + + // Calculate harmonic mean of both paths Emitter/Voxel/Receiver + // ======================================================================= + averageSpeed = emitterReceiverTotalVoxelCount / (emitterVoxelaverageSpeedSum + receiverVoxelaverageSpeedSum); // harmonic mean + + //////////////////////////////////////// SAFT-Calculation to determine + /// Ascan-Index + /// /////////////////////////////////////////////////////////////// + + // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. + // 2*N < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: + // 2048 ; Kepler: 2048 Em/Rec - Kombinationen) maxSurfaceTexture3DDimension + // = maximale Groesse die erlaubt ist TableAscanIndexAllocationCount + // = Anzahl der Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe + // maxFeasibleSosZLayerCount = Anzahl + // der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten + // werden (1 oder 2 bei Interpolierten Variante) + // maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle + + // outSurfRefAscanIndexFloat0..2 = Surface fuer + // die jeweiligen Ascans + + // totalDistance = emitterDistance + receiverDistance; // + // Gesamt-Abstand averageSpeed = deviceSoSData_block + // [currentAScanIndex]; sampleTime = totalDistance / + //(sampleRate*averageSpeed); // Sample Time bestimmen mit entsprechenden + // SoSDaten + + // currentAscanIndex = (emitterDistance + + // receiverDistance)/(1e-7*averageSpeed); + currentAscanIndex = (double)(emitterDistance + receiverDistance) / ((double)1e-7 * (double)averageSpeed); + + // Save AscanIndex in Texture + // Calculate the Z-Index for storing the AscanIndex value + TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % 
maxAscanIndexArraysInTexture) + i_z; + // Write in the AscanIndex value for all Receiver in corresponding memory + // adress depending on SOS-Voxel, and Z-Layer floor(ascanIndex_i / + // maxAscanIndexArraysInTexture) gives the surface if more then one is used + int ascanIndexTexture_Nr = (int)floor((float)ascanIndex_i / (float)maxAscanIndexArraysInTexture); + + if (ascanIndexTexture_Nr == 0) + { + surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat0, i_x * sizeof(float), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 1) + { + surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat1, i_x * sizeof(float), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 2) + { + surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat2, i_x * sizeof(float), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 3) + { + surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat3, i_x * sizeof(float), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + + } // Ascan_i-loop +} + +__global__ void precalculateAscanIndex_usePathsKernel_SOS_ATT(int ascanIndexBatchOffset, ///< Offset of AscanIndex batch (bei mehreren + ///< Aufrufen) + int aScanWindowSize, ///< gibt Anzahl der Ascans ein + cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur + int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound + ///< grid the pre-calculation is performed + ///< for. + int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of + ///< sound grid the pre-calculation is + ///< performed for. 
+ int maxSoSReceiverArrayForTexture, + unsigned short const *deviceEmitterIndex_block, ///< Speicheradresse fuer EmitterIndexdaten + ///< im AscanBlock + unsigned short const *deviceReceiverIndex_block, ///< Speicheradresse fuer + ///< ReceiverIndexdaten im AscanBlock + int TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks + ///< der Groesse 2048/4096 + int maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec(Ascans) in + ///< einer Teiltabelle /CUDA Array + cudaArray **deviceTextureAscanIndexFloatCuArray, + + int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, float SOS_RESOLUTION, float debugMode, + float debugModeParameter, int *deviceSAFT_VARIANT) +{ + // Speicher in Texturformat + int i_x = threadIdx.x; // SoS-Voxel X ? Threads fangen an bei + // 0 an + int i_y = blockIdx.x; // SoS-Voxel Y ? Threads fangen an bei 0 an + int i_z = blockIdx.y; // SoS-Voxel X ? Threads fangen an bei 0 an + int TexturGeometryIndexZ; + + float3 voxelPosition; + voxelPosition.x = (float)i_x * SOS_RESOLUTION + sosOffset.x; // Umrechnung von SOS in m + voxelPosition.y = (float)i_y * SOS_RESOLUTION + sosOffset.y; + voxelPosition.z = (float)(i_z + currentSpeedOfSoundZLayer) * SOS_RESOLUTION + sosOffset.z; + + // float currentAscanIndex = 0.0; // aktueller AscanIndex + float2 currentSOSVoxel_AscanIndexAttValues; + int currentEmitterIndex_minus1 = 0.0; // current EmitterID + int oldEmitterIndex = 65535; // last used EmitterID (check for Caching) + int currentReceiverIndex_minus1 = 0.0; // current ReceiverID + + float3 currentEmitterGeometry; // Coordinates for current Emitter + float3 currentReceiverGeometry; // Coordinates for current Receiver + float emitterDistance; + float receiverDistance; + float totalDistance; + + unsigned short lookUpEmitterIndex, // Index of Emitter in lookUp table gives + // the position in texture memory + lookUpReceiverIndex; // Index of Receiver in lookUp table gives the + // position in 
texture memory + int currentRecTextureIndex; // Index of ReceiverTexture due to division in + // several textures due to HW limitation + + float TexturIndexX = (float)i_x + 0.5f; // Index for access to Texturmemory + float TexturIndexY = (float)i_y + 0.5f; // Index for access to Texturmemory + float SosVoxelTextureZ = (float)i_z + 0.5f; // Index for access to + // Texturmemory + float TexturIndexZEmitter = 0.0f; // Index for access to Texturmemory + float TexturIndexZReceiver = 0.0f; // Index for access to Texturmemory + + float4 VoxelSosAttValues; // SOS and ATT Values for one Voxel for Texture + // reading + + // SOS-Correction and ATT-Correction + float voxelCount = 0.0; // Anzahl der Voxel auf einem SoS-Pfad + float emitterVoxelVoxelCount = 0.0; + float receiverVoxelVoxelCount = 0.0; + // SOS + float emitterVoxelaverageSpeedSum = 0.0; + float receiverVoxelaverageSpeedSum = 0.0; + float averageSpeed = 0.0; // SoSSumme auf einem SoS-Pfad + // ATT + float emitterVoxelTotalAttenuationSum = 0.0; + float receiverVoxelTotalAttenuationSum = 0.0; + float totalAttenuation_multFactor = 0.0; // AttFaktor auf einem Attenuation-Pfad + + // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. 
2*N + // < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: 1024 + // ; Kepler: 1024 Em/Rec - Kombinationen) maxSurfaceTexture3DDimension = + // maximale Groesse die erlaubt ist + // TableAscanIndexAllocationCount = Anzahl der + // Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe + // maxFeasibleSosZLayerCount = Anzahl der + // SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden + // (1 oder 2 bei Interpolierten Variante) maxAscanIndexArraysInTexture = + // Anzahl der Ascans in einer Teiltabelle + + // Gehe #Ascans durch und lade Em/Rec-Index + // Abhangig davon laden Koordinaten + // --> Bestimme SOS + // --> Berechne AscanIndex + + for (int ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i++) // Alle Emitter oder Receiver in der + // Liste von Matlab durchgehen + { + // 1. Ascans durchgehen - DONE + // 1.1 Emitter und Receiver PfadeID laden, Position laden + // und Abstand berechnen - DONE + // In Index-Listen stehen die Indexe von Em/Rec pro + // Ascan In blockedReceiver/SenderList stehen die + // 3D-Koordinaten 1.2 Use + // Emitter-Cache - DONE + // 1.3 SOS und Attenuation aus Pfadvorberechnung laden - DONE + // 2. Ascan-Index mit SAFT berechnen - DONE + // 2.1 Ascan-Index in Surface speichern - DONE + + // load current Emitter&Receiver from Blocklists + // --------------------------------------------------------------- + currentEmitterIndex_minus1 = deviceEmitterIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1; // Load EmitterIndex from Constant Memory fuer Geometrie,... -1 da + // Matlab mit 1 startet + currentReceiverIndex_minus1 = deviceReceiverIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1; // Load ReceiverIndex from Constant Memory fuer Geometrie,... + lookUpEmitterIndex = lookUpGeometryMemoryListEmitter[currentEmitterIndex_minus1]; // Load EmitterLookUp Index from Constant + // Memory fuer SOS Pfade. 
Gibt an wo SOS + // Pfade im Texturspeicher liegen + lookUpReceiverIndex = lookUpGeometryMemoryListReceiver[currentReceiverIndex_minus1]; // Load ReceiverLookUp Index from + // Constant Memory fuer SOS Pfade. Gibt + // an wo SOS Pfade im Texturspeicher + // liegen + + // Load the precalculated SOS and ATT values for current path + // --------------------------------------------------------------- + + if (currentEmitterIndex_minus1 != oldEmitterIndex) + { // Nur wenn neuer Emitter geladen wird neue + // Emitter-Koordinaten laden und Abstand berechnen + + currentEmitterGeometry = emitterPOSharmon[currentEmitterIndex_minus1]; // Use Constant Memory + emitterDistance = sqrtf(SQR(voxelPosition.x - currentEmitterGeometry.x) + SQR(voxelPosition.y - currentEmitterGeometry.y) + SQR(voxelPosition.z - currentEmitterGeometry.z)); + + // EmitterIndex in Z-Richtung fuer Zugriff auf Textur + TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZ; + + // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln + // ==================================== + // SpeedSum + Count + Attenuation - Emitter + // ---------------------------------------- + VoxelSosAttValues = tex3D(texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); + emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; + emitterVoxelVoxelCount = VoxelSosAttValues.y; + emitterVoxelTotalAttenuationSum = VoxelSosAttValues.z; + // emitterVoxelVoxelCount VoxelSosAttValues.w; // only float 3 needed + } + + currentReceiverGeometry = receiverPOSharmon[currentReceiverIndex_minus1]; // Use Constant Memory + receiverDistance = sqrtf(SQR(voxelPosition.x - currentReceiverGeometry.x) + SQR(voxelPosition.y - currentReceiverGeometry.y) + SQR(voxelPosition.z - currentReceiverGeometry.z)); + + // Distanz Emitter -> Voxel -> Receiver berechnen + // ==================================== + totalDistance = emitterDistance + receiverDistance; // Gesamt-Abstand + + 
// mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln + // ==================================== + // Determine number of current used texture memory for this receiver + currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); + + // ReceiverIndex in Z-Richtung fuer Zugriff auf Textur + TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur + + // mittlere Schallgeschwindigkeit fuer Voxel->Receiver Pfad ermitteln + // ==================================== + // SpeedSum + Count + Attenuation- Receiver + // ---------------------------------------- + if (currentRecTextureIndex == 0) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + else if (currentRecTextureIndex == 1) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + else if (currentRecTextureIndex == 2) + { + VoxelSosAttValues = tex3D(texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); + } + + receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; + receiverVoxelVoxelCount = VoxelSosAttValues.y; + receiverVoxelTotalAttenuationSum = VoxelSosAttValues.z; + + // Mittlere Schallgeschwindigkeit ueber beide Pfade Emitter/Voxel/Receiver + // berechnen + // ======================================================================= + averageSpeed = (emitterVoxelVoxelCount + receiverVoxelVoxelCount) / (emitterVoxelaverageSpeedSum + receiverVoxelaverageSpeedSum); // harmonisches Mittel + + // Calc Attenuation_multFactor with Sum // Calculate + // 10^(...*Totallength of path *1/20) + totalAttenuation_multFactor = powf(10, (((emitterVoxelTotalAttenuationSum + receiverVoxelTotalAttenuationSum) * totalDistance) / (emitterVoxelVoxelCount + 
receiverVoxelVoxelCount) * 0.05)); + + if (totalAttenuation_multFactor > debugModeParameter) + { // Max Border for Attenuation Correction + totalAttenuation_multFactor = debugModeParameter; // Average Attenuation on this Path + // printf("over limit\n"); + } + + //////////////////////////////////////// SAFT-Calculation to determine + /// Ascan-Index + ////////////////////////////////////////////////////////////////// + + // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. + // 2*N < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: + // 2048 ; Kepler: 2048 Em/Rec - Kombinationen) maxSurfaceTexture3DDimension + // = maximale Groesse die erlaubt ist TableAscanIndexAllocationCount + // = Anzahl der Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe + // maxFeasibleSosZLayerCount = Anzahl + // der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten + // werden (1 oder 2 bei Interpolierten Variante) + // maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle + + // outSurfRefAscanIndexFloat0..2 = Surface fuer + // die jeweiligen Ascans + + currentSOSVoxel_AscanIndexAttValues.x = (double)(emitterDistance + receiverDistance) / ((double)1e-7 * (double)averageSpeed); + currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor; + + // Calculate the Z-Index for storing the AscanIndex value + TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; + + // Write in the AscanIndex value for all Receiver in corresponding memory + // adress depending on SOS-Voxel, and Z-Layer floor(ascanIndex_i / + // maxAscanIndexArraysInTexture) gives the surface if more then one is used + int ascanIndexTexture_Nr = (int)floor((float)ascanIndex_i / (float)maxAscanIndexArraysInTexture); + + if (ascanIndexTexture_Nr == 0) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat0, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); 
// Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 1) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat1, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 2) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat2, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + else if (ascanIndexTexture_Nr == 3) + { + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat3, i_x * sizeof(float2), i_y, + TexturGeometryIndexZ); // Direkt in CUDA Array schreiben: + // TableVoxelToEmitterPathSosSum + } + + } // Ascan_i-loop +} + +__global__ void fillCuArrayKernel( + + float useValue, + // cudaArray *deviceSosAttFieldCuArray, ///< CuArray to fill if + // no array of cudaArrays is used + cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill + int maxAscanIndexArraysInTexture, int TableAscanIndexAllocationCount, + int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of + ///< sound grid the pre-calculation is + ///< performed for. 
+ + bool ATTMode_3DVolume, + + float debugMode, float debugModeParameter) +{ + float currentSOSVoxel_AscanIndexValues = useValue; + float2 currentSOSVoxel_AscanIndexAttValues = {useValue, useValue}; + + // Memoryadress for access on Texture + int i_x = threadIdx.x; + int i_y = blockIdx.x; + int i_z = blockIdx.y; // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); + + int TexturGeometryIndexZ; + + for (int ascanIndex_i = 0; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i++) // Alle Emitter oder Receiver in der Liste von Matlab + // durchgehen + { + // Write in the AscanIndex value for all Receiver in corresponding memory + // adress depending on SOS-Voxel, and Z-Layer + + TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; + if (ATTMode_3DVolume == false) + { // ========= 3DVolume Mode without ATT-Correction + surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat0, i_x * sizeof(float), i_y, TexturGeometryIndexZ); + } + else if (ATTMode_3DVolume == true) + { // ========= 3DVolume Mode with ATT-Correction + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat0, i_x * sizeof(float2), i_y, TexturGeometryIndexZ); + } + + if (TableAscanIndexAllocationCount < 2) + continue; + + if (ATTMode_3DVolume == false) + { // ========= 3DVolume Mode without ATT-Correction + surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat1, i_x * sizeof(float), i_y, TexturGeometryIndexZ); + } + else if (ATTMode_3DVolume == true) + { // ========= 3DVolume Mode with ATT-Correction + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat1, i_x * sizeof(float2), i_y, TexturGeometryIndexZ); + } + + if (TableAscanIndexAllocationCount < 3) + continue; + + if (ATTMode_3DVolume == false) + { // ========= 3DVolume Mode without ATT-Correction + surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat2, i_x * sizeof(float), 
i_y, TexturGeometryIndexZ); + } + else if (ATTMode_3DVolume == true) + { // ========= 3DVolume Mode with ATT-Correction + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat2, i_x * sizeof(float2), i_y, TexturGeometryIndexZ); + } + + if (TableAscanIndexAllocationCount < 4) + continue; + + if (ATTMode_3DVolume == false) + { // ========= 3DVolume Mode without ATT-Correction + surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat3, i_x * sizeof(float), i_y, TexturGeometryIndexZ); + } + else if (ATTMode_3DVolume == true) + { // ========= 3DVolume Mode with ATT-Correction + surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat3, i_x * sizeof(float2), i_y, TexturGeometryIndexZ); + } + } +} diff --git a/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cuh b/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cuh index 541e433..773ea0c 100644 --- a/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cuh +++ b/SAFT_TOFI/src/kernel/precalculateSpeedOfSoundKernel.cuh @@ -1,2823 +1,91 @@ #include + #include "saft.hpp" -//#include +#define DebugSosVoxelX 18 +#define DebugSosVoxelY 18 +#define DebugSosVoxelZ 3 +// Surfaces fuer Emitter - SosPathsTables +extern surface outSurfRefTableVoxelToEmPathSosBoth; -// Structure of File -//------------------------------------------------------- -// ## Fuer Teilpfade -// __global__ void precalculateAverageSpeedOfSoundKernel // Kernel zur Vorberechnung der Teilpfade -// void SAFTHandler::precalculateAverageSpeedOfSound // Zur Vorberechnung der Teilpfade -// -// ## Fuer AscanIndex mit Nutzen von Teilpfaden -// void SAFTHandler::precalculateAscanIndex_usePaths // Precalculation of AscansIndexes using SOS/ATT-Paths parts, calls corresponding Kernel -// __global__ void precalculateAscanIndex_usePathsKernel // Kernel zur Vorberechnung der AscansIndexe mit Nutzen von Teilpfaden -// __global__ void precalculateAscanIndex_usePathsKernel_SOS // Kernel zur Vorberechnung 
der AscansIndexe mit Nutzen von Teilpfaden +SOS -// __global__ void precalculateAscanIndex_usePathsKernel_SOS_ATT // Kernel zur Vorberechnung der AscansIndexe mit Nutzen von Teilpfaden +SOS +ATT -// ------------------------------------------------------- +// Surfaces fuer Emitter - SosPathsTables +// surface +// outSurfRefTableVoxelToReceiverPathSosSumTest; +extern surface outSurfRefTableVoxelToRecPathSosBoth0; +extern surface outSurfRefTableVoxelToRecPathSosBoth1; +extern surface outSurfRefTableVoxelToRecPathSosBoth2; -// printf() is only supported -// for devices of compute capability 2.0 and above - -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200) - #define printf(f, ...) ((void)(f, __VA_ARGS__),0) -#endif - -#ifdef debug_CudaPrecalculateKernel - #define DebugSosVoxelX 5 - #define DebugSosVoxelY 5 - #define DebugSosVoxelZ 5 -#endif - -//#if defined (debug_CudaPrecalculateAscanIndexKernel) || defined(debug_CudaPrecalculateAscanIndexKernelProxy) - #define DebugSosVoxelX 18 - #define DebugSosVoxelY 18 - #define DebugSosVoxelZ 3 -//#endif - -//Surfaces fuer Emitter - SosPathsTables -#ifdef SaftTextureForEmRecSosPathsTablesFloat1 - surface outSurfRefTableVoxelToEmitterPathSosSum; - surface outSurfRefTableVoxelToEmitterPathCount; - - //Surfaces fuer Emitter - SosPathsTables - //surface outSurfRefTableVoxelToReceiverPathSosSumTest; - surface outSurfRefTableVoxelToReceiverPathSosSum0; - surface outSurfRefTableVoxelToReceiverPathSosSum1; - surface outSurfRefTableVoxelToReceiverPathSosSum2; - surface outSurfRefTableVoxelToReceiverPathCount0; - surface outSurfRefTableVoxelToReceiverPathCount1; - surface outSurfRefTableVoxelToReceiverPathCount2; -#endif -#if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4) - surface outSurfRefTableVoxelToEmPathSosBoth; - - //Surfaces fuer Emitter - SosPathsTables - //surface outSurfRefTableVoxelToReceiverPathSosSumTest; - surface outSurfRefTableVoxelToRecPathSosBoth0; - surface 
outSurfRefTableVoxelToRecPathSosBoth1; - surface outSurfRefTableVoxelToRecPathSosBoth2; -#endif - -#ifdef SaftUseAscanIndexInterpolation - //Surfaces fuer AscanIndex - surface outSurfRefAscanIndexFloat0; - surface outSurfRefAscanIndexFloat1; - surface outSurfRefAscanIndexFloat2; - surface outSurfRefAscanIndexFloat3; -#endif +// Surfaces fuer AscanIndex +extern surface outSurfRefAscanIndexFloat0; +extern surface outSurfRefAscanIndexFloat1; +extern surface outSurfRefAscanIndexFloat2; +extern surface outSurfRefAscanIndexFloat3; // Texturmemory fuer Emitter - SosPathsTables -#ifdef SaftTextureForEmRecSosPathsTablesFloat1 - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToEmitterPathSosSum; - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToEmitterPathCount; -#endif -#ifdef SaftTextureForEmRecSosPathsTablesFloat2 - texture< float2, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToEmitterPathSosBoth_preprocess; -#endif -#ifdef SaftTextureForEmRecSosPathsTablesFloat4 - texture< float4, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToEmitterPathSosBoth_preprocess; -#endif + +extern texture texTableVoxelToEmitterPathSosBoth_preprocess; // Texturmemory fuer Receiver - SosPathsTables -#ifdef SaftTextureForEmRecSosPathsTablesFloat1 - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosSum0; - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosSum1; - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosSum2; - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathCount0; - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathCount1; - texture< float, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathCount2; -#endif -#ifdef SaftTextureForEmRecSosPathsTablesFloat2 - texture< float2, cudaTextureType3D, 
cudaReadModeElementType> texTableVoxelToReceiverPathSosBoth0_preprocess; - texture< float2, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosBoth1_preprocess; - texture< float2, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosBoth2_preprocess; -#endif -#ifdef SaftTextureForEmRecSosPathsTablesFloat4 - texture< float4, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosBoth0_preprocess; - texture< float4, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosBoth1_preprocess; - texture< float4, cudaTextureType3D, cudaReadModeElementType> texTableVoxelToReceiverPathSosBoth2_preprocess; -#endif - - - -__global__ void precalculateAverageSpeedOfSoundKernel( -#ifndef SaftTextureForBresenhamSosPaths - float const * deviceSpeedOfSoundField, ///< Array of speed of sound samples. Dimensions ordered by speed of indices, commencing with the fastest moving one: 1. x 2. y 3. z -#else - - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur - #endif -#endif - int firstZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - int sosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. - #ifdef SaftUseConstantMemforGeometry - int geometry, ///< emitters=0 or receivers=1. - #else - float3 const * geometry, ///< Vector array describing the positions of emitters or receivers. - #endif - int geometryElementCount, ///< Number of elements in the geometry array. - int maxSoSReceiverArrayForTexture, ///< max amount of elements in the receiver CUDA array. 
- float * deviceVoxelCountOutputFloat, ///< fuer Count im Floatformat gedacht fuer Texturmemory. - float * speedOfSoundSumOutput, ///< fuer SoS im Floatformat gedacht fuer Texturmemory. - int3 SOSGrid_XYZ, - float3 sosOffset, - float3 regionOfInterestOffset, - float IMAGE_RESOLUTION, - float SOS_RESOLUTION, - float debugMode, - float debugModeParameter -) -{ - - - dim3 SosVoxel - ( - threadIdx.x , // SoS-Voxel X ? Threads fangen bei 0 an - blockIdx.x , // SoS-Voxel Y - blockIdx.y + firstZLayer // SoS-Voxel Z + Offset - ); - - #ifdef debug_CudaPrecalculateKernel - //printf(" SosVoxel.x,y,z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // Herausfinden welche berechnet werden - if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - { - - int threadCountAll = gridDim.z * gridDim.x * blockDim.x; // = Anzahl aller Threads X*Y*Z - int threadIndex = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x; - - printf("==================================================================\n"); - printf(" threadCountAll = %i\n", threadCountAll); // Anzahl aller Threads //Brauche ich wahrscheinlich gar nicht. - printf(" threadIndex = %i\n", threadIndex); // Threadindex von aktuellem Kernel - printf(" SosVoxel.x,y,z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // In welchem SoS-Voxel befinde ich mich? - printf(" geometryElementCount = %i\n", geometryElementCount); // Wie viele Elemente gibt es in der Emitter/receiverListe? 
- printf("==================================================================\n"); - } - #endif - -// if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) -// { -// printf(" PrecalculateKernel: debugMode [%i] for geometry[%i]\n", debugMode, geometry); -// } - - float voxelCount; // Anzahl der Voxel auf einem SoS-Pfad - float averageSpeed = 0.0; // SoSSumme auf einem SoS-Pfad - float totalAttenuation = 0.0; // AttSumme auf einem Attenuation-Pfad - - //dim3 SosGeometryVoxel; // SoSVoxel von Emitter/Receiver - float3 SosGeometryVoxelFloat; // SoSVoxel von Emitter/Receiver in Float - float SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION; // Aufluesung im SoS-Grid - //int tableIndex; // Index innerhalb TableVoxelToEmitter/ReceiverPath - - // Speicher in Texturformat - // int xmax = SOSGrid_XYZ.x; - // int ymax = SOSGrid_XYZ.y; - // int zmax = sosZLayerCount; //SOSGrid_XYZ.z; - int i_x = SosVoxel.x; - int i_y = SosVoxel.y; - int i_z = (SosVoxel.z-firstZLayer); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - //int Index; - int TexturGeometryIndexZ; - - float3 currentGeometry; - - for(int geometryIndexCounter = 0; geometryIndexCounter < geometryElementCount; geometryIndexCounter++) // Alle Emitter oder Receiver in der Liste von Matlab durchgehen - { - int lookUpGeometryIndex = 0; - - // Lade lookUpGeometryMemoryList-Eintrag, um Position im Memory zu bestimmen - if (geometry == 0) // => Emitter - { - lookUpGeometryIndex = lookUpGeometryMemoryListEmitter[geometryIndexCounter]; // Load from Constant Memory - } - else //if (geometry == 1) => Receiver - { - lookUpGeometryIndex = lookUpGeometryMemoryListReceiver[geometryIndexCounter]; // Load from Constant Memory - } - - - //if (currentGeometry.x != 255) // currentGeometry.x = 255 ist außerhalb des Wertebereichs und zeigt an, das Geometrie nicht genutzt wird. Darum muss nicht berechnet werden. 
- if (lookUpGeometryIndex != 65535) // currentGeometry.x = 65535 ist außerhalb des Wertebereichs und zeigt an, das Geometrie nicht genutzt wird. Darum muss nicht berechnet werden. - { - - #if defined(debug_CudaPrecalculateKernel) || defined(debug_OutputSOSPaths) - if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - { - printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i): lookUpGeometryIndex(%i)\n", geometry, geometryIndexCounter, lookUpGeometryIndex); // In welche Speicherstelle wird geschrieben - printf(" SOSGrid_XYZ.x,y,z = [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); // In welchem SoS-Voxel befinde ich mich? - printf(" geometryElementCount = %i\n", geometryElementCount); // Wie viele Elemente gibt es in der Emitter/receiverListe? - printf("-------------------------------------------------------------------\n"); - printf(" SosVoxel.x,y,z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // In welchem SoS-Voxel befinde ich mich? - printf(" geometryIndexCounter = %i\n", geometryIndexCounter); // Wie viele Elemente gibt es in der Emitter/receiverListe? - printf(" firstZLayer = %i\n", firstZLayer); // zLayer Offset, welcher wird zur Zeit berechnet? - printf(" sosZLayerCount = %i\n", sosZLayerCount); // Anzhal der zu berechnenden zLayer? - printf("-------------------------------------------------------------------\n"); - printf(" speedOfSoundSumOutput_Index= %i\n", SOSGrid_XYZ.x*(SOSGrid_XYZ.y*SOSGrid_XYZ.y*geometryIndexCounter+SOSGrid_XYZ.y*(SosVoxel.z-firstZLayer)+SosVoxel.y)+SosVoxel.x); // In welche Speicherstelle wird geschrieben - printf(" averageSpeed = %f\n", averageSpeed); // Berechnete Geschwindigkeit - printf(" write i_x,i_y,i_z = [%i %i %i]\n", i_x, i_y, i_z); // In welchem SoS-Voxel schreibe ich? 
- printf("==================================================================\n"); - } - #endif - - // Wenn Emitter/Receiver genutzt werden Koordinaten laden - #ifdef SaftUseConstantMemforGeometry - if (geometry == 0) // => Emitter - { - #ifdef SaftUseHarmonicMean - currentGeometry = emitterPOSharmon[geometryIndexCounter]; // Positionsdaten von Emitter lesen - #endif - //syncthreads(); - } - - else //if (geometry == 1) => Receiver - { - #ifdef SaftUseHarmonicMean - currentGeometry = receiverPOSharmon[geometryIndexCounter]; // Positionsdaten von Receiver lesen - #endif - //syncthreads(); - } - #endif - - #ifndef SaftUseConstantMemforGeometry - currentGeometry = geometry[geometryIndexCounter]; // Positionsdaten von Emitter/Receiver lesen - #endif - - - - - - // Versuche mit Geometrie d.h. E/R-Kooridinaten um einen halben Voxel zu verschieben ==> muesste eigentlich bei beiden, S/E-Koordinaten und Voxel, gemacht werden - //determineSpeedOfSoundFieldVoxel(currentGeometry , SosGeometryVoxel, SOS_RESOLUTION_FACTOR); // SoSVoxel von Emitter/Receiver bestimmen -// currentGeometry_plushalf.x = currentGeometry.x + IMAGE_RESOLUTION/2; // Emitter/Receiver-Position in SoS-Koordinaten Umwandeln -// currentGeometry_plushalf.y = currentGeometry.y + IMAGE_RESOLUTION/2; // halbe Koordinaten hier nicht benuetigt, da ja die genaue Positionsdaten da sind -// currentGeometry_plushalf.z = currentGeometry.z + IMAGE_RESOLUTION/2; - -// // Versuche im Integerformat -// //determineSpeedOfSoundFieldVoxel (currentGeometry_plushalf , SosGeometryVoxel, sosOffset, SOS_RESOLUTION_FACTOR);// SoSVoxel von E/R bestimmen // currentGeometry + 1/2--> SosGeometryVoxel -// determineSpeedOfSoundFieldVoxel (currentGeometry , SosGeometryVoxel, sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel von E/R bestimmen // currentGeometry --> SosGeometryVoxel Integer -// // out out voxel1(E/R) voxel2(SoSVoxel) SoSField Size of SoSField -// performRayTracedSpeedAddition(voxelCount, averageSpeed, SosGeometryVoxel, SosVoxel, 
deviceSpeedOfSoundField, SOSGrid_XYZ); // SosGeometryVoxel im Integerformat, SoSVoxel als Integer - - - // Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten - determineSpeedOfSoundFieldVoxelFloat(currentGeometry, SosGeometryVoxelFloat, sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel von E/R bestimmen // currentGeometry --> SosGeometryVoxel Float - //determineSpeedOfSoundFieldVoxelFloat(currentGeometry_plushalf, SosGeometryVoxelFloat, sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel von E/R bestimmen // currentGeometry --> SosGeometryVoxel Float - - // Nutzen der Bresenham-Floatvariante - // out out voxel1(E/R) voxel2(SoSVoxel) SoSField Size of SoSField , E=0/R=1 - //performRayTracedSpeedAdditionFloat(voxelCount, averageSpeed, SosGeometryVoxelFloat, SosVoxel, deviceSpeedOfSoundField, SOSGrid_XYZ , geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als Integer - - // Nutzen der Bresenham-Floatvariante mit Texturmemory und Interpolation - // out out voxel1(E/R) voxel2(SoSVoxel+0.5) SoSField Size of SoSField , E=0/R=1 - //performRayTracedSpeedAdditionTexture(voxelCount, averageSpeed, SosGeometryVoxelFloat, SosVoxel, deviceSpeedOfSoundField, SOSGrid_XYZ ,sosOffset, SOS_RESOLUTION, IMAGE_RESOLUTION, regionOfInterestOffset, geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als Integer - - #ifndef SaftTextureForBresenhamSosPaths // SOS-Volume ueber Array oder normal ansprechen?! 
- performRayTracedSpeedAdditionTexture(voxelCount, averageSpeed, SosGeometryVoxelFloat, SosVoxel, deviceSpeedOfSoundField, SOSGrid_XYZ, sosOffset, SOS_RESOLUTION, IMAGE_RESOLUTION, regionOfInterestOffset, geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als Integer - #else - //performRayTracedSpeedAdditionTexture(voxelCount, averageSpeed, SosGeometryVoxelFloat, SosVoxel, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ, sosOffset, SOS_RESOLUTION, IMAGE_RESOLUTION, regionOfInterestOffset, geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als Integer - - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - performRayTracedSpeedAdditionTexture (voxelCount, averageSpeed, SosGeometryVoxelFloat, SosVoxel, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ, sosOffset, SOS_RESOLUTION, IMAGE_RESOLUTION, regionOfInterestOffset, geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als Integer - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - performRayTracedSpeedAdditionTexture (voxelCount, averageSpeed, totalAttenuation, SosGeometryVoxelFloat, SosVoxel, deviceSosAttFieldCuArray, SOSGrid_XYZ, sosOffset, SOS_RESOLUTION, IMAGE_RESOLUTION, regionOfInterestOffset, geometry); // SosGeometryVoxelFloat im Floatformat, SoSVoxel als Integer - #endif - #endif - - #if defined(debug_CudaPrecalculateKernel) || defined(debug_OutputSOSPaths) - //if ((SosVoxel.y == DebugSosVoxelY) && ( (SosVoxel.x == DebugSosVoxelX) || (SosVoxel.x == DebugSosVoxelX) || (SosVoxel.x == DebugSosVoxelX))){ - if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)){ - //printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i):[%+3.4f %+3.4f %+3.4f]:[%+3.4f %+3.4f %+3.4f] - SOSVoxel [%3i %3i %3i] \n>>>>>>>>>>>> VoxelCnt(%i), SoSaverageSpeed(%3.3f), Index[Table,Index] = [%i %i]\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, 
SosGeometryVoxelFloat.x, SosGeometryVoxelFloat.y, SosGeometryVoxelFloat.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, (int)voxelCount, averageSpeed, tableIndex, Index); // In welche Speicherstelle wird geschrieben - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i):[%+3.6f %+3.6f %+3.6f]:[%+3.6f %+3.6f %+3.6f] - SOSVoxel [%3i %3i %3i] \n>>>>>>>>>>>> VoxelCnt(%i), SoSaverageSpeed(%3.3f)\n\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosGeometryVoxelFloat.x, SosGeometryVoxelFloat.y, SosGeometryVoxelFloat.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, (int)voxelCount, averageSpeed); // In welche Speicherstelle wird geschrieben - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i):[%+3.6f %+3.6f %+3.6f]:[%+3.6f %+3.6f %+3.6f] - SOSVoxel [%3i %3i %3i] \n>>>>>>>>>>>> VoxelCnt(%i), SoSaverageSpeed(%3.3f), totalAttenuation(%3.3f)\n\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosGeometryVoxelFloat.x, SosGeometryVoxelFloat.y, SosGeometryVoxelFloat.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, (int)voxelCount, averageSpeed, totalAttenuation); // In welche Speicherstelle wird geschrieben - #endif - } - #endif - - - - if (geometry == 0) // Emitter - { - //speedOfSoundSumOutput[Index] = averageSpeed; // Fuellen der TableVoxelToEmitter/ReceiverPathSosSum - //deviceVoxelCountOutputFloat[Index] = (float)voxelCount; - //speedOfSoundSumOutput[Index] = 0.0f; - //deviceVoxelCountOutputFloat[Index] = 0.0f; - - TexturGeometryIndexZ = sosZLayerCount * lookUpGeometryIndex + i_z; - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 // Float1 - surf3Dwrite( averageSpeed, outSurfRefTableVoxelToEmitterPathSosSum, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - 
surf3Dwrite((float)voxelCount, outSurfRefTableVoxelToEmitterPathCount, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathCount - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 // Float2 - float2 VoxelValues; - VoxelValues.x = averageSpeed; - VoxelValues.y = (float)voxelCount; - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToEmPathSosBoth, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 // Float4 - float4 VoxelValues; - VoxelValues.x = averageSpeed; - VoxelValues.y = voxelCount; - -// if (totalAttenuation>debugModeParameter) // Max Border for Attenuation Correction -// VoxelValues.z = debugModeParameter; // Average Attenuation on this Path -// else - VoxelValues.z = totalAttenuation; // Average Attenuation on this Path - - - VoxelValues.w = 0.0f; - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToEmPathSosBoth, i_x*sizeof(float4), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - #endif - - } - else - { - //speedOfSoundSumOutput[Index] = averageSpeed; // Fuellen der TableVoxelToEmitter/ReceiverPathSosSum - //deviceVoxelCountOutputFloat[Index] = (float)voxelCount; - - //speedOfSoundSumOutput[Index] = 0.0f; - //deviceVoxelCountOutputFloat[Index] = 0.0f; - - TexturGeometryIndexZ = sosZLayerCount * ((lookUpGeometryIndex) % maxSoSReceiverArrayForTexture) + i_z; - - #if defined(debug_CudaPrecalculateKernel) || defined(debug_OutputSOSPaths) - //if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - // printf(">>>> %i >>>> Precalc: geomIdxCounter(%4i):[%+3.4f %+3.4f %+3.4f] - SOSVoxel [%3i %3i %3i] firstZLayer(%i) ==> TexturNr.[%3i], TexturGeometryIndexZ(%3i), lookUpGeometryIndex(%4i)\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, firstZLayer, 
(int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) , TexturGeometryIndexZ, lookUpGeometryIndex); // In welche Speicherstelle wird geschrieben - #endif - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 // Float1 - if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 0){ - surf3Dwrite((float)averageSpeed, outSurfRefTableVoxelToReceiverPathSosSum0, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - surf3Dwrite((float)voxelCount, outSurfRefTableVoxelToReceiverPathCount0, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 1) { - surf3Dwrite( averageSpeed, outSurfRefTableVoxelToReceiverPathSosSum0, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - surf3Dwrite((float)voxelCount, outSurfRefTableVoxelToReceiverPathCount0, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 2){ - surf3Dwrite( averageSpeed, outSurfRefTableVoxelToReceiverPathSosSum2, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - surf3Dwrite((float)voxelCount, outSurfRefTableVoxelToReceiverPathCount2, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 // Float2 - float2 VoxelValues; - VoxelValues.x = averageSpeed; - VoxelValues.y = (float)voxelCount; - - if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 0){ - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth0, 
i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - } - else if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 1) { - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth1, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - } - else if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 2){ - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth2, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - } - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 // Float4 - float4 VoxelValues; - VoxelValues.x = averageSpeed; // Average SoS on this Path - VoxelValues.y = voxelCount; // Amount of visited voxel - -// if (totalAttenuation>debugModeParameter) // Max Border for Attenuation Correction -// VoxelValues.z = debugModeParameter; // Average Attenuation on this Path -// else - VoxelValues.z = totalAttenuation; // Average Attenuation on this Path - VoxelValues.w = 0.0f; // Amount of visited voxel - - if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 0){ - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth0, i_x*sizeof(float4), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - } - else if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 1) { - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth1, i_x*sizeof(float4), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - } - else if ( (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) == 2){ - surf3Dwrite(VoxelValues, outSurfRefTableVoxelToRecPathSosBoth2, i_x*sizeof(float4), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben - } - #endif - //speedOfSoundSumOutput[Index] = averageSpeed; // Fuellen der TableVoxelToEmitter/ReceiverPathSosSum - //deviceVoxelCountOutput[tableIndex] = 
typedVoxelCount; // Fuellen der TableVoxelToEmitter/ReceiverPathCount - } - - #if defined(debug_CudaPrecalculateKernel) || defined(debug_OutputSOSPaths) || defined(precalculateAverageSpeedOfSoundKernel) - // //printf(" SosVoxel.x,y,z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // Herausfinden welche berechnet werden - - if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - { - printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i):[%+3.6f %+3.6f %+3.6f] - SOSVoxel [%3i %3i %3i] firstZLayer(%i)\n" - ">>>>>>>>>>>> surf3Dwrite Textur[%3i %3i %3i], TexturGeometryIndexZ(%3i) = VoxelCnt(%3.6f)\n" - ">>>>>>>>>>>> SoSSum(%3.6f) = avgSOS(%3.6f) \n" - ">>>>>>>>>>>> ATTSum(%3.6f) = avgATT(%3.6f) in dB \n", - geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, - SosVoxel.x,SosVoxel.y,SosVoxel.z, firstZLayer, - i_x,i_y,i_z, TexturGeometryIndexZ, (float)voxelCount, - averageSpeed, (1/(averageSpeed/(float)voxelCount)), - totalAttenuation, (1/(totalAttenuation/(float)voxelCount))); // In welche Speicherstelle wird geschrieben - - //printf("======%i %i %i============================================================\n", geometry,geometry,geometry); - printf(" SOSGrid_XYZ.x,y,z = [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); // In welchem SoS-Voxel befinde ich mich? - printf(" geometryElementCount = %i\n", geometryElementCount); // Wie viele Elemente gibt es in der Emitter/receiverListe? - printf(" SosGeometryVoxelFloat.x,y,z = [%+3.6f %+3.6f %+3.6f] in SOSVoxel\n", SosGeometryVoxelFloat.x, SosGeometryVoxelFloat.y, SosGeometryVoxelFloat.z); // Em/Rec SOSVoxel - printf(" -------------------------------------------------------------------\n"); - printf(" SosVoxel.x(i_x),y(i_y),z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // In welchem SoS-Voxel befinde ich mich? - printf(" firstZLayer = %i\n", firstZLayer); // zLayer Offset, welcher wird zur Zeit berechnet? 
- printf(" geometryIndexCounter = %i\n", geometryIndexCounter); // Welches Elemente aus der Emitter/receiverListe? - //printf(" TexturGeometryIndexZ = %i\n", TexturGeometryIndexZ); // zLayer Offset, welcher wird zur Zeit berechnet? - printf(" lookUpGeometryIndex = %i => Adress: ### %i in [%i] ###\n", lookUpGeometryIndex, TexturGeometryIndexZ, (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture)); // Welcher Index hat Emitter/receiver? - printf(" i_z = (SosVxl.z-firstZLay) = %i\n", i_z); // zLayer Offset, welcher wird zur Zeit berechnet? - - //printf("-------------------------------------------------------------------\n"); - //printf(" speedOfSoundSumOutput_Index= %i\n", SOSGrid_XYZ.x*(SOSGrid_XYZ.y*SOSGrid_XYZ.y*geometryIndexCounter+SOSGrid_XYZ.y*(SosVoxel.z-firstZLayer)+SosVoxel.y)+SosVoxel.x); // In welche Speicherstelle wird geschrieben -// printf(" averageSpeed = %f\n", averageSpeed); // Berechnete Geschwindigkeit -// printf("==================================================================\n"); - } - #endif - - -// //#ifdef debug_CudaPrecalculateKernel -// //if ((SosVoxel.y == DebugSosVoxelY) && ( (SosVoxel.x == DebugSosVoxelX) || (SosVoxel.x == DebugSosVoxelX) || (SosVoxel.x == DebugSosVoxelX))){ -// //printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i):[%+3.4f %+3.4f %+3.4f] - SOSVoxel [%3i %3i %3i] \n>>>>>>>>>>>> VoxelCnt(%i), 1/SoSaverageSpeed(%3.3f), Index[Table,Index] = [%i %i]\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, (int)typedVoxelCount, averageSpeed, tableIndex, Index); // In welche Speicherstelle wird geschrieben ? 
-// printf(">>>> %i >>>> Precalc: geometryIndexCounter(%i):[%+3.4f %+3.4f %+3.4f] - SOSVoxel [%3i %3i %3i] \n>>>>>>>>>>>> VoxelCnt(%i), 1/SoSaverageSpeed(%3.3f) = [%i %i]\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, (float)voxelCount, averageSpeed); // In welche Speicherstelle wird geschrieben ? -// // // Speicher in Texturformat -// // // Indexberechnung für Einsatz des Texturmemorys -// // float xmax = SOSGrid_XYZ.x; -// // float ymax = SOSGrid_XYZ.y; -// // float zmax = (float)maxFeasibleSosZLayerCount; -// // float i_x = SosVoxel.x; -// // float i_y = SosVoxel.y; -// // float i_z = (float)(int)(SosVoxelTextureZ); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); -// -// //Index = xmax*(ymax*(zmax*geometryIndexCounter+i_z)+i_y)+i_x; // ohne lookUpGeometryIndex-Liste linear im Speicher liegend -// //Index = xmax*(ymax*(zmax*lookUpGeometryIndex+i_z)+i_y)+i_x; // mit lookUpGeometryIndex-Liste -// -// //printf(">>>> %i >>>> Index = xmax(%i)*(ymax(%i)*(zmax(%i)*geometryIndexCounter(%i)+i_z(%i))+i_y(%i))+i_x(%i) = [%i]\n", geometry, (int)xmax, (int)ymax, (int)zmax, geometryIndexCounter, (SosVoxel.z-firstZLayer), SosVoxel.y, SosVoxel.x,Index); // In welche Speicherstelle wird geschrieben ? -// printf(">>>> %i >>>> Index = xmax(%i)*(ymax(%i)*(zmax(%i)*lookUpGeometryIndex(%i)+i_z(%i))+i_y(%i))+i_x(%i) = [%i]\n", geometry, (int)xmax, (int)ymax, (int)zmax, lookUpGeometryIndex, (SosVoxel.z-firstZLayer), SosVoxel.y, SosVoxel.x,Index); // In welche Speicherstelle wird geschrieben ? -// //} -// //#endif - - // Alle berechneten SOS-Voxel ausgeben mit Index - //printf(" SosVoxel.x,y,z = [%i %i %i] => Index (%i)\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, Index); // In welchem SoS-Voxel befinde ich mich? - } - - } - -} - - -/** - Proxy function which calls the speed of sound precalculation kernel. - - Proxy-Funktion der einen Schallgeschwindigkeits-Kernel aufruft. 
-*/ -//precalculateAverageSpeedOfSound( -// currentSpeedOfSoundZLayer, -// maxFeasibleSosZLayerCount, -// 0, -// emitter_list_Size, -// deviceTableVoxelToEmitterPathCount, -// deviceTableVoxelToEmitterPathCountFloat, -// deviceTableVoxelToEmitterPathSosSum); - -void SAFTHandler::precalculateAverageSpeedOfSound -( - int firstZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - int sosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. -#ifdef SaftUseConstantMemforGeometry - int deviceListGeometry, ///< emitters=0 or receivers=1. -#else - float3 const * deviceListGeometry, ///< Vector array describing the positions of emitters or receivers. -#endif - - int geometryElementCount, ///< Number of elements in the geometry array got from Matlab - //VoxelCountType * deviceVoxelCountOutput, ///< Out: # of voxels in the path from a transducer element to a voxel. - float * deviceVoxelCountOutputFloat, ///< Out: # of voxels in the path from a transducer element to a voxel in Float format. - float * deviceSpeedOfSoundSumOutput ///< Out: Sum of SoS samples in the path from transducer to voxel. -// int blocksPerGrid, ///< Number of blocks per grid to be used to execute the kernel. -// int threadsPerBlock, ///< Number of threads per block to be used to execute the kernel. -// cudaStream_t stream, ///< Stream to be used for the execution of the kernel. -) -{ - #ifdef debug_OutputFunctions - printf( "==> SAFTHandler::precalculateAverageSpeedOfSound - Start\n"); - #endif - - - - dim3 threadsPerBlock (SOSGrid_XYZ.x,1,1); // max. 512 oder 1024 Threads werden vorgegeben und - //dim3 threadsPerBlock (SOSGrid_XYZ.x,SOSGrid_XYZ.y,1); // max. 512 oder 1024 Threads werden vorgegeben und - dim3 blocksPerGrid (1,1,1); // max. 65.535 Bloecke im Grid berechnet. 
Initialisierung - blocksPerGrid.x = SOSGrid_XYZ.y; - blocksPerGrid.y = sosZLayerCount; - blocksPerGrid.z = 1; - - #ifdef debug_CudaPrecalculateKernel - int sosZLayerVoxelCountToProcess = sosZLayerVoxelCount * sosZLayerCount; // Anzahl der Voxel die berechnet werden sollen - - printf("===========================================================================================\n"); - printf(" deviceListGeometry: %i (0=Em/1=Rec)\n", deviceListGeometry); - printf(" geometryElementCount: %i\n", geometryElementCount); - - printf(" sosZLayerVoxelCountToProcess = sosZLayerVoxelCount(%i) * sosZLayerCount(%i) = %i\n", sosZLayerVoxelCount, sosZLayerCount, sosZLayerVoxelCountToProcess); - printf(" threadsPerBlock x,y,z: [%i %i %i]\n", threadsPerBlock.x, threadsPerBlock.y, threadsPerBlock.z); - printf(" blocksPerGrid x,y,z: [%i %i %i]\n", blocksPerGrid.x, blocksPerGrid.y, blocksPerGrid.z); - printf(" firstZLayer (Start z): %i\n", firstZLayer); - //printf(" SOSGrid_XYZ x,y,z: [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); - printf("===========================================================================================\n"); - #endif - - #ifdef SaftTextureForBresenhamSosPaths - // Prepare Texture for SpeedOfSoundField - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - cudaChannelFormatDesc texChannelDescSpeedOfSoundField = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur fuer SpeedOfSoundField - - texRefSpeedOfSoundField.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texRefSpeedOfSoundField.addressMode[1] = cudaAddressModeClamp; - texRefSpeedOfSoundField.addressMode[2] = cudaAddressModeClamp; - - if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1){ - texRefSpeedOfSoundField.filterMode = cudaFilterModeLinear; // Lineare Interpolation - } - else{ - texRefSpeedOfSoundField.filterMode = cudaFilterModePoint; // Nearest 
Neighbor - } -// #ifdef SaftTextureForBresenhamInterpolated -// texRefSpeedOfSoundField.filterMode = cudaFilterModeLinear; -// #else -// texRefSpeedOfSoundField.filterMode = cudaFilterModePoint; -// #endif - texRefSpeedOfSoundField.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texRefSpeedOfSoundField, deviceSpeedOfSoundFieldCuArray, &texChannelDescSpeedOfSoundField )); // Schritt 4.1 3DArray an Texturmemory binden - #endif - - - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und beschreiben - - texRefSosAttField.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texRefSosAttField.addressMode[1] = cudaAddressModeClamp; - texRefSosAttField.addressMode[2] = cudaAddressModeClamp; - - if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1){ - texRefSosAttField.filterMode = cudaFilterModeLinear; // Lineare Interpolation - } - else{ - texRefSosAttField.filterMode = cudaFilterModePoint; // Nearest Neighbor - } -// #ifdef SaftTextureForBresenhamInterpolated -// texRefSosAttField.filterMode = cudaFilterModeLinear; -// #else -// texRefSosAttField.filterMode = cudaFilterModePoint; -// #endif - texRefSosAttField.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texRefSosAttField, deviceSosAttFieldCuArray, &texChannelDescSosAttField )); // Schritt 4.1 3DArray an Texturmemory binden - #endif - - #endif - - #ifdef SaftTextureForEmRecSosPathsTables - if (deviceListGeometry == 0){ - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - cudaBindSurfaceToArray(outSurfRefTableVoxelToEmitterPathSosSum, deviceTableVoxelToEmitterPathSosSumCuArray); - cudaBindSurfaceToArray(outSurfRefTableVoxelToEmitterPathCount, deviceTableVoxelToEmitterPathCountCuArray); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - 
cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 // TODO: hier Name aendern mit Att - cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray); - #endif - } - - //deviceTableVoxelToReceiverPathCountCuArray[0] = deviceTableVoxelToReceiverPathSosSumCuArrayTest; - //cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSumTest, deviceTableVoxelToReceiverPathSosSumCuArrayTest); - - if (deviceListGeometry == 1){ - //printf( "#################(int)floor((float)geometryElementCount / (float)maxSoSReceiverArrayForTexture) == %i\n", (int)floor((float)geometryElementCount / (float)maxSoSReceiverArrayForTexture)); - //printf( "#################TableVoxelToReceiverPathSosAllocationCount == %i\n", TableVoxelToReceiverPathSosAllocationCount); - - if ( TableVoxelToReceiverPathSosAllocationCount > 0){ - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToReceiverPathSosSumCuArray[0](%X) deviceTableVoxelToReceiverPathCountCuArray[0](%X)\n", deviceTableVoxelToReceiverPathSosSumCuArray[0], deviceTableVoxelToReceiverPathCountCuArray[0]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSum0, deviceTableVoxelToReceiverPathSosSumCuArray[0]); - cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathCount0, deviceTableVoxelToReceiverPathCountCuArray[0]); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToRecPathSosBothCuArray[0](%X)\n", deviceTableVoxelToRecPathSosBothCuArray[0]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 // TODO: hier Name aendern mit Att - #ifdef debug_CudaPrecalculateKernel - 
printf( "cudaBindSurfaceToArray: deviceTableVoxelToRecPathSosBothCuArray[0](%X)\n", deviceTableVoxelToRecPathSosBothCuArray[0]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]); - #endif - } - if ( TableVoxelToReceiverPathSosAllocationCount > 1) { - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToReceiverPathSosSumCuArray[1](%X) deviceTableVoxelToReceiverPathCountCuArray[1](%X)\n", deviceTableVoxelToReceiverPathSosSumCuArray[1], deviceTableVoxelToReceiverPathCountCuArray[1]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSum1, deviceTableVoxelToReceiverPathSosSumCuArray[1]); - cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathCount1, deviceTableVoxelToReceiverPathCountCuArray[1]); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToRecPathSosBothCuArray[1](%X)\n", deviceTableVoxelToRecPathSosBothCuArray[1]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToRecPathSosBothCuArray[1](%X)\n", deviceTableVoxelToRecPathSosBothCuArray[1]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]); - #endif - } - if ( TableVoxelToReceiverPathSosAllocationCount > 2){ - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToReceiverPathSosSumCuArray[2](%X) deviceTableVoxelToReceiverPathCountCuArray[2](%X)\n", deviceTableVoxelToReceiverPathSosSumCuArray[2], deviceTableVoxelToReceiverPathCountCuArray[2]); - #endif - 
cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSum2, deviceTableVoxelToReceiverPathSosSumCuArray[2]); - cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathCount2, deviceTableVoxelToReceiverPathCountCuArray[2]); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToRecPathSosBothCuArray[2](%X)\n", deviceTableVoxelToRecPathSosBothCuArray[2]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - #ifdef debug_CudaPrecalculateKernel - printf( "cudaBindSurfaceToArray: deviceTableVoxelToRecPathSosBothCuArray[2](%X)\n", deviceTableVoxelToRecPathSosBothCuArray[2]); - #endif - cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]); - #endif - } - } - #endif - -// cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSum0, deviceTableVoxelToReceiverPathSosSumCuArray[0]); -// cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSum1, deviceTableVoxelToReceiverPathSosSumCuArray[1]); -// cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathSosSum2, deviceTableVoxelToReceiverPathSosSumCuArray[2]); - -// cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathCount0, deviceTableVoxelToReceiverPathCountCuArray[0]); -// cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathCount1, deviceTableVoxelToReceiverPathCountCuArray[1]); -// cudaBindSurfaceToArray(outSurfRefTableVoxelToReceiverPathCount2, deviceTableVoxelToReceiverPathCountCuArray[2]); - - - precalculateAverageSpeedOfSoundKernel <<< blocksPerGrid, threadsPerBlock >>> - ( - #ifndef SaftTextureForBresenhamSosPaths - deviceSpeedOfSoundField, - #else - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - deviceSpeedOfSoundFieldCuArray, - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine 
Textur fuer beide Volumen (Sos+Att) - deviceSosAttFieldCuArray, - #endif - #endif - firstZLayer, - sosZLayerCount, - deviceListGeometry, - geometryElementCount, - maxSoSReceiverArrayForTexture, // maximale Anzahl an Receivern in einem CUDA Array - - //deviceVoxelCountOutput, - deviceVoxelCountOutputFloat, - deviceSpeedOfSoundSumOutput, -// regionOfInterestOffset, - SOSGrid_XYZ, - sosOffset, - regionOfInterestOffset, - IMAGE_RESOLUTION, - SOS_RESOLUTION, - debugMode, - debugModeParameter - ); - CUDA_CHECK(cudaGetLastError()); - - #ifdef SaftTextureForBresenhamSosPaths - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - CUDA_CHECK(cudaUnbindTexture( &texRefSpeedOfSoundField )); - #endif - - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - CUDA_CHECK(cudaUnbindTexture( &texRefSosAttField )); - #endif - #endif - - - - #ifdef debug_OutputFunctions - printf( "<== SAFTHandler::precalculateAverageSpeedOfSound - End\n"); - #endif -}

/**
 * Pre-calculates the A-scan sample index for every speed-of-sound (SoS) voxel and every
 * A-scan of the current batch, using the tabulated emitter->voxel and voxel->receiver paths.
 *
 * Thread layout (taken from the index arithmetic below): one thread per SoS voxel with
 *   x = threadIdx.x, y = blockIdx.x, z = blockIdx.y + currentSpeedOfSoundZLayer.
 * The kernel has no bounds check, so the launch configuration must match the SoS grid slice.
 *
 * For each A-scan the kernel
 *   1. loads the (1-based) emitter/receiver indices of that A-scan,
 *   2. looks up emitter/receiver positions and path-table slots in constant memory,
 *   3. fetches the path sum and voxel count of both half-paths from 3D textures,
 *   4. derives the mean speed as count / sum (called "harmonic mean" in the original code;
 *      presumably the tables hold summed slowness values - confirm against the path kernel),
 *   5. converts the emitter->voxel->receiver distance into a sample index and writes it
 *      as float2 (index, 1) into one of the surfaces outSurfRefAscanIndexFloat0..3.
 *
 * @param ascanIndexBatchOffset            Offset of this batch inside the emitter/receiver index lists.
 * @param aScanWindowSize                  Number of A-scans processed by this call.
 * @param deviceSpeedOfSoundField / deviceSpeedOfSoundFieldCuArray / deviceSosAttFieldCuArray
 *                                         SoS (and attenuation) field, selected at compile time; not read by this kernel.
 * @param currentSpeedOfSoundZLayer        First SoS z-layer handled by this launch.
 * @param maxFeasibleSosZLayerCount        Number of SoS z-layers stored per emitter/receiver/A-scan slot.
 * @param maxSoSReceiverArrayForTexture    Number of receivers stored in one receiver path texture.
 * @param deviceEmitterIndex_block         Per-A-scan emitter indices (1-based).
 * @param deviceReceiverIndex_block        Per-A-scan receiver indices (1-based).
 * @param TableAscanIndexAllocationCount   Not read by this kernel.
 * @param maxAscanIndexArraysInTexture     Number of A-scans stored in one output surface.
 * @param deviceTextureAscanIndexFloatCuArray Not read by this kernel (output goes through the bound surfaces).
 * @param SOSGrid_XYZ                      Not read by this kernel.
 * @param sosOffset                        Position of SoS voxel (0,0,0) in metres.
 * @param regionOfInterestOffset           Not read by this kernel.
 * @param IMAGE_RESOLUTION                 Not read by this kernel.
 * @param SOS_RESOLUTION                   Edge length of one SoS voxel in metres.
 * @param debugMode, debugModeParameter    Not read by this kernel.
 * @param deviceSAFT_VARIANT               Not read by this kernel.
 *
 * NOTE(review): receivers whose texture number is > 2 keep the values fetched for the
 * previous A-scan, and A-scans whose surface number is > 3 are silently not written.
 * This matches the original behaviour; callers must size the batches accordingly.
 */
__global__ void precalculateAscanIndex_usePathsKernel(
    int ascanIndexBatchOffset,
    int aScanWindowSize,
#ifndef SaftTextureForBresenhamSosPaths
    float const * deviceSpeedOfSoundField,
#else
    #ifdef SaftUseSosAttFloat1 // separate textures for SoS and attenuation
    cudaArray *deviceSpeedOfSoundFieldCuArray,
    #endif
    #ifdef SaftUseSosAttFloat2 // one combined texture for SoS and attenuation
    cudaArray *deviceSosAttFieldCuArray,
    #endif
#endif
    int currentSpeedOfSoundZLayer,
    int maxFeasibleSosZLayerCount,

    int maxSoSReceiverArrayForTexture,

    unsigned short const * deviceEmitterIndex_block,
    unsigned short const * deviceReceiverIndex_block,

    int TableAscanIndexAllocationCount,
    int maxAscanIndexArraysInTexture,

    cudaArray **deviceTextureAscanIndexFloatCuArray,

    int3 SOSGrid_XYZ,
    float3 sosOffset,
    float3 regionOfInterestOffset,
    float IMAGE_RESOLUTION,
    float SOS_RESOLUTION,
    float debugMode,
    float debugModeParameter,
    int * deviceSAFT_VARIANT
    )
{
    // SoS voxel handled by this thread (indices start at 0).
    float3 SosVoxelFloat;
    SosVoxelFloat.x = (float)threadIdx.x;                                // SoS voxel X
    SosVoxelFloat.y = (float)blockIdx.x;                                 // SoS voxel Y
    SosVoxelFloat.z = (float)(blockIdx.y + currentSpeedOfSoundZLayer);   // SoS voxel Z

    // Integer coordinates used for the surface write.
    int i_x = (int)floor(SosVoxelFloat.x);
    int i_y = (int)floor(SosVoxelFloat.y);
    int i_z = (int)(SosVoxelFloat.z-currentSpeedOfSoundZLayer);

    // Voxel position in metres.
    float3 voxelPosition;
    voxelPosition.x = (float)SosVoxelFloat.x*SOS_RESOLUTION+sosOffset.x;
    voxelPosition.y = (float)SosVoxelFloat.y*SOS_RESOLUTION+sosOffset.y;
    voxelPosition.z = (float)SosVoxelFloat.z*SOS_RESOLUTION+sosOffset.z;

    float  currentAscanIndex = 0.0f;             // sample index of the current A-scan for this voxel
    float2 currentSOSVoxel_AscanIndexAttValues;  // value written to the surface: (index, 1)

    int currentEmitterIndex_minus1  = 0;         // 0-based emitter index of the current A-scan
    int currentReceiverIndex_minus1 = 0;         // 0-based receiver index of the current A-scan
    // Emitter of the previous A-scan. 65535 can never equal a real value because the
    // 0-based index is an unsigned short minus one, i.e. at most 65534.
    int oldEmitterIndex = 65535;

    float3 currentEmitterGeometry;
    float3 currentReceiverGeometry;
    float emitterDistance  = 0.0f;
    float receiverDistance = 0.0f;
    float totalDistance    = 0.0f;

    unsigned short lookUpEmitterIndex;           // slot of the emitter inside the emitter path texture
    unsigned short lookUpReceiverIndex;          // slot of the receiver inside the receiver path textures
    int currentRecTextureIndex;                  // which receiver path texture holds that slot

    // Texture coordinates; +0.5 addresses the texel centre.
    float TexturIndexX = SosVoxelFloat.x+ 0.5f;
    float TexturIndexY = SosVoxelFloat.y+ 0.5f;
    float TexturIndexZEmitter  = 0.0f;
    float TexturIndexZReceiver = 0.0f;

    // Z offset of this voxel inside the pre-calculated path tables.
    #ifndef SOS_Version2
    float SosVoxelTextureZ = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer);
    #else
    float SosVoxelTextureZnotInterpolated = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer);
    #endif

    #ifdef SaftTextureForEmRecSosPathsTablesFloat2
    float2 VoxelSosAttValues;   // (path sum, voxel count)
    #endif
    #ifdef SaftTextureForEmRecSosPathsTablesFloat4
    float4 VoxelSosAttValues;   // (path sum, voxel count, attenuation sum, unused); only .x/.y are consumed here
    #endif

    float emitterVoxelVoxelCount         = 0.0f;
    float receiverVoxelVoxelCount        = 0.0f;
    float emitterReceiverTotalVoxelCount = 0.0f;
    float emitterVoxelaverageSpeedSum    = 0.0f;
    float receiverVoxelaverageSpeedSum   = 0.0f;
    float averageSpeed                   = 0.0f;

    #if defined(debug_CudaPrecalculateAscanIndexKernel)
    if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ))
    {
        printf("------------ Run over ascanIndex_i (1:(aScanWindowSize (%i)) ; ascanIndexBatchOffset (%i)) currentSpeedOfSoundZLayer (%i) maxAscanIndexArraysInTexture (%i)--------------------\n", aScanWindowSize, ascanIndexBatchOffset, currentSpeedOfSoundZLayer, maxAscanIndexArraysInTexture);
    }
    #endif

    for (int ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i++)
    {
        // Emitter/receiver of this A-scan; the lists are 1-based, hence the -1.
        currentEmitterIndex_minus1  = deviceEmitterIndex_block [ascanIndexBatchOffset + ascanIndex_i] - 1;
        currentReceiverIndex_minus1 = deviceReceiverIndex_block[ascanIndexBatchOffset + ascanIndex_i] - 1;
        // Slots of the pre-calculated paths inside the texture memory.
        lookUpEmitterIndex  = lookUpGeometryMemoryListEmitter [currentEmitterIndex_minus1];
        lookUpReceiverIndex = lookUpGeometryMemoryListReceiver[currentReceiverIndex_minus1];

        // ---------------- emitter -> voxel half-path (cached per emitter) ----------------
        // Everything computed here depends only on the emitter and on per-thread constants,
        // so it is reused while consecutive A-scans share the same emitter.
        if (currentEmitterIndex_minus1 != oldEmitterIndex)
        {
            // Remember the emitter so the guard above can actually skip the reload.
            // The original never updated this variable, so the cache never hit.
            oldEmitterIndex = currentEmitterIndex_minus1;

            currentEmitterGeometry = emitterPOSharmon[currentEmitterIndex_minus1];
            emitterDistance = sqrtf( SQR(voxelPosition.x-currentEmitterGeometry.x) + SQR(voxelPosition.y-currentEmitterGeometry.y) + SQR(voxelPosition.z-currentEmitterGeometry.z) );

            // Z coordinate of this emitter/voxel pair in the emitter path texture.
            #ifndef SOS_Version2
            TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZ;
            #else
            TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZnotInterpolated + 0.5f;
            #endif

            #ifdef SaftTextureForEmRecSosPathsTablesFloat1
            emitterVoxelVoxelCount      = tex3D( texTableVoxelToEmitterPathCount,  TexturIndexX, TexturIndexY, TexturIndexZEmitter);
            emitterVoxelaverageSpeedSum = tex3D( texTableVoxelToEmitterPathSosSum, TexturIndexX, TexturIndexY, TexturIndexZEmitter);
            #endif
            #ifdef SaftTextureForEmRecSosPathsTablesFloat2
            VoxelSosAttValues = tex3D( texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter);
            emitterVoxelaverageSpeedSum = VoxelSosAttValues.x;
            emitterVoxelVoxelCount      = VoxelSosAttValues.y;
            #endif
            #ifdef SaftTextureForEmRecSosPathsTablesFloat4
            VoxelSosAttValues = tex3D( texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter);
            emitterVoxelaverageSpeedSum = VoxelSosAttValues.x;
            emitterVoxelVoxelCount      = VoxelSosAttValues.y;
            #endif
        }

        // ---------------- voxel -> receiver half-path ----------------
        currentReceiverGeometry = receiverPOSharmon[currentReceiverIndex_minus1];
        // The squared differences are deliberately accumulated in double precision
        // (the pure float variant was disabled in the original code).
        receiverDistance = (float)sqrtf( SQR((double)voxelPosition.x-(double)currentReceiverGeometry.x) + SQR((double)voxelPosition.y-(double)currentReceiverGeometry.y) + SQR((double)voxelPosition.z-(double)currentReceiverGeometry.z) );

        // Total distance emitter -> voxel -> receiver.
        totalDistance = emitterDistance + receiverDistance;

        // Receivers are spread over several textures; pick the texture and the slot inside it.
        currentRecTextureIndex = (int)lookUpReceiverIndex / maxSoSReceiverArrayForTexture;

        #ifndef SOS_Version2
        TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ;
        #else
        TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZnotInterpolated + 0.5f;
        #endif

        #ifdef SaftTextureForEmRecSosPathsTablesFloat1
        switch (currentRecTextureIndex)
        {
            case 0:
                receiverVoxelVoxelCount      = tex3D( texTableVoxelToReceiverPathCount0,  TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum0, TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                break;
            case 1:
                receiverVoxelVoxelCount      = tex3D( texTableVoxelToReceiverPathCount1,  TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum1, TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                break;
            case 2:
                receiverVoxelVoxelCount      = tex3D( texTableVoxelToReceiverPathCount2,  TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum2, TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                break;
            default:
                break; // no texture for this receiver: previous values are kept (original behaviour)
        }
        #endif
        #if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
        switch (currentRecTextureIndex)
        {
            case 0:
                VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                break;
            case 1:
                VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                break;
            case 2:
                VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver);
                break;
            default:
                break; // no texture for this receiver: VoxelSosAttValues keeps its last content (original behaviour)
        }
        receiverVoxelaverageSpeedSum = VoxelSosAttValues.x;
        receiverVoxelVoxelCount      = VoxelSosAttValues.y;
        #endif

        // ---------------- mean speed over both half-paths ----------------
        emitterReceiverTotalVoxelCount = (emitterVoxelVoxelCount+receiverVoxelVoxelCount);
        averageSpeed = emitterReceiverTotalVoxelCount/(emitterVoxelaverageSpeedSum+receiverVoxelaverageSpeedSum);

        // ---------------- time of flight -> A-scan sample index ----------------
        // 1e-7 is presumably the sample period in seconds (10 MHz) - TODO confirm.
        // The division is deliberately done in double precision.
        currentAscanIndex = (double)(totalDistance)/((double)1e-7*(double)averageSpeed);
        currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex;
        currentSOSVoxel_AscanIndexAttValues.y = 1;

        // Z slot of this A-scan inside its output surface, and the surface number.
        int TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z;
        int ascanIndexTexture_Nr = ascanIndex_i / maxAscanIndexArraysInTexture;

        // surf3Dwrite takes the x coordinate in bytes.
        switch (ascanIndexTexture_Nr)
        {
            case 0:
                surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat0, i_x*sizeof(float2), i_y, TexturGeometryIndexZ );
                break;
            case 1:
                surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat1, i_x*sizeof(float2), i_y, TexturGeometryIndexZ );
                break;
            case 2:
                surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat2, i_x*sizeof(float2), i_y, TexturGeometryIndexZ );
                break;
            case 3:
                surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat3, i_x*sizeof(float2), i_y, TexturGeometryIndexZ );
                break;
            default:
                break; // more A-scans than the four surfaces can hold: not written (original behaviour)
        }
    } // A-scan loop

    #ifdef debug_OutputFunctions
        printf( "<== SAFTHandler::precalculateAscanIndexKernel_usePathsKernel - End\n");
    #endif
}

__global__ void precalculateAscanIndex_usePathsKernel_SOS( - int ascanIndexBatchOffset, ///< Offset of AscanIndex batch (bei mehreren Aufrufen) - int aScanWindowSize, ///< gibt Anzahl der Ascans ein - #ifndef SaftTextureForBresenhamSosPaths - float const * deviceSpeedOfSoundField, ///< Array of speed of sound samples. Dimensions ordered by speed of indices, commencing with the fastest moving one: 1. x 2. y 3. z - #else - - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur - #endif - #endif - int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
- int maxSoSReceiverArrayForTexture, - - unsigned short const * deviceEmitterIndex_block, ///< Speicheradresse fuer EmitterIndexdaten im AscanBlock - unsigned short const * deviceReceiverIndex_block, ///< Speicheradresse fuer ReceiverIndexdaten im AscanBlock - - int TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks der Groesse 2048/4096 - int maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec(Ascans) in einer Teiltabelle /CUDA Array - - cudaArray **deviceTextureAscanIndexFloatCuArray, - - int3 SOSGrid_XYZ, - float3 sosOffset, - float3 regionOfInterestOffset, - float IMAGE_RESOLUTION, - float SOS_RESOLUTION, - float debugMode, - float debugModeParameter, - int * deviceSAFT_VARIANT - ) - { - - #ifdef debug_OutputFunctions - printf( "==> precalculateAscanIndexKernel_usePathsKernel_SOS - Start\n"); - #endif - - float3 SosVoxelFloat; - SosVoxelFloat.x = (float)threadIdx.x; // SoS-Voxel X Threads fangen an bei 0 an - SosVoxelFloat.y = (float)blockIdx.x; // SoS-Voxel Y Threads fangen an bei 0 an - SosVoxelFloat.z = (float)(blockIdx.y + currentSpeedOfSoundZLayer); // SoS-Voxel Z Threads fangen an bei 0 an - - #ifdef debug_CudaPrecalculateAscanIndexKernel - //printf(" SosVoxel.x,y,z = [%f %f %f]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // Herausfinden welche berechnet werden - - //if ((threadIdx.x == DebugSosVoxelX) && (blockIdx.y == DebugSosVoxelY) && ((blockIdx.y + currentSpeedOfSoundZLayer) == DebugSosVoxelZ)) - //{ - - // int threadCountAll = gridDim.z * gridDim.x * blockDim.x; // = Anzahl aller Threads X*Y*Z - // int threadIndex = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x; - - // printf("=========================== GPU: precalculateAscanIndexKernel_usePathsKernel ===================================\n"); - // printf(" threadCountAll = %i\n", threadCountAll); // Anzahl aller Threads //Brauche ich wahrscheinlich gar nicht. 
- // printf(" threadIndex = %i\n", threadIndex); // Threadindex von aktuellem Kernel - // printf(" SosVoxel.x,y,z = [%f %f %f]\n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z); // In welchem SoS-Voxel befinde ich mich? - //printf(" emitter_list_Size = %i\n", emitter_list_Size); // Wie viele Elemente gibt es in der EmitterListe? - //printf(" receiver_list_Size = %i\n", receiver_list_Size); // Wie viele Elemente gibt es in der ReceiverListe? - //printf("=================================================================================================\n"); - //} - - //if ((threadIdx.x == 25) && ((blockIdx.y + currentSpeedOfSoundZLayer) == 11)) - //if (((blockIdx.y + currentSpeedOfSoundZLayer) == 11)) - // { - // //printf(" SosVoxel.x,y,z = [%30.25f %30.25f %30.25f]\n", (float)threadIdx.x, (float)blockIdx.x, (float)(blockIdx.y + currentSpeedOfSoundZLayer)); // In welchem SoS-Voxel befinde ich mich? - // //printf(" SosVoxel.x,y,z = [%i %i %i]\n\n", (int)(float)threadIdx.x, (int)(float)blockIdx.x, (int)(float)(blockIdx.y + currentSpeedOfSoundZLayer)); // In welchem SoS-Voxel befinde ich mich? - // - // //printf(" SosVoxel.x,y,z = [%30.25f %30.25f %30.25f] = [%i %i %i]\n\n", (float)threadIdx.x, (float)blockIdx.x, (float)(blockIdx.y + currentSpeedOfSoundZLayer, (int)(float)threadIdx.x, (int)(float)blockIdx.x, (int)(float)(blockIdx.y + currentSpeedOfSoundZLayer))); // In welchem SoS-Voxel befinde ich mich? - // //printf(" SosVoxel.x,y,z = [%30.25f %30.25f %30.25f] = [%i %i %i]\n\n", (float)threadIdx.x, (float)blockIdx.x, (float)(blockIdx.y + currentSpeedOfSoundZLayer), (int)(float)threadIdx.x, (int)(float)blockIdx.x, (int)(float)(blockIdx.y + currentSpeedOfSoundZLayer)); // In welchem SoS-Voxel befinde ich mich? - // printf(" SosVoxel.x,y,z = [%30.25f] = [%i] = %30.25f\n", (float)threadIdx.x, (int)(float)threadIdx.x, (float)threadIdx.x-(float)(int)(float)threadIdx.x); // In welchem SoS-Voxel befinde ich mich? 
- // } - - // if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - // { - // printf(" precalculateAscanIndexKernel_usePathsKernel: debugMode [%i] for geometry[%i]\n", debugMode, geometry); - // } - - #endif - -// float3 SosEmitterVoxelFloat; // SoSVoxel von Emitter in Float -// float3 SosReceiverVoxelFloat; // SoSVoxel von Receiver in Float -// float SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION; // Aufloesung im SoS-Grid -// //int tableIndex; // Index innerhalb TableVoxelToEmitter/ReceiverPath - - // Speicher in Texturformat - int i_x = (int)floor(SosVoxelFloat.x); - int i_y = (int)floor(SosVoxelFloat.y); - int i_z = (int)(SosVoxelFloat.z-currentSpeedOfSoundZLayer); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - //int Index; - int TexturGeometryIndexZ; - - float3 voxelPosition; - voxelPosition.x = SosVoxelFloat.x*SOS_RESOLUTION+sosOffset.x; //Umrechnung von SOS in m - voxelPosition.y = SosVoxelFloat.y*SOS_RESOLUTION+sosOffset.y; - voxelPosition.z = SosVoxelFloat.z*SOS_RESOLUTION+sosOffset.z; - - float currentAscanIndex = 0.0; // aktueller Ascan, fuer den der AscanIndex abhaengig vom SOS-Voxel berechnet wird - int currentEmitterIndex_minus1 = 0.0; // aktueller Emitter - int oldEmitterIndex = 65535; // letzter Emitter - int currentReceiverIndex_minus1 = 0.0; // aktueller Receiver - - float3 currentEmitterGeometry; // Coordinates for current Emitter - float3 currentReceiverGeometry; // Coordinates for current Receiver - float emitterDistance; - float receiverDistance; - float totalDistance; - - unsigned short - lookUpEmitterIndex, // Index of Emitter in lookUp table gives the position in texture memory - lookUpReceiverIndex; // Index of Receiver in lookUp table gives the position in texture memory - int currentRecTextureIndex; // Index of ReceiverTexture due to division in several textures due to HW limitation - - float TexturIndexX = SosVoxelFloat.x+ 0.5f; //Index for access to Texturmemory - float 
TexturIndexY = SosVoxelFloat.y+ 0.5f; //Index for access to Texturmemory - float TexturIndexZEmitter = 0.0f; //Index for access to Texturmemory - float TexturIndexZReceiver = 0.0f; //Index for access to Texturmemory - - // Z offset inside precalculated SOS paths // TODO: - IMAGE_RESOLUTION/4 hier eingefuegt, da ws Rundungsfeher und ich auf 0.0 wie auch 1.0 komme. Da 1.0 nicht definiert fuer optimized Fehler in Berechnung! - float SosVoxelTextureZ = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer); - float SosVoxelTextureZnotInterpolated = (SosVoxelFloat.z - (float)currentSpeedOfSoundZLayer); // + 0.5f - - - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - float2 VoxelSosAttValues; // SOS Values for one Voxel for Texture reading - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - float4 VoxelSosAttValues; // SOS and ATT Values for one Voxel for Texture reading - #endif - - // SOS-Correction and ATT-Correction - float emitterVoxelVoxelCount = 0.0; - float receiverVoxelVoxelCount = 0.0; - float emitterReceiverTotalVoxelCount = 0.0; // Wird zwei mal genutz, Eventuell wieder rausnehmen. - // SOS - float emitterVoxelaverageSpeedSum = 0.0; - float receiverVoxelaverageSpeedSum = 0.0; - float averageSpeed = 0.0; - float sumAverageSpeed = 0.0; - - // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. 
2*N < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: 1024 ; Kepler: 1024 Em/Rec - Kombinationen) - // maxSurfaceTexture3DDimension = maximale Groesse die erlaubt ist - // TableAscanIndexAllocationCount = Anzahl der Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe - // maxFeasibleSosZLayerCount = Anzahl der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden (1 oder 2 bei Interpolierten Variante) - // maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle - - // Gehe #Ascans durch und lade Em/Rec-Index - // Abhangig davon lade Koordinaten - // --> Bestimme SOS - // --> Berechne AscanIndex - #if defined(debug_CudaPrecalculateAscanIndexKernel) - if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ)) - { - printf("------------ Run over ascanIndex_i (1:(aScanWindowSize (%i)) ; ascanIndexBatchOffset (%i)) currentSpeedOfSoundZLayer (%i) maxAscanIndexArraysInTexture (%i)--------------------\n", aScanWindowSize, ascanIndexBatchOffset, currentSpeedOfSoundZLayer, maxAscanIndexArraysInTexture); - } - #endif - //return; - - for(int ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i++) // Alle Emitter oder Receiver in der Liste von Matlab durchgehen - { - - #if defined(debug_CudaPrecalculateAscanIndexKernel) - // if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ)) - // { - // printf("--------------------- ascanIndex_i (%i) - (ascanIndexOffset + ascanIndex_i) (%i) ---------------------------------------\n", ascanIndex_i, (ascanIndexOffset + ascanIndex_i)); - // // printf(" >>>> Precalc: Em (%i) Rec(%i)\n >>>> Em[%+3.3f %+3.3f %+3.3f] : SOSVoxel[%+3.3f %+3.3f %+3.3f]\n", currentEmIndexUsedForAscanIndexCalculation, SosEmitterVoxelFloat.x, SosEmitterVoxelFloat.y, SosEmitterVoxelFloat.z, SosVoxel.x,SosVoxel.y,SosVoxel.z); // In welche Speicherstelle wird geschrieben - //// 
printf(" SOSGrid_XYZ.x,y,z = [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); // In welchem SoS-Voxel befinde ich mich? - //// - //// printf(" emitter_list_Size = %i\n", emitter_list_Size); // Wie viele Elemente gibt es in der EmitterListe? - //// printf(" receiver_list_Size = %i\n", receiver_list_Size); // Wie viele Elemente gibt es in der ReceiverListe? - //// printf("-------------------------------------------------------------------\n"); - //// printf(" SosVoxel.x,y,z = [%f %f %f]\n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z); // In welchem SoS-Voxel befinde ich mich? - //// printf(" currentEmitterGeometry = [%f %f %f]\n", currentEmitterGeometry.x, currentEmitterGeometry.y, currentEmitterGeometry.z); // In welchem SoS-Voxel befinde ich mich? - //// printf(" SosEmitterVoxelFloat = [%f %f %f]\n", SosEmitterVoxelFloat.x, SosEmitterVoxelFloat.y, SosEmitterVoxelFloat.z); // In welchem SoS-Voxel befinde ich mich? - //// printf(" curEmIdxUsdForAscanIdxCalc = %i\n", currentEmIndexUsedForAscanIndexCalculation); // Welches Elemente ist aktuelle in emitterListe? - //// //printf(" receiverIndex_i = %i\n", receiverIndex_i); // Welches Element ist aktuelles in receiverListe? - //// printf(" lookUpGeometryEmitterIndex = %i\n", lookUpGeometryEmitterIndex); // zLayer Offset, welcher wird zur Zeit berechnet? - //// //printf("-------------------------------------------------------------------\n"); - //// //printf(" speedOfSoundSumOutput_Index= %i\n", SOSGrid_XYZ.x*(SOSGrid_XYZ.y*SOSGrid_XYZ.y*geometryIndexCounter+SOSGrid_XYZ.y*(SosVoxel.z-currentSpeedOfSoundZLayer)+SosVoxel.y)+SosVoxel.x); // In welche Speicherstelle wird geschrieben - //// //printf(" averageSpeed = %f\n", averageSpeed); // Berechnete Geschwindigkeit - //// //printf(" write i_x,i_y,i_z = [%i %i %i]\n", i_x, i_y, i_z); // In welchem SoS-Voxel schreibe ich? - // printf("==================================================================\n\n"); - } - #endif - - - // 1. 
Ascans durchgehen - DONE - // 1.1 Emitter und Receiver PfadeID laden, Position laden und Abstand berechnen - DONE - // 1.2 Use Emitter-Cache -DONE - // 1.3 SOS und Attenuation aus Pfadvorberechnung laden - - // 2. Ascan-Index mit SAFT berechnen - DONE - // 2.1 Ascan-Index in Surface speichern - DONE - - - // In Index-Listen stehen die Indexe von Em/Rec pro Ascan - // In blockedReceiver/SenderList stehen die 3D-Koordinaten in jeweiliger Anzahl - - // lade aktuellen EmitterIdx&ReceiverIdx aus AscanBlockliste - // --------------------------------------------------------------- - currentEmitterIndex_minus1 = deviceEmitterIndex_block [ascanIndexBatchOffset + ascanIndex_i] -1; // Load EmitterIndex from Constant Memory fuer Geometrie,... -1 da Matlab mit 1 startet - currentReceiverIndex_minus1 = deviceReceiverIndex_block [ascanIndexBatchOffset + ascanIndex_i] -1; // Load ReceiverIndex from Constant Memory fuer Geometrie,... - lookUpEmitterIndex = lookUpGeometryMemoryListEmitter [currentEmitterIndex_minus1]; // Load EmitterLookUp Index from Constant Memory fuer SOS Pfade. Gibt an wo SOS Pfade im Texturspeicher liegen - lookUpReceiverIndex = lookUpGeometryMemoryListReceiver[currentReceiverIndex_minus1]; // Load ReceiverLookUp Index from Constant Memory fuer SOS Pfade. 
Gibt an wo SOS Pfade im Texturspeicher liegen - - // Berechne die mittlere SOS und ATT fuer diesen Pfad - // Nutzen der Bresenham-Floatvariante mit Texturmemory und Interpolation - // --------------------------------------------------------------- - - if (currentEmitterIndex_minus1 != oldEmitterIndex){ // Nur wenn neuer Emitter geladen wird neue Emitter-Koordinaten laden und Abstand berechnen - - - currentEmitterGeometry = emitterPOSharmon[currentEmitterIndex_minus1]; // Use Constant Memory - emitterDistance = sqrtf( SQR(voxelPosition.x-currentEmitterGeometry.x) + SQR(voxelPosition.y-currentEmitterGeometry.y) + SQR(voxelPosition.z-currentEmitterGeometry.z) ); - // Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten - // determineSpeedOfSoundFieldVoxelFloat(currentEmitterGeometry, SosEmitterVoxelFloat , sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel von E/R bestimmen // currentGeometry --> SosGeometryVoxel Float - - // EmitterIndex in Z-Richtung fuer Zugriff auf Textur - #ifndef SOS_Version2 - TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur - #else - TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZnotInterpolated + 0.5f; - #endif - - // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln - // ==================================== - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - // Count - Emitter ---------------------------------------- - emitterVoxelVoxelCount = tex3D( texTableVoxelToEmitterPathCount, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - - // SpeedSum - Emitter ---------------------------------------- - emitterVoxelaverageSpeedSum = tex3D( texTableVoxelToEmitterPathSosSum, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - // SpeedSum + Count - Emitter ---------------------------------------- - //float2 VoxelSosAttValues; - VoxelSosAttValues 
= tex3D( texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; - emitterVoxelVoxelCount = VoxelSosAttValues.y; - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - // SpeedSum + Count + Attenuation - Emitter ---------------------------------------- - // -------------------------------------------------------- - VoxelSosAttValues = tex3D( texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; - emitterVoxelVoxelCount = VoxelSosAttValues.y; - #endif - - #ifdef debug_CudaSAFTKernel - //printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){ - //printf(" SosVoxelf [%3.12f %3.12f %3.12f] - SosVoxel [%3i %3i %3i] - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, SosVoxel.x, SosVoxel.y, SosVoxel.z, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - float xmax = SOSGrid_XYZ.x; - float ymax = SOSGrid_XYZ.y; - float zmax = (float)maxFeasibleSosZLayerCount; - float i_x = SosVoxel.x; - float i_y = SosVoxel.y; - float i_z = (float)(int)(SosVoxelTextureZ); - - int Index = xmax*(ymax*(zmax*currentEmitterIndex-1+i_z)+i_y)+i_x; // currentEmitterIndex-1 da Matlab bei 1 anfaengt und wir deshlab -0,5 machen - printf(">>>> %i >>>> Kernel: currentEmitterIndex(%3i) & SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n>>>>>>>>>>>> emitterVoxelVoxelCount(%3.3f) emitterVoxelaverageSpeedSum(%3.3f) Index = %i\n", 0, currentEmitterIndex, SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, 
regionOfInterestVoxel.y, regionOfInterestVoxel.z, emitterVoxelVoxelCount, emitterVoxelaverageSpeedSum, Index); // In welche Speicherstelle wird geschrieben - } - } - #endif - - - } - - currentReceiverGeometry = receiverPOSharmon[currentReceiverIndex_minus1]; // Use Constant Memory - receiverDistance = (float)sqrtf( SQR((double)voxelPosition.x-(double)currentReceiverGeometry.x) + SQR((double)voxelPosition.y-(double)currentReceiverGeometry.y) + SQR((double)voxelPosition.z-(double)currentReceiverGeometry.z) ); - - // Bestimmen der SoS-Koordinaten fuer die Sender/Empfuenger-Koordinaten - // determineSpeedOfSoundFieldVoxelFloat(currentReceiverGeometry, SosReceiverVoxelFloat, sosOffset, SOS_RESOLUTION_FACTOR); // SoSVoxel von E/R bestimmen // currentGeometry --> SosGeometryVoxel Float - - - // Distanz Emitter -> Voxel -> Receiver berechnen - // ==================================== - totalDistance = emitterDistance + receiverDistance; // Gesamt-Abstand - - - // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln - // ==================================== - // Determine number of current used texture memory for this receiver - currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); - - - // ReceiverIndex in Z-Richtung fuer Zugriff auf Textur - #ifndef SOS_Version2 - TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur - #else - TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZnotInterpolated + 0.5f; // Z-Index fuer Zugriff auf Textur - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur - #endif - - // mittlere Schallgeschwindigkeit fuer Voxel->Receiver Pfad ermitteln - // ==================================== - #ifdef 
SaftTextureForEmRecSosPathsTablesFloat1 - // Count - Receiver ---------------------------------------- - if ( currentRecTextureIndex == 0){ - receiverVoxelVoxelCount = tex3D( texTableVoxelToReceiverPathCount0, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - receiverVoxelVoxelCount = tex3D( texTableVoxelToReceiverPathCount1, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - receiverVoxelVoxelCount = tex3D( texTableVoxelToReceiverPathCount2, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - - // SpeedSum - Receiver ---------------------------------------- - if ( currentRecTextureIndex == 0){ - receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum0, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum1, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum2, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - // SpeedSum + Count - Receiver ---------------------------------------- - //float2 VoxelSosAttValues; - if ( currentRecTextureIndex == 0){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - - receiverVoxelVoxelCount = VoxelSosAttValues.y; - receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; - #endif - #ifdef 
SaftTextureForEmRecSosPathsTablesFloat4 - // SpeedSum + Count - Receiver ---------------------------------------- - //float4 VoxelValues; - if ( currentRecTextureIndex == 0){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - - receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; - receiverVoxelVoxelCount = VoxelSosAttValues.y; - // receiverVoxelVoxelCount VoxelSosAttValues.w; // only float 3 needed - #endif - - emitterReceiverTotalVoxelCount = (emitterVoxelVoxelCount+receiverVoxelVoxelCount); //kleine Optimierung? - - // Calculate harmonic mean of both paths Emitter/Voxel/Receiver - // ======================================================================= - averageSpeed = emitterReceiverTotalVoxelCount/(emitterVoxelaverageSpeedSum+receiverVoxelaverageSpeedSum); // harmonic mean - - //////////////////////////////////////// SAFT-Calculation to determine Ascan-Index /////////////////////////////////////////////////////////////// - - // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. 
2*N < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: 2048 ; Kepler: 2048 Em/Rec - Kombinationen) - // maxSurfaceTexture3DDimension = maximale Groesse die erlaubt ist - // TableAscanIndexAllocationCount = Anzahl der Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe - // maxFeasibleSosZLayerCount = Anzahl der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden (1 oder 2 bei Interpolierten Variante) - // maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle - - // outSurfRefAscanIndexFloat0..2 = Surface fuer die jeweiligen Ascans - - // totalDistance = emitterDistance + receiverDistance; // Gesamt-Abstand - // averageSpeed = deviceSoSData_block [currentAScanIndex]; - // sampleTime = totalDistance / (sampleRate*averageSpeed); // Sample Time bestimmen mit entsprechenden SoSDaten - - //currentAscanIndex = (emitterDistance + receiverDistance)/(1e-7*averageSpeed); - currentAscanIndex = (double)(emitterDistance + receiverDistance)/((double)1e-7*(double)averageSpeed); - - // Save AscanIndex in Texture - // Calculate the Z-Index for storing the AscanIndex value - TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; - - #if defined(debug_CudaPrecalculateKernel) || defined(debug_OutputSOSPaths) - //if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - // printf(">>>> %i >>>> Precalc: geomIdxCounter(%4i):[%+3.4f %+3.4f %+3.4f] - SOSVoxel [%3i %3i %3i] currentSpeedOfSoundZLayer(%i) ==> TexturNr.[%3i], TexturGeometryIndexZ(%3i), lookUpGeometryIndex(%4i)\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosVoxel.x,SosVoxel.y,SosVoxel.z, currentSpeedOfSoundZLayer, (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) , TexturGeometryIndexZ, lookUpGeometryIndex); // In welche Speicherstelle wird geschrieben - #endif - - // Write in the AscanIndex 
value for all Receiver in corresponding memory adress depending on SOS-Voxel, and Z-Layer - // floor(ascanIndex_i / maxAscanIndexArraysInTexture) gives the surface if more then one is used - int ascanIndexTexture_Nr = (int)floor((float)ascanIndex_i / (float)maxAscanIndexArraysInTexture); - - if ( ascanIndexTexture_Nr == 0){ - surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat0, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( ascanIndexTexture_Nr == 1) { - surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat1, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( ascanIndexTexture_Nr == 2){ - surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat2, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( ascanIndexTexture_Nr == 3){ - surf3Dwrite((float)currentAscanIndex, outSurfRefAscanIndexFloat3, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - - // Debug Output at the end - #if defined(debug_CudaPrecalculateAscanIndexKernel) -// //printf(" SosVoxel.x,y,z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // Herausfinden welche berechnet werden -// -// if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == (DebugSosVoxelZ)) && ((ascanIndex_i <= 10) || (ascanIndex_i >= 4090))) // Anfang -// //if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ) && (ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte -// //if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ) && (ascanIndex_i >= 1400)) // Ende -// //if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y 
== DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ) && (ascanIndex_i >= 2800)) // Ende -// { -// // Pracalculate AscanIndex -// printf(">>>> Precalc: Ascan(%i) : Em (%i) Re (%i) >>>> Em[%+3.3f %+3.3f %+3.3f] : SOSVoxel[%+3.3f %+3.3f %+3.3f] : Rec[%+3.3f %+3.3f %+3.3f]\n" -// " >>>> averageSpeed(%3.3f), totalAttenuation_multFactor(%3.3f) >>>> currentAscanIndex(%3.3f)\n" -// " >>>>>>>> surf3Dwrite Textur(%i)[%3i %3i %3i], TexturGeometryIndexZ(%3i) >>>> currentAscanIndex(%3.3f)\n", -// ascanIndex_i, currentEmitterIndex_minus1, currentReceiverIndex_minus1, -// currentEmitterGeometry.x, currentEmitterGeometry.y, currentEmitterGeometry.z, -// SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, -// currentReceiverGeometry.x, currentReceiverGeometry.y, currentReceiverGeometry.z, -// averageSpeed, totalAttenuation_multFactor, currentAscanIndex, -// ascanIndexTexture_Nr, i_x, i_y, i_z, TexturGeometryIndexZ, currentAscanIndex); -// } - #endif - - - } // Ascan_i-loop - - #ifdef debug_OutputFunctions - printf( "<== SAFTHandler::precalculateAscanIndexKernel_usePathsKernel - End\n"); - #endif - - } - - - - - - __global__ void precalculateAscanIndex_usePathsKernel_SOS_ATT( - int ascanIndexBatchOffset, ///< Offset of AscanIndex batch (bei mehreren Aufrufen) - int aScanWindowSize, ///< gibt Anzahl der Ascans ein - #ifndef SaftTextureForBresenhamSosPaths - float const * deviceSpeedOfSoundField, ///< Array of speed of sound samples. Dimensions ordered by speed of indices, commencing with the fastest moving one: 1. x 2. y 3. 
z - #else - - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - cudaArray *deviceSosAttFieldCuArray, ///< CuArray fuer SosAttFieldTextur - #endif - #endif - int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. - int maxSoSReceiverArrayForTexture, - - unsigned short const * deviceEmitterIndex_block, ///< Speicheradresse fuer EmitterIndexdaten im AscanBlock - unsigned short const * deviceReceiverIndex_block, ///< Speicheradresse fuer ReceiverIndexdaten im AscanBlock - - int TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks der Groesse 2048/4096 - int maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec(Ascans) in einer Teiltabelle /CUDA Array - - cudaArray **deviceTextureAscanIndexFloatCuArray, - - int3 SOSGrid_XYZ, - float3 sosOffset, - float3 regionOfInterestOffset, - float IMAGE_RESOLUTION, - float SOS_RESOLUTION, - float debugMode, - float debugModeParameter, - int * deviceSAFT_VARIANT - ) - { - - #ifdef debug_OutputFunctions - printf( "==> precalculateAscanIndexKernel_usePathsKernel_SOS_ATT - Start\n"); - #endif - -// float3 SosVoxelFloat; -// SosVoxelFloat.x = (float)threadIdx.x; // SoS-Voxel X ? Threads fangen an bei 0 an -// SosVoxelFloat.y = (float)blockIdx.x; // SoS-Voxel X ? Threads fangen an bei 0 an -// SosVoxelFloat.z = (float)(blockIdx.y + currentSpeedOfSoundZLayer); // SoS-Voxel X ? 
Threads fangen an bei 0 an - - #ifdef debug_CudaPrecalculateAscanIndexKernel - //printf(" SosVoxel.x,y,z = [%f %f %f]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // Herausfinden welche berechnet werden - - //if ((threadIdx.x == DebugSosVoxelX) && (blockIdx.y == DebugSosVoxelY) && ((blockIdx.y + currentSpeedOfSoundZLayer) == DebugSosVoxelZ)) - //{ - - // int threadCountAll = gridDim.z * gridDim.x * blockDim.x; // = Anzahl aller Threads X*Y*Z - // int threadIndex = blockDim.x * (blockIdx.y * gridDim.x + blockIdx.x) + threadIdx.x; - - // printf("=========================== GPU: precalculateAscanIndexKernel_usePathsKernel ===================================\n"); - // printf(" threadCountAll = %i\n", threadCountAll); // Anzahl aller Threads //Brauche ich wahrscheinlich gar nicht. - // printf(" threadIndex = %i\n", threadIndex); // Threadindex von aktuellem Kernel - // printf(" SosVoxel.x,y,z = [%f %f %f]\n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z); // In welchem SoS-Voxel befinde ich mich? - //printf(" emitter_list_Size = %i\n", emitter_list_Size); // Wie viele Elemente gibt es in der EmitterListe? - //printf(" receiver_list_Size = %i\n", receiver_list_Size); // Wie viele Elemente gibt es in der ReceiverListe? - //printf("=================================================================================================\n"); - //} - - //if ((threadIdx.x == 25) && ((blockIdx.y + currentSpeedOfSoundZLayer) == 11)) - //if (((blockIdx.y + currentSpeedOfSoundZLayer) == 11)) - // { - // //printf(" SosVoxel.x,y,z = [%30.25f %30.25f %30.25f]\n", (float)threadIdx.x, (float)blockIdx.x, (float)(blockIdx.y + currentSpeedOfSoundZLayer)); // In welchem SoS-Voxel befinde ich mich? - // //printf(" SosVoxel.x,y,z = [%i %i %i]\n\n", (int)(float)threadIdx.x, (int)(float)blockIdx.x, (int)(float)(blockIdx.y + currentSpeedOfSoundZLayer)); // In welchem SoS-Voxel befinde ich mich? 
- // - // //printf(" SosVoxel.x,y,z = [%30.25f %30.25f %30.25f] = [%i %i %i]\n\n", (float)threadIdx.x, (float)blockIdx.x, (float)(blockIdx.y + currentSpeedOfSoundZLayer, (int)(float)threadIdx.x, (int)(float)blockIdx.x, (int)(float)(blockIdx.y + currentSpeedOfSoundZLayer))); // In welchem SoS-Voxel befinde ich mich? - // //printf(" SosVoxel.x,y,z = [%30.25f %30.25f %30.25f] = [%i %i %i]\n\n", (float)threadIdx.x, (float)blockIdx.x, (float)(blockIdx.y + currentSpeedOfSoundZLayer), (int)(float)threadIdx.x, (int)(float)blockIdx.x, (int)(float)(blockIdx.y + currentSpeedOfSoundZLayer)); // In welchem SoS-Voxel befinde ich mich? - // printf(" SosVoxel.x,y,z = [%30.25f] = [%i] = %30.25f\n", (float)threadIdx.x, (int)(float)threadIdx.x, (float)threadIdx.x-(float)(int)(float)threadIdx.x); // In welchem SoS-Voxel befinde ich mich? - // } - - // if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - // { - // printf(" precalculateAscanIndexKernel_usePathsKernel: debugMode [%i] for geometry[%i]\n", debugMode, geometry); - // } - - #endif - - // Speicher in Texturformat - int i_x = threadIdx.x; // SoS-Voxel X ? Threads fangen an bei 0 an - int i_y = blockIdx.x; // SoS-Voxel Y ? Threads fangen an bei 0 an - int i_z = blockIdx.y; // SoS-Voxel X ? 
Threads fangen an bei 0 an - int TexturGeometryIndexZ; - - float3 voxelPosition; - voxelPosition.x = (float)i_x*SOS_RESOLUTION+sosOffset.x; //Umrechnung von SOS in m - voxelPosition.y = (float)i_y*SOS_RESOLUTION+sosOffset.y; - voxelPosition.z = (float)(i_z+currentSpeedOfSoundZLayer)*SOS_RESOLUTION+sosOffset.z; - - //float currentAscanIndex = 0.0; // aktueller AscanIndex - float2 currentSOSVoxel_AscanIndexAttValues; - int currentEmitterIndex_minus1 = 0.0; // current EmitterID - int oldEmitterIndex = 65535; // last used EmitterID (check for Caching) - int currentReceiverIndex_minus1 = 0.0; // current ReceiverID - - float3 currentEmitterGeometry; // Coordinates for current Emitter - float3 currentReceiverGeometry; // Coordinates for current Receiver - float emitterDistance; - float receiverDistance; - float totalDistance; - - unsigned short - lookUpEmitterIndex, // Index of Emitter in lookUp table gives the position in texture memory - lookUpReceiverIndex; // Index of Receiver in lookUp table gives the position in texture memory - int currentRecTextureIndex; // Index of ReceiverTexture due to division in several textures due to HW limitation - - float TexturIndexX = (float)i_x + 0.5f; //Index for access to Texturmemory - float TexturIndexY = (float)i_y + 0.5f; //Index for access to Texturmemory - float SosVoxelTextureZ = (float)i_z + 0.5f; //Index for access to Texturmemory - float TexturIndexZEmitter = 0.0f; //Index for access to Texturmemory - float TexturIndexZReceiver = 0.0f; //Index for access to Texturmemory - - - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - float2 VoxelSosAttValues; // SOS Values for one Voxel for Texture reading - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - float4 VoxelSosAttValues; // SOS and ATT Values for one Voxel for Texture reading - #endif - - // SOS-Correction and ATT-Correction - float voxelCount = 0.0; // Anzahl der Voxel auf einem SoS-Pfad - float emitterVoxelVoxelCount = 0.0; - float receiverVoxelVoxelCount = 0.0; 
- // SOS - float emitterVoxelaverageSpeedSum = 0.0; - float receiverVoxelaverageSpeedSum = 0.0; - float averageSpeed = 0.0; // SoSSumme auf einem SoS-Pfad - // ATT - float emitterVoxelTotalAttenuationSum = 0.0; - float receiverVoxelTotalAttenuationSum = 0.0; - float totalAttenuation_multFactor = 0.0; // AttFaktor auf einem Attenuation-Pfad - - // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. 2*N < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: 1024 ; Kepler: 1024 Em/Rec - Kombinationen) - // maxSurfaceTexture3DDimension = maximale Groesse die erlaubt ist - // TableAscanIndexAllocationCount = Anzahl der Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe - // maxFeasibleSosZLayerCount = Anzahl der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden (1 oder 2 bei Interpolierten Variante) - // maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle - - // Gehe #Ascans durch und lade Em/Rec-Index - // Abhangig davon laden Koordinaten - // --> Bestimme SOS - // --> Berechne AscanIndex - #if defined(debug_CudaPrecalculateAscanIndexKernel) - if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ)) - { - printf("------------ Run over ascanIndex_i (1:(aScanWindowSize (%i)) ; ascanIndexBatchOffset (%i)) currentSpeedOfSoundZLayer (%i) maxAscanIndexArraysInTexture (%i)--------------------\n", aScanWindowSize, ascanIndexBatchOffset, currentSpeedOfSoundZLayer, maxAscanIndexArraysInTexture); - } - #endif - - for(int ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i++) // Alle Emitter oder Receiver in der Liste von Matlab durchgehen - { - - #if defined(debug_CudaPrecalculateAscanIndexKernel) - if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ)) - { - //printf("--------------------- ascanIndex_i (%i) - (ascanIndexOffset + ascanIndex_i) (%i) 
---------------------------------------\n", ascanIndex_i, (ascanIndexOffset + ascanIndex_i)); - // printf(" >>>> Precalc: Em (%i) Rec(%i)\n >>>> Em[%+3.3f %+3.3f %+3.3f] : SOSVoxel[%+3.3f %+3.3f %+3.3f]\n", currentEmIndexUsedForAscanIndexCalculation, SosEmitterVoxelFloat.x, SosEmitterVoxelFloat.y, SosEmitterVoxelFloat.z, SosVoxel.x,SosVoxel.y,SosVoxel.z); // In welche Speicherstelle wird geschrieben - //// printf(" SOSGrid_XYZ.x,y,z = [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); // In welchem SoS-Voxel befinde ich mich? - //// - //// printf(" emitter_list_Size = %i\n", emitter_list_Size); // Wie viele Elemente gibt es in der EmitterListe? - //// printf(" receiver_list_Size = %i\n", receiver_list_Size); // Wie viele Elemente gibt es in der ReceiverListe? - //// printf("-------------------------------------------------------------------\n"); - //// printf(" SosVoxel.x,y,z = [%f %f %f]\n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z); // In welchem SoS-Voxel befinde ich mich? - //// printf(" currentEmitterGeometry = [%f %f %f]\n", currentEmitterGeometry.x, currentEmitterGeometry.y, currentEmitterGeometry.z); // In welchem SoS-Voxel befinde ich mich? - //// printf(" SosEmitterVoxelFloat = [%f %f %f]\n", SosEmitterVoxelFloat.x, SosEmitterVoxelFloat.y, SosEmitterVoxelFloat.z); // In welchem SoS-Voxel befinde ich mich? - //// printf(" curEmIdxUsdForAscanIdxCalc = %i\n", currentEmIndexUsedForAscanIndexCalculation); // Welches Elemente ist aktuelle in emitterListe? - //// //printf(" receiverIndex_i = %i\n", receiverIndex_i); // Welches Element ist aktuelles in receiverListe? - //// printf(" lookUpGeometryEmitterIndex = %i\n", lookUpGeometryEmitterIndex); // zLayer Offset, welcher wird zur Zeit berechnet? 
- //// //printf("-------------------------------------------------------------------\n"); - //printf(" speedOfSoundSumOutput_Index= %i\n", SOSGrid_XYZ.x*(SOSGrid_XYZ.y*SOSGrid_XYZ.y*geometryIndexCounter+SOSGrid_XYZ.y*(SosVoxel.z-currentSpeedOfSoundZLayer)+SosVoxel.y)+SosVoxel.x); // In welche Speicherstelle wird geschrieben - //printf(" averageSpeed = %f\n", averageSpeed); // Berechnete Geschwindigkeit - //printf(" write i_x,i_y,i_z = [%i %i %i]\n", i_x, i_y, i_z); // In welchem SoS-Voxel schreibe ich? - //printf("==================================================================\n\n"); - } - #endif - - - // 1. Ascans durchgehen - DONE - // 1.1 Emitter und Receiver PfadeID laden, Position laden und Abstand berechnen - DONE - // In Index-Listen stehen die Indexe von Em/Rec pro Ascan - // In blockedReceiver/SenderList stehen die 3D-Koordinaten - // 1.2 Use Emitter-Cache - DONE - // 1.3 SOS und Attenuation aus Pfadvorberechnung laden - DONE - // 2. Ascan-Index mit SAFT berechnen - DONE - // 2.1 Ascan-Index in Surface speichern - DONE - - // load current Emitter&Receiver from Blocklists - // --------------------------------------------------------------- - currentEmitterIndex_minus1 = deviceEmitterIndex_block [ascanIndexBatchOffset + ascanIndex_i] -1; // Load EmitterIndex from Constant Memory fuer Geometrie,... -1 da Matlab mit 1 startet - currentReceiverIndex_minus1 = deviceReceiverIndex_block [ascanIndexBatchOffset + ascanIndex_i] -1; // Load ReceiverIndex from Constant Memory fuer Geometrie,... - lookUpEmitterIndex = lookUpGeometryMemoryListEmitter [currentEmitterIndex_minus1]; // Load EmitterLookUp Index from Constant Memory fuer SOS Pfade. Gibt an wo SOS Pfade im Texturspeicher liegen - lookUpReceiverIndex = lookUpGeometryMemoryListReceiver[currentReceiverIndex_minus1]; // Load ReceiverLookUp Index from Constant Memory fuer SOS Pfade. 
Gibt an wo SOS Pfade im Texturspeicher liegen - - // Load the precalculated SOS and ATT values for current path - // --------------------------------------------------------------- - - if (currentEmitterIndex_minus1 != oldEmitterIndex){ // Nur wenn neuer Emitter geladen wird neue Emitter-Koordinaten laden und Abstand berechnen - - currentEmitterGeometry = emitterPOSharmon[currentEmitterIndex_minus1]; // Use Constant Memory - emitterDistance = sqrtf( SQR(voxelPosition.x-currentEmitterGeometry.x) + SQR(voxelPosition.y-currentEmitterGeometry.y) + SQR(voxelPosition.z-currentEmitterGeometry.z) ); - - // EmitterIndex in Z-Richtung fuer Zugriff auf Textur - TexturIndexZEmitter = maxFeasibleSosZLayerCount * (lookUpEmitterIndex) + SosVoxelTextureZ; - - - // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln - // ==================================== - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - // Count - Emitter ---------------------------------------- - emitterVoxelVoxelCount = tex3D( texTableVoxelToEmitterPathCount, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - - // SpeedSum - Emitter ---------------------------------------- - emitterVoxelaverageSpeedSum = tex3D( texTableVoxelToEmitterPathSosSum, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - // SpeedSum + Count - Emitter ---------------------------------------- - //float2 VoxelSosAttValues; - VoxelSosAttValues = tex3D( texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; - emitterVoxelVoxelCount = VoxelSosAttValues.y; - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - // SpeedSum + Count + Attenuation - Emitter ---------------------------------------- - VoxelSosAttValues = tex3D( texTableVoxelToEmitterPathSosBoth_preprocess, TexturIndexX, TexturIndexY, TexturIndexZEmitter); - emitterVoxelaverageSpeedSum = VoxelSosAttValues.x; - 
emitterVoxelVoxelCount = VoxelSosAttValues.y; - emitterVoxelTotalAttenuationSum = VoxelSosAttValues.z; - // emitterVoxelVoxelCount VoxelSosAttValues.w; // only float 3 needed - #endif - - #ifdef debug_CudaSAFTKernel - //printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){ - //printf(" SosVoxelf [%3.12f %3.12f %3.12f] - SosVoxel [%3i %3i %3i] - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, SosVoxel.x, SosVoxel.y, SosVoxel.z, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - float xmax = SOSGrid_XYZ.x; - float ymax = SOSGrid_XYZ.y; - float zmax = (float)maxFeasibleSosZLayerCount; - float i_x = SosVoxel.x; - float i_y = SosVoxel.y; - float i_z = (float)(int)(SosVoxelTextureZ); - - int Index = xmax*(ymax*(zmax*currentEmitterIndex-1+i_z)+i_y)+i_x; // currentEmitterIndex-1 da Matlab bei 1 anfaengt und wir deshlab -0,5 machen - printf(">>>> %i >>>> Kernel: currentEmitterIndex(%3i) & SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n>>>>>>>>>>>> emitterVoxelVoxelCount(%3.3f) emitterVoxelaverageSpeedSum(%3.3f) Index = %i\n", 0, currentEmitterIndex, SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z, emitterVoxelVoxelCount, emitterVoxelaverageSpeedSum, Index); // In welche Speicherstelle wird geschrieben - } - } - #endif - - - } - - currentReceiverGeometry = receiverPOSharmon[currentReceiverIndex_minus1]; // Use Constant Memory - receiverDistance = sqrtf( SQR(voxelPosition.x-currentReceiverGeometry.x) + SQR(voxelPosition.y-currentReceiverGeometry.y) + SQR(voxelPosition.z-currentReceiverGeometry.z) ); - 
//receiverDistance = (float)sqrtf( SQR((double)voxelPosition.x-(double)currentReceiverGeometry.x) + SQR((double)voxelPosition.y-(double)currentReceiverGeometry.y) + SQR((double)voxelPosition.z-(double)currentReceiverGeometry.z) ); - - // Distanz Emitter -> Voxel -> Receiver berechnen - // ==================================== - totalDistance = emitterDistance + receiverDistance; // Gesamt-Abstand - - - - // mittlere Schallgeschwindigkeit fuer Emitter->Voxel Pfad ermitteln - // ==================================== - // Determine number of current used texture memory for this receiver - currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); - - - // ReceiverIndex in Z-Richtung fuer Zugriff auf Textur - TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((lookUpReceiverIndex) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur - - - // mittlere Schallgeschwindigkeit fuer Voxel->Receiver Pfad ermitteln - // ==================================== - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - // Count - Receiver ---------------------------------------- - if ( currentRecTextureIndex == 0){ - receiverVoxelVoxelCount = tex3D( texTableVoxelToReceiverPathCount0, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - receiverVoxelVoxelCount = tex3D( texTableVoxelToReceiverPathCount1, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - receiverVoxelVoxelCount = tex3D( texTableVoxelToReceiverPathCount2, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - - // SpeedSum - Receiver ---------------------------------------- - if ( currentRecTextureIndex == 0){ - receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum0, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum1, 
TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - receiverVoxelaverageSpeedSum = tex3D( texTableVoxelToReceiverPathSosSum2, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - // SpeedSum + Count - Receiver ---------------------------------------- - //float2 VoxelSosAttValues; - if ( currentRecTextureIndex == 0){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - - receiverVoxelVoxelCount = VoxelSosAttValues.y; - receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - // SpeedSum + Count + Attenuation- Receiver ---------------------------------------- - if ( currentRecTextureIndex == 0){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth0_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 1) { - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth1_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - else if ( currentRecTextureIndex == 2){ - VoxelSosAttValues = tex3D( texTableVoxelToReceiverPathSosBoth2_preprocess, TexturIndexX, TexturIndexY, TexturIndexZReceiver); } - - receiverVoxelaverageSpeedSum = VoxelSosAttValues.x; - receiverVoxelVoxelCount = VoxelSosAttValues.y; - receiverVoxelTotalAttenuationSum = VoxelSosAttValues.z; - #endif - - //emitterReceiverTotalVoxelCount = (emitterVoxelVoxelCount+receiverVoxelVoxelCount); //kleine Optimierung? 
- - // Mittlere Schallgeschwindigkeit ueber beide Pfade Emitter/Voxel/Receiver berechnen - // ======================================================================= - averageSpeed = (emitterVoxelVoxelCount+receiverVoxelVoxelCount)/(emitterVoxelaverageSpeedSum+receiverVoxelaverageSpeedSum); // harmonisches Mittel - - - // Calc Attenuation_multFactor with Sum // Calculate 10^(...*Totallength of path *1/20) - totalAttenuation_multFactor = powf( 10,( ((emitterVoxelTotalAttenuationSum + receiverVoxelTotalAttenuationSum)*totalDistance)/(emitterVoxelVoxelCount+receiverVoxelVoxelCount)*0.05 ) ); - - if (totalAttenuation_multFactor > debugModeParameter) { // Max Border for Attenuation Correction - totalAttenuation_multFactor = debugModeParameter; // Average Attenuation on this Path - //printf("over limit\n"); - } - - - #if defined(debug_CudaPrecalculateAscanIndexKernel) -// if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == DebugSosVoxelZ)) -// { -// printf("--------------------- ascanIndex_i (%i) ----------------------------------------------\n", ascanIndex_i); -// printf(" >>>> Precalc: Em (%i) Rec(%i) \n" -// " >>>> lookUpEmitterIndex (%i) lookUpReceiverIndex (%i) \n" -// " >>>> SosVoxelTextureZ (%3.6f) \n" -// " >>>> -> TexturIndexX (%3.6f), TexturIndexY (%3.6f) \n" -// " >>>> -> TexturIndexZEmitter (%3.6f), TexturIndexZReceiver (%3.6f)\n\n" -// " >>>> Em[%+3.6f %+3.6f %+3.6f] \n" -// " SOSVoxel[%+3.1f %+3.1f %+3.1f]=[%+3.6f %+3.6f %+3.6f]m \n" -// " Rec[%+3.6f %+3.6f %+3.6f] \n" -// " >>>> \n" -// " >>>> emitterVoxelaverageSpeedSum (%3.6f) \n" -// " >>>> emitterVoxelTotalAttenuationSum (%3.6f) \n" -// " >>>> emitterVoxelVoxelCount (%3.6f) \n" -// " >>>> \n" -// " >>>> receiverVoxelaverageSpeedSum (%3.6f) \n" -// " >>>> receiverVoxelTotalAttenuationSum (%3.6f) \n" -// " >>>> receiverVoxelVoxelCount (%3.6f) \n" -// " >>>> \n" -// " >>>> totalSOS (%f), totalATT (%fdB->%f), totalVoxelCount (%f)\n", -// 
currentEmitterIndex_minus1+1, currentReceiverIndex_minus1+1, -// lookUpEmitterIndex, lookUpReceiverIndex, -// SosVoxelTextureZ, -// TexturIndexX, TexturIndexY, -// TexturIndexZEmitter, TexturIndexZReceiver, -// -// currentEmitterGeometry.x, currentEmitterGeometry.y, currentEmitterGeometry.z, -// SosVoxelFloat.x,SosVoxelFloat.y,SosVoxelFloat.z, -// voxelPosition.x,voxelPosition.y,voxelPosition.z, -// currentReceiverGeometry.x, currentReceiverGeometry.y, currentReceiverGeometry.z, -// -// emitterVoxelaverageSpeedSum, emitterVoxelTotalAttenuationSum, emitterVoxelVoxelCount, -// receiverVoxelaverageSpeedSum, receiverVoxelTotalAttenuationSum, receiverVoxelVoxelCount, -// averageSpeed, totalAttenuation_dB, totalAttenuation_multFactor, emitterReceiverTotalVoxelCount -// ); // In welche Speicherstelle wird geschrieben -// -// } - #endif - - - - #if defined(debug_CudaPrecalculateAscanIndexKernel) - // if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)){ - // // Pracalculate AscanIndex - // printf(">>>> Precalc: Ascan(%i) : Em (%i) Re (%i) >>>> Em[%+3.3f %+3.3f %+3.3f] : SOSVoxel[%+3.3f %+3.3f %+3.3f] : Rec[%+3.3f %+3.3f %+3.3f]\n" - // " >>>> averageSpeed(%3.3f), totalAttenuation(%3.3f) >>>> currentAscanIndex(%3.3f)\n", - // ascanIndex_i, Emitter_i, Receiver_i, - // currentEmitterGeometry.x, currentEmitterGeometry.y, currentEmitterGeometry.z, - // SosVoxel.x, SosVoxel.y, SosVoxel.z, - // currentReceiverGeometry.x, currentReceiverGeometry.y, currentReceiverGeometry.z, - // averageSpeed, totalAttenuation, currentAscanIndex); - // } - #endif - - - //////////////////////////////////////// SAFT-Calculation to determine Ascan-Index /////////////////////////////////////////////////////////////// - - // Für Ascan-Index benoetigt man mehrere Texturen fuer jeweils 2 Z-Layer. 
2*N < maxSurfaceTexture3DDimension(Fermi: 2048; Kepler: 2048) ==> (Fermi: 2048 ; Kepler: 2048 Em/Rec - Kombinationen) - // maxSurfaceTexture3DDimension = maximale Groesse die erlaubt ist - // TableAscanIndexAllocationCount = Anzahl der Teiltabellen ==> auch Anzahl der benoetigten Durchlaeufe - // maxFeasibleSosZLayerCount = Anzahl der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden (1 oder 2 bei Interpolierten Variante) - // maxAscanIndexArraysInTexture = Anzahl der Ascans in einer Teiltabelle - - // outSurfRefAscanIndexFloat0..2 = Surface fuer die jeweiligen Ascans - - - currentSOSVoxel_AscanIndexAttValues.x = (double)(emitterDistance + receiverDistance)/((double)1e-7*(double)averageSpeed); - currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor; - - - // Calculate the Z-Index for storing the AscanIndex value - TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; - - //#if defined(debug_CudaPrecalculateKernel) || defined(debug_OutputSOSPaths) - // if (((int)SosVoxelFloat.x == DebugSosVoxelX) && ((int)SosVoxelFloat.y == DebugSosVoxelY) && ((int)SosVoxelFloat.z == DebugSosVoxelZ)) - // printf(">>>> %i >>>> Precalc: geomIdxCounter(%4i):[%+3.4f %+3.4f %+3.4f] - SOSVoxel [%3i %3i %3i] currentSpeedOfSoundZLayer(%i) ==> TexturNr.[%3i], TexturGeometryIndexZ(%3i), lookUpGeometryIndex(%4i)\n", geometry, geometryIndexCounter, currentGeometry.x, currentGeometry.y, currentGeometry.z, SosVoxelFloat.x,SosVoxelFloat.y,SosVoxelFloat.z, currentSpeedOfSoundZLayer, (int)floor((float)lookUpGeometryIndex / (float)maxSoSReceiverArrayForTexture) , TexturGeometryIndexZ, lookUpGeometryIndex); // In welche Speicherstelle wird geschrieben - //#endif - - // Write in the AscanIndex value for all Receiver in corresponding memory adress depending on SOS-Voxel, and Z-Layer - // floor(ascanIndex_i / maxAscanIndexArraysInTexture) gives the surface if more then one is used - int ascanIndexTexture_Nr = 
(int)floor((float)ascanIndex_i / (float)maxAscanIndexArraysInTexture); - - if ( ascanIndexTexture_Nr == 0){ - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat0, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( ascanIndexTexture_Nr == 1) { - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat1, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( ascanIndexTexture_Nr == 2){ - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat2, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - else if ( ascanIndexTexture_Nr == 3){ - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat3, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); // Direkt in CUDA Array schreiben: TableVoxelToEmitterPathSosSum - } - - // Debug Output at the end - #if defined(debug_CudaPrecalculateAscanIndexKernel) - // //printf(" SosVoxel.x,y,z = [%i %i %i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z); // Herausfinden welche berechnet werden - - //if ((SosVoxelFloat.x == DebugSosVoxelX) && (SosVoxelFloat.y == DebugSosVoxelY) && (SosVoxelFloat.z == (DebugSosVoxelZ)) && ((ascanIndex_i <= 10) || (ascanIndex_i >= 4090))) // Anfang - //if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ) && (ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte - //if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ) && (ascanIndex_i >= 1400)) // Ende - //if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ) && (ascanIndex_i >= 2800)) // Ende - // { - // // Pracalculate AscanIndex - // printf(">>>> Precalc: Ascan(%i) : Em (%i) Re (%i) >>>> 
Em[%+3.3f %+3.3f %+3.3f] : SOSVoxel[%+3.3f %+3.3f %+3.3f] : Rec[%+3.3f %+3.3f %+3.3f]\n" - // " >>>> averageSpeed(%3.3f), totalAttenuation_multFactor(%3.3f) >>>> currentAscanIndex(%3.3f)\n" - // " >>>>>>>> surf3Dwrite Textur(%i)[%3i %3i %3i], TexturGeometryIndexZ(%3i) >>>> currentAscanIndex(%3.3f)\n", - // ascanIndex_i, currentEmitterIndex_minus1, currentReceiverIndex_minus1, - // currentEmitterGeometry.x, currentEmitterGeometry.y, currentEmitterGeometry.z, - // SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, - // currentReceiverGeometry.x, currentReceiverGeometry.y, currentReceiverGeometry.z, - // averageSpeed, totalAttenuation_multFactor, currentAscanIndex, - // ascanIndexTexture_Nr, i_x, i_y, i_z, TexturGeometryIndexZ, currentAscanIndex); - // } - #endif - - - } // Ascan_i-loop - - #ifdef debug_OutputFunctions - printf( "<== SAFTHandler::precalculateAscanIndexKernel_usePathsKernel - End\n"); - #endif - - } - - - - - - -void SAFTHandler::precalculateAscanIndex_usePaths -( - int ascanIndex_i, ///< Offset of AscanIndex batch. - int aScanWindowSize, ///< Amount of Ascans in AscanIndex batch to process. - int currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - int maxFeasibleSosZLayerCount ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. -// int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated -> No more necessary due to all Combinations-should be Calculated -// float * deviceTextureAscanIndexFloatCuArray ///< Out: AscanIndex for the path from Emitter to voxel to Receiver. -) -{ - // Use - // --------------------------------------------------------------- - // 1. Aufruf von precalculateAscanIndexKernel in precalculateAscanIndex für Anzahl der Threads - Done - // 2. Input von SAFT-Kernel zum Laden der Em & Receiver Teilpfade - - // 3. 
Ausgabe von precalculateAscanIndex fuer Speichern - Done - - // ----------------------------------------------------------------------------------------------------------------------------------------------------------------- - // Strategy: - // Step 1. Bereite Input-Surfaces fuer laden der Pfade vor - Done - // Step 2. Bereite Output-Textur fuer AscanIndex vor - Done - // Step 3. Fuere Kernel aus mit #Threads: SOS.x*SOS.y * maxFeasibleSosZLayerCount - Done - // Step 4. Unbind In-& Output-Textures - Done - // ----------------------------------------------------------------------------------------------------------------------------------------------------------------- - - // Step 1. Bereite Input-Surfaces fuer laden der Pfade vor: - // - texTableVoxelToEmitterPathSosBoth_preprocess - // - texTableVoxelToReceiverPathSosBoth0_preprocess - // - texTableVoxelToReceiverPathSosBoth1_preprocess - // - texTableVoxelToReceiverPathSosBoth2_preprocess - - // Texturmemory fuer Emitter - SosPathsTables - // =================================================================================================================== - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosSum = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und beschreiben - cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathCount = cudaCreateChannelDesc(); // Schritt 2.1 Output-Kanal anlegen und beschreiben - // Sum Emitter Path Tables -------------------------------------------------------- - texTableVoxelToEmitterPathSosSum.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToEmitterPathSosSum.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToEmitterPathSosSum.addressMode[2] = cudaAddressModeClamp; - - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToEmitterPathSosSum.filterMode = cudaFilterModePoint; break; - case 1: 
texTableVoxelToEmitterPathSosSum.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToEmitterPathSosSum.normalized = 0; - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToEmitterPathSosSum, deviceTableVoxelToEmitterPathSosSumCuArray, &texChannelDescTableVoxelToEmRecPathSosSum )); - - // Count Emitter Path Tables -------------------------------------------------------- - texTableVoxelToEmitterPathCount.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToEmitterPathCount.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToEmitterPathCount.addressMode[2] = cudaAddressModeClamp; - // texTableVoxelToEmitterPathCount.addressMode[0] = cudaAddressModeWrap; // Texturreferenz beschreiben - // texTableVoxelToEmitterPathCount.addressMode[1] = cudaAddressModeWrap; - // texTableVoxelToEmitterPathCount.addressMode[2] = cudaAddressModeWrap; - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToEmitterPathCount.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToEmitterPathCount.filterMode = cudaFilterModeLinear;break; - } - texTableVoxelToEmitterPathSosSum.normalized = 0; - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToEmitterPathCount, deviceTableVoxelToEmitterPathCountCuArray, &texChannelDescTableVoxelToEmRecPathCount )); - #endif - #ifdef SaftTextureForEmRecSosPathsTablesFloat2 - cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und beschreiben - Float4 - // Both Emitter Path Tables -------------------------------------------------------- - texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[3]) - { - case 0: 
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0; - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth )); - #endif - // TODO:kann zusammengefasst werden - #ifdef SaftTextureForEmRecSosPathsTablesFloat4 - cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und beschreiben - Float4 - // Both Emitter Path Tables -------------------------------------------------------- - texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0; - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth )); - #endif - - // Texturmemory fuer Receiver - SosPathsTables - // =================================================================================================================== - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - // Sum Receiver Path Tables ------------------------------------------------------ - texTableVoxelToReceiverPathSosSum0.addressMode[0] = cudaAddressModeClamp; // 
Texturreferenz beschreiben - texTableVoxelToReceiverPathSosSum0.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathSosSum0.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToReceiverPathSosSum0.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathSosSum0.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToReceiverPathSosSum0.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathSosSum0, deviceTableVoxelToReceiverPathSosSumCuArray[0], &texChannelDescTableVoxelToEmRecPathSosSum )); - - if (TableVoxelToReceiverPathSosAllocationCount>1){ // TODO: mit Arrays flexibel programmieren!!! - texTableVoxelToReceiverPathSosSum1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathSosSum1.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathSosSum1.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToReceiverPathSosSum1.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathSosSum1.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToReceiverPathSosSum1.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathSosSum1, deviceTableVoxelToReceiverPathSosSumCuArray[1], &texChannelDescTableVoxelToEmRecPathSosSum )); - } - - if (TableVoxelToReceiverPathSosAllocationCount>2){ - texTableVoxelToReceiverPathSosSum2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathSosSum2.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathSosSum2.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToReceiverPathSosSum2.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathSosSum2.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToReceiverPathSosSum2.normalized = 0; - - 
CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathSosSum2, deviceTableVoxelToReceiverPathSosSumCuArray[2], &texChannelDescTableVoxelToEmRecPathSosSum )); - } - - - // Count Receiver Path Tables -------------------------------------------------------- - texTableVoxelToReceiverPathCount0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathCount0.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathCount0.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToReceiverPathCount0.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathCount0.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToReceiverPathCount0.normalized = 0; - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathCount0, deviceTableVoxelToReceiverPathCountCuArray[0], &texChannelDescTableVoxelToEmRecPathCount )); - - if (TableVoxelToReceiverPathSosAllocationCount>1){ - texTableVoxelToReceiverPathCount1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathCount1.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathCount1.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToReceiverPathCount1.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathCount1.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToReceiverPathCount1.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathCount1, deviceTableVoxelToReceiverPathCountCuArray[1], &texChannelDescTableVoxelToEmRecPathCount )); - } - - if (TableVoxelToReceiverPathSosAllocationCount>2){ - texTableVoxelToReceiverPathCount2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathCount2.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathCount2.addressMode[2] = cudaAddressModeClamp; - 
switch (SAFT_VARIANT[3]) - { - case 0: texTableVoxelToReceiverPathCount2.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathCount2.filterMode = cudaFilterModeLinear; break; - } - texTableVoxelToReceiverPathCount2.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathCount2, deviceTableVoxelToReceiverPathCountCuArray[2], &texChannelDescTableVoxelToEmRecPathCount )); - } - #endif - #if defined (SaftTextureForEmRecSosPathsTablesFloat2) || defined (SaftTextureForEmRecSosPathsTablesFloat4) - // Both Receiver Path Tables ------------------------------------------------------ - texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModeLinear;break; - } - texTableVoxelToReceiverPathSosBoth0_preprocess.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathSosBoth0_preprocess, deviceTableVoxelToRecPathSosBothCuArray[0], &texChannelDescTableVoxelToEmRecPathSosBoth )); - - if (TableVoxelToReceiverPathSosAllocationCount>1){ // TODO: mit Arrays flexibel programmieren, wenn moeglich!!! 
- texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModeLinear;break; - } - texTableVoxelToReceiverPathSosBoth1_preprocess.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathSosBoth1_preprocess, deviceTableVoxelToRecPathSosBothCuArray[1], &texChannelDescTableVoxelToEmRecPathSosBoth )); - } - - if (TableVoxelToReceiverPathSosAllocationCount>2){ - texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[1] = cudaAddressModeClamp; - texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModePoint; break; - case 1: texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModeLinear;break; - } - texTableVoxelToReceiverPathSosBoth2_preprocess.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableVoxelToReceiverPathSosBoth2_preprocess, deviceTableVoxelToRecPathSosBothCuArray[2], &texChannelDescTableVoxelToEmRecPathSosBoth )); - } - - #endif - - - - #ifdef debug_OutputFunctions - printf( "==> SAFTHandler::precalculateAscanIndex_usePaths - Start\n"); - #endif - - // Output Parameter from Call - #ifdef debug_CudaPrecalculateAscanIndexKernelProxy - // printf("==================== SAFTHandler::precalculateAscanIndex_usePaths 
====================\n"); - // printf(" currentSpeedOfSoundZLayer: %i (Start z)\n", currentSpeedOfSoundZLayer); - // printf(" maxFeasibleSosZLayerCount: %i (# zLayers)\n", maxFeasibleSosZLayerCount); - // printf(" TableAscanIndexAllocationCount: %i (# zLayers)\n", TableAscanIndexAllocationCount); ///< Anzahl der benoetigten AscanBlocks der Groesse 2048/4096 - // printf(" maxAscanIndexArraysInTexture: %i (# zLayers)\n", maxAscanIndexArraysInTexture); ///< maximale Anzahl an Em/Rec in einem CUDA Array (fest definiert fuer bestimmung welche Textur genutzt wird) - // //printf(" currentEmIndexUsedForAscanIndexCalculation: %i\n", currentEmIndexUsedForAscanIndexCalculation); - // printf(" \n"); - // //printf(" emitter_list_Size : %i\n", emitter_list_Size); - // //printf(" receiver_list_Size: %i\n", receiver_list_Size); - // //printf("=============================================================================\n"); - #endif - - - dim3 threadsPerBlock (SOSGrid_XYZ.x,1,1); // determine neccessary amount of threads // max. 512 oder 1024 - dim3 blocksPerGrid (1,1,1); // determine neccessary amount of blocks in grid // max. 
65.535 - blocksPerGrid.x = SOSGrid_XYZ.y; - blocksPerGrid.y = maxFeasibleSosZLayerCount; - blocksPerGrid.z = 1; - - #ifdef debug_CudaPrecalculateAscanIndexKernelProxy - // int sosZLayerVoxelCountToProcess = sosZLayerVoxelCount * maxFeasibleSosZLayerCount; // Anzahl der Voxel die berechnet werden sollen - // - // //printf("==================== SAFTHandler::precalculateAscanIndex_usePaths ====================\n"); - // //printf(" deviceListGeometry: %i (0=Em/1=Rec)\n", deviceListGeometry); - // printf(" TableAscanIndexAllocationCount: %i\n", TableAscanIndexAllocationCount); // Amount of Textures due to limitation of 2048/4096 - // - // printf(" SOSGrid_XYZ x,y,z: [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); - // printf(" threadsPerBlock x,y,z: [%i %i %i]\n", threadsPerBlock.x, threadsPerBlock.y, threadsPerBlock.z); - // printf(" blocksPerGrid x,y,z: [%i %i %i]\n", blocksPerGrid.x, blocksPerGrid.y, blocksPerGrid.z); - // printf(" sosZLayerVoxelCountToProcess = sosZLayerVoxelCount(%i) * maxFeasibleSosZLayerCount(%i) = %i\n", sosZLayerVoxelCount, maxFeasibleSosZLayerCount, sosZLayerVoxelCountToProcess); - // - // printf("=============================================================================\n"); - #endif - - - - // Step 2. 
Bereite Output-Textur fuer AscanIndex vor - - if ( TableAscanIndexAllocationCount > 0){ - #ifdef debug_CudaPrecalculateAscanIndexKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[0](%X)\n", deviceTextureAscanIndexFloatCuArray[0]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]); - } - if ( TableAscanIndexAllocationCount > 1) { - #ifdef debug_CudaPrecalculateAscanIndexKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[1](%X)\n", deviceTextureAscanIndexFloatCuArray[1]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]); - } - if ( TableAscanIndexAllocationCount > 2){ - #ifdef debug_CudaPrecalculateAscanIndexKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[2](%X)\n", deviceTextureAscanIndexFloatCuArray[2]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]); - } - if ( TableAscanIndexAllocationCount > 3){ - #ifdef debug_CudaPrecalculateAscanIndexKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[3](%X)\n", deviceTextureAscanIndexFloatCuArray[3]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]); - } - - // Step 3. Fuehre Kernel aus mit #Threads: SOS.x*SOS.y . 
Innerhalb werden immer 1024/2048 A-Scans durchlaufen und in AscanIndex-Textur geschrieben - - if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false)){ // ==================================================== Blockmode with SOS-value per Ascan - #ifdef debug_OutputInfo - printf("\n\n --- precalculateAscanIndex_usePathsKernel --- use SoS-Grid Mode without SOS- and ATT-Correction\n"); - #endif - - precalculateAscanIndex_usePathsKernel <<< blocksPerGrid, threadsPerBlock >>> - ( - ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen) - aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal vorberechnet werden können - #ifndef SaftTextureForBresenhamSosPaths - deviceSpeedOfSoundField, - #else - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - deviceSpeedOfSoundFieldCuArray, - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - deviceSosAttFieldCuArray, - #endif - #endif - currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. - //currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated - - maxSoSReceiverArrayForTexture, - - deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten - deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten - - TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks der Groesse 2048/4096 - maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem CUDA Array (fest definiert fuer bestimmung welche Textur genutzt wird) - - deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in the path from transducer to voxel. 
- - SOSGrid_XYZ, - sosOffset, - regionOfInterestOffset, - IMAGE_RESOLUTION, - SOS_RESOLUTION, - debugMode, - debugModeParameter, - deviceSAFT_VARIANT - ); - - - } - else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false)){ // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction - #ifdef debug_OutputInfo - printf("\n\n--- precalculateAscanIndex_usePathsKernel_SOS --- use SoS-Grid Mode with SOS- but no ATT-Correction\n"); - #endif - - precalculateAscanIndex_usePathsKernel_SOS <<< blocksPerGrid, threadsPerBlock >>> - ( - ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen) - aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal vorberechnet werden können - #ifndef SaftTextureForBresenhamSosPaths - deviceSpeedOfSoundField, - #else - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - deviceSpeedOfSoundFieldCuArray, - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - deviceSosAttFieldCuArray, - #endif - #endif - currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. - //currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated - - maxSoSReceiverArrayForTexture, - - deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten - deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten - - TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks der Groesse 2048/4096 - maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem CUDA Array (fest definiert fuer bestimmung welche Textur genutzt wird) - - deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in the path from transducer to voxel. 
- - SOSGrid_XYZ, - sosOffset, - regionOfInterestOffset, - IMAGE_RESOLUTION, - SOS_RESOLUTION, - debugMode, - debugModeParameter, - deviceSAFT_VARIANT - ); - - - } - else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)){ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction - #ifdef debug_OutputInfo - printf("\n\n--- precalculateAscanIndex_usePathsKernel_SOS_ATT --- use SoS-Grid Mode with SOS- and ATT-Correction\n"); - #endif - - precalculateAscanIndex_usePathsKernel_SOS_ATT <<< blocksPerGrid, threadsPerBlock >>> - ( - ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen) - aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal vorberechnet werden können - #ifndef SaftTextureForBresenhamSosPaths - deviceSpeedOfSoundField, - #else - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - deviceSpeedOfSoundFieldCuArray, - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - deviceSosAttFieldCuArray, - #endif - #endif - currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound grid the pre-calculation is performed for. - maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. - //currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated - - maxSoSReceiverArrayForTexture, - - deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten - deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten - - TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks der Groesse 2048/4096 - maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem CUDA Array (fest definiert fuer bestimmung welche Textur genutzt wird) - - deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in the path from transducer to voxel. 
- - SOSGrid_XYZ, - sosOffset, - regionOfInterestOffset, - IMAGE_RESOLUTION, - SOS_RESOLUTION, - debugMode, - debugModeParameter, - deviceSAFT_VARIANT - ); - - - } - - CUDA_CHECK(cudaGetLastError()); - - - // ==================================================== cudaUnbindTexture - - #ifdef SaftTextureForEmRecSosPathsTablesFloat1 - // Texturmemory fuer Emitter - SosPathsTables entbinden - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToEmitterPathSosSum )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToEmitterPathCount )); - // Texturmemory fuer Receiver - SosPathsTables entbinden - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathSosSum0 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathSosSum1 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathSosSum2 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathCount0 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathCount1 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathCount2 )); - #endif - #if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4) - // Texturmemory fuer Emitter - SosPathsTables entbinden - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToEmitterPathSosBoth_preprocess )); - // Texturmemory fuer Receiver - SosPathsTables entbinden - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathSosBoth0_preprocess )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathSosBoth1_preprocess )); - CUDA_CHECK(cudaUnbindTexture ( &texTableVoxelToReceiverPathSosBoth2_preprocess )); - #endif - - - #ifdef debug_OutputFunctions - printf( "<== SAFTHandler::precalculateAscanIndex_usePaths - End\n"); - #endif - - - -} - - - - - - - - - - - - - - - - - - - - +extern texture texTableVoxelToReceiverPathSosBoth0_preprocess; +extern texture texTableVoxelToReceiverPathSosBoth1_preprocess; +extern texture texTableVoxelToReceiverPathSosBoth2_preprocess; + +__global__ void 
precalculateAverageSpeedOfSoundKernel(cudaArray *deviceSosAttFieldCuArray, + int firstZLayer, + int sosZLayerCount, + int geometry, + int geometryElementCount, + int maxSoSReceiverArrayForTexture, + float *deviceVoxelCountOutputFloat, + float *speedOfSoundSumOutput, + int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, float SOS_RESOLUTION, float debugMode, + float debugModeParameter); + +__global__ void precalculateAscanIndex_usePathsKernel(int ascanIndexBatchOffset, + int aScanWindowSize, + cudaArray *deviceSosAttFieldCuArray, + int currentSpeedOfSoundZLayer, + int maxFeasibleSosZLayerCount, + int maxSoSReceiverArrayForTexture, + unsigned short const *deviceEmitterIndex_block, + unsigned short const *deviceReceiverIndex_block, + int TableAscanIndexAllocationCount, + int maxAscanIndexArraysInTexture, + cudaArray **deviceTextureAscanIndexFloatCuArray, int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, + float SOS_RESOLUTION, float debugMode, float debugModeParameter, int *deviceSAFT_VARIANT); + +__global__ void precalculateAscanIndex_usePathsKernel_SOS(int ascanIndexBatchOffset, + int aScanWindowSize, + cudaArray *deviceSosAttFieldCuArray, + int currentSpeedOfSoundZLayer, + int maxFeasibleSosZLayerCount, + int maxSoSReceiverArrayForTexture, + unsigned short const *deviceEmitterIndex_block, + unsigned short const *deviceReceiverIndex_block, + int TableAscanIndexAllocationCount, + int maxAscanIndexArraysInTexture, + cudaArray **deviceTextureAscanIndexFloatCuArray, int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, + float SOS_RESOLUTION, float debugMode, float debugModeParameter, int *deviceSAFT_VARIANT); + +__global__ void precalculateAscanIndex_usePathsKernel_SOS_ATT(int ascanIndexBatchOffset, + int aScanWindowSize, + cudaArray *deviceSosAttFieldCuArray, + int currentSpeedOfSoundZLayer, + int maxFeasibleSosZLayerCount, + int 
maxSoSReceiverArrayForTexture, + unsigned short const *deviceEmitterIndex_block, + unsigned short const *deviceReceiverIndex_block, + int TableAscanIndexAllocationCount, + int maxAscanIndexArraysInTexture, + cudaArray **deviceTextureAscanIndexFloatCuArray, + + int3 SOSGrid_XYZ, float3 sosOffset, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, float SOS_RESOLUTION, float debugMode, + float debugModeParameter, int *deviceSAFT_VARIANT); __global__ void fillCuArrayKernel( - - float useValue, - - // cudaArray *deviceSosAttFieldCuArray, ///< CuArray to fill if no array of cudaArrays is used - cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill - - int maxAscanIndexArraysInTexture, - int TableAscanIndexAllocationCount, - int maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for. - + float useValue, + cudaArray **deviceTextureAscanIndexFloatCuArray, + int maxAscanIndexArraysInTexture, int TableAscanIndexAllocationCount, + int maxFeasibleSosZLayerCount, bool ATTMode_3DVolume, - - float debugMode, - float debugModeParameter -) -{ - - #ifdef debug_OutputFunctions - printf( "==> fillCuArrayKernel - Start\n"); - #endif - - float currentSOSVoxel_AscanIndexValues = useValue; - float2 currentSOSVoxel_AscanIndexAttValues = {useValue, useValue}; - - - // Memoryadress for access on Texture - int i_x = threadIdx.x; - int i_y = blockIdx.x; - int i_z = blockIdx.y; // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - - int TexturGeometryIndexZ; - // if ((SosVoxel.x == DebugSosVoxelX) && (SosVoxel.y == DebugSosVoxelY) && (SosVoxel.z == DebugSosVoxelZ)) - // { - // printf(" PrecalculateKernel: debugMode [%i] for geometry[%i]\n", debugMode, geometry); - // } - - for(int ascanIndex_i = 0; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i++) // Alle Emitter oder Receiver in der Liste von Matlab durchgehen - { - -// if (((int)SosVoxelFloat.x == DebugSosVoxelX) && ((int)SosVoxelFloat.y == 
DebugSosVoxelY) && ((int)SosVoxelFloat.z == DebugSosVoxelZ)) -// printf(">>>>>>>> fillCuArrayKernel: fill SOSVoxel [%3f %3f %3f] - ascanIndex_i(%i) - with (%f,%f) \n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y); // In welche Speicherstelle wird geschrieben - - // Write in the AscanIndex value for all Receiver in corresponding memory adress depending on SOS-Voxel, and Z-Layer - - TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; - if (ATTMode_3DVolume == false){ // ========= 3DVolume Mode without ATT-Correction - surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat0, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); - } - else if (ATTMode_3DVolume == true){ // ========= 3DVolume Mode with ATT-Correction - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat0, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); - } - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if (((int)SosVoxelFloat.x == DebugSosVoxelX) && ((int)SosVoxelFloat.y == DebugSosVoxelY) && ((int)SosVoxelFloat.z == DebugSosVoxelZ)) - printf(">>>>>>>> fillCuArrayKernel: fill outSurfRefAscanIndexFloat0 SOSVoxel [%3f %3f %3f] - ascanIndex_i(%i) - with (%f,%f) \n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y); // In welche Speicherstelle wird geschrieben - #endif - - if ( TableAscanIndexAllocationCount < 2) continue; - - if (ATTMode_3DVolume == false){ // ========= 3DVolume Mode without ATT-Correction - surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat1, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); - } - else if (ATTMode_3DVolume == true){ // ========= 3DVolume Mode with ATT-Correction - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat1, 
i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); - } - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if (((int)SosVoxelFloat.x == DebugSosVoxelX) && ((int)SosVoxelFloat.y == DebugSosVoxelY) && ((int)SosVoxelFloat.z == DebugSosVoxelZ)) - printf(">>>>>>>> fillCuArrayKernel: fill outSurfRefAscanIndexFloat1 SOSVoxel [%3f %3f %3f] - ascanIndex_i(%i) - with (%f,%f) \n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y); // In welche Speicherstelle wird geschrieben - #endif - - if ( TableAscanIndexAllocationCount < 3) continue; - - if (ATTMode_3DVolume == false){ // ========= 3DVolume Mode without ATT-Correction - surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat2, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); - } - else if (ATTMode_3DVolume == true){ // ========= 3DVolume Mode with ATT-Correction - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, outSurfRefAscanIndexFloat2, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); - } - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if (((int)SosVoxelFloat.x == DebugSosVoxelX) && ((int)SosVoxelFloat.y == DebugSosVoxelY) && ((int)SosVoxelFloat.z == DebugSosVoxelZ)) - printf(">>>>>>>> fillCuArrayKernel: fill outSurfRefAscanIndexFloat2 SOSVoxel [%3f %3f %3f] - ascanIndex_i(%i) - with (%f,%f) \n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y); // In welche Speicherstelle wird geschrieben - #endif - - if ( TableAscanIndexAllocationCount < 4) continue; - - if (ATTMode_3DVolume == false){ // ========= 3DVolume Mode without ATT-Correction - surf3Dwrite(currentSOSVoxel_AscanIndexValues, outSurfRefAscanIndexFloat3, i_x*sizeof(float), i_y, TexturGeometryIndexZ ); - } - else if (ATTMode_3DVolume == true){ // ========= 3DVolume Mode with ATT-Correction - surf3Dwrite((float2)currentSOSVoxel_AscanIndexAttValues, 
outSurfRefAscanIndexFloat3, i_x*sizeof(float2), i_y, TexturGeometryIndexZ ); - } - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if (((int)SosVoxelFloat.x == DebugSosVoxelX) && ((int)SosVoxelFloat.y == DebugSosVoxelY) && ((int)SosVoxelFloat.z == DebugSosVoxelZ)) - printf(">>>>>>>> fillCuArrayKernel: fill outSurfRefAscanIndexFloat3 SOSVoxel [%3f %3f %3f] - ascanIndex_i(%i) - with (%f,%f) \n", SosVoxelFloat.x, SosVoxelFloat.y, SosVoxelFloat.z, ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y); // In welche Speicherstelle wird geschrieben - #endif - - } - - #ifdef debug_OutputFunctions - printf( "<== SAFTHandler::fillCuArrayKernel - End\n"); - #endif - -} - - - - - -void SAFTHandler::fillCuArray -( - float useValue, - cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill - int TableAscanIndexAllocationCount -) -{ - #ifdef debug_OutputFunctions - printf( "==> SAFTHandler::fillCuArray - Start\n"); - #endif - - dim3 threadsPerBlock (SOSGrid_XYZ.x,1,1); // determine neccessary amount of threads // max. 512 oder 1024 - dim3 blocksPerGrid (1,1,1); // determine neccessary amount of blocks in grid // max. 
65.535 - blocksPerGrid.x = SOSGrid_XYZ.y; - blocksPerGrid.y = maxFeasibleSosZLayerCount; - blocksPerGrid.z = 1; - - #ifdef debug_CudaFillCuArrayKernelProxy - printf("==================== SAFTHandler::fillCuArray ====================\n"); - //printf(" deviceListGeometry: %i (0=Em/1=Rec)\n", deviceListGeometry); - printf(" TableAscanIndexAllocationCount: %i\n", TableAscanIndexAllocationCount); // Amount of Textures due to limitation of 2048/4096 - - printf(" SOSGrid_XYZ x,y,z: [%i %i %i]\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); - printf(" threadsPerBlock x,y,z: [%i %i %i]\n", threadsPerBlock.x, threadsPerBlock.y, threadsPerBlock.z); - printf(" blocksPerGrid x,y,z: [%i %i %i]\n", blocksPerGrid.x, blocksPerGrid.y, blocksPerGrid.z); - - printf("=============================================================================\n"); - #endif - - - // Step 1. Bereite Output-Textur fuer AscanIndex vor - #ifdef SaftUseAscanIndexInterpolation - - if ( TableAscanIndexAllocationCount > 0){ - #ifdef debug_CudaFillCuArrayKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[0](%X)\n", deviceTextureAscanIndexFloatCuArray[0]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]); - } - if ( TableAscanIndexAllocationCount > 1) { - #ifdef debug_CudaFillCuArrayKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[1](%X)\n", deviceTextureAscanIndexFloatCuArray[1]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]); - } - if ( TableAscanIndexAllocationCount > 2){ - #ifdef debug_CudaFillCuArrayKernelProxy - printf( "cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[2](%X)\n", deviceTextureAscanIndexFloatCuArray[2]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]); - } - if ( TableAscanIndexAllocationCount > 3){ - #ifdef debug_CudaFillCuArrayKernelProxy - printf( 
"cudaBindSurfaceToArray: deviceTextureAscanIndexFloatCuArray[3](%X)\n", deviceTextureAscanIndexFloatCuArray[3]); - #endif - cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]); - } - - #endif - - - - // Step 2. Fuere Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden immer 1024/2048 A-Scans durchgegangen und in AscanIndex-Textur geschrieben - fillCuArrayKernel <<< blocksPerGrid, threadsPerBlock >>> - ( - useValue, - - deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in the path from transducer to voxel. - - maxAscanIndexArraysInTexture, - TableAscanIndexAllocationCount, ///< Amount of Surfaces in the Array of cuArrays - maxFeasibleSosZLayerCount, - - ATTMode_3DVolume, - - debugMode, - debugModeParameter - ); - - CUDA_CHECK(cudaGetLastError()); - - - - #ifdef debug_OutputFunctions - printf( "<== SAFTHandler::precalculateAscanIndex - End\n"); - #endif -} - + float debugMode, float debugModeParameter); \ No newline at end of file diff --git a/SAFT_TOFI/src/kernel/rayTracing.cu b/SAFT_TOFI/src/kernel/rayTracing.cu new file mode 100644 index 0000000..1f01c6e --- /dev/null +++ b/SAFT_TOFI/src/kernel/rayTracing.cu @@ -0,0 +1,3 @@ +#include "rayTracing.cuh" +texture texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen +texture texRefSosAttField; // Schritt 1. 
Textur anlegen diff --git a/SAFT_TOFI/src/kernel/rayTracing.cuh b/SAFT_TOFI/src/kernel/rayTracing.cuh index d45d867..bc77275 100644 --- a/SAFT_TOFI/src/kernel/rayTracing.cuh +++ b/SAFT_TOFI/src/kernel/rayTracing.cuh @@ -1,676 +1,167 @@ -#include #include "saft.hpp" -// Structure of File -//------------------------------------------------------- -// -// __device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat // Teilpfade (genutzt)// Bestimme den SOSVoxel der zu einer Position gehoert, mit Nachkommastelle -// -// __device__ __forceinline__ void processRayTracedVoxelTexture // Für den Fall SaftUseSosAttFloat1 erstmal lassen -// __device__ __forceinline__ void processRayTracedVoxelTextureSosAtt // AscanIndex // Addiere lokale SOS&ATT-Werte und # Voxel im Pfad -// -// Bresenham -// __device__ __forceinline__ void performRayTracedSpeedAdditionTexture // Teilpfade // Dreidimensionale Version des Bresenham Line Algorithmus im Float-Format +#define SQR(X) ((X) * (X)) -// ------------------------------------------------------- +extern texture texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen +extern texture texRefSosAttField; // Schritt 1. Textur anlegen -// printf() is only supported -// for devices of compute capability 2.0 and above - -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200) - #define printf(f, ...) ((void)(f, __VA_ARGS__),0) -#endif - -#define SQR(X) ((X)*(X)) +#define OutputSOSPositionX 20 // Volumen Voxel im SOS-Grid! +#define OutputSOSPositionY 20 +#define OutputSOSPositionZ 20 // Echte Z-SoS-Layer ohne Offset -//#ifdef debug_CudaRayTraceKernel - #define OutputSOSPositionX 20 // Volumen Voxel im SOS-Grid! - #define OutputSOSPositionY 20 - #define OutputSOSPositionZ 20 // Echte Z-SoS-Layer ohne Offset - -// #define ER_PositionX 4 // Emitter Receiver Position im SOS-Grid! 
-// #define ER_PositionY 37 -// #define ER_PositionZ 9 -//#endif - -#ifdef debug_CudaRayTraceKernelLive - #define OutputPositionX 250 // Volumen Voxel im Volumen! - #define OutputPositionY 250 - #define OutputPositionZ 0 - - #define DebugSosVoxelX 10 - #define DebugSosVoxelY 10 - #define DebugSosVoxelZ 10 - - #define ER_PositionX 0 // Emitter Receiver Position im SOS-Grid! - #define ER_PositionY 32 - #define ER_PositionZ 0 -#endif - -//Textur fuer SoSField anlegen - -#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - texture texRefSpeedOfSoundField; // Schritt 1. Textur anlegen -#endif - -#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - texture texRefSpeedOfSoundField; // Schritt 1. Textur anlegen //TODO: fuer Float2 fall rausnehmen - texture texRefSosAttField; // Schritt 1. Textur anlegen -#endif - - - -/** - Determines the voxel within the speed of sound field associated with a position. - - Bestimme den Voxel in dem Schallgeschwindigkeitsfeld der zu einer Position gehoert, mit Nachkommastelle -*/ -__device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat( // __forceinline__ zwingt den Compiler diesen Code bei jeden Aufruf direkt einzubinden (nicht als Funktion). - float3 const & position, ///< Position within the scanner. - float3 & voxel, ///< This argument is written to. Output voxel. - float3 & sosOffset, - float & SOS_RESOLUTION_FACTOR - ) +__device__ __forceinline__ void determineSpeedOfSoundFieldVoxelFloat( // __forceinline__ zwingt den Compiler diesen Code bei jeden Aufruf direkt einzubinden (nicht als Funktion). + float3 const &position, ///< Position within the scanner. + float3 &voxel, ///< This argument is written to. Output voxel. 
+ float3 &sosOffset, float &SOS_RESOLUTION_FACTOR) { - - #ifndef SOS_Version2 - voxel.x = (position.x - sosOffset.x ) * SOS_RESOLUTION_FACTOR + 0.5f; // SoSVoxel aus Positionsdaten bestimmen - voxel.y = (position.y - sosOffset.y ) * SOS_RESOLUTION_FACTOR + 0.5f; - voxel.z = (position.z - sosOffset.z ) * SOS_RESOLUTION_FACTOR + 0.5f; - #else - voxel.x = (position.x - sosOffset.x ) * SOS_RESOLUTION_FACTOR; // SoSVoxel aus Positionsdaten bestimmen - voxel.y = (position.y - sosOffset.y ) * SOS_RESOLUTION_FACTOR; - voxel.z = (position.z - sosOffset.z ) * SOS_RESOLUTION_FACTOR; - #endif - + voxel.x = (position.x - sosOffset.x) * SOS_RESOLUTION_FACTOR; // SoSVoxel aus Positionsdaten bestimmen + voxel.y = (position.y - sosOffset.y) * SOS_RESOLUTION_FACTOR; + voxel.z = (position.z - sosOffset.z) * SOS_RESOLUTION_FACTOR; } - - - /** Process a voxel in the Bresenham algorithm. Float Format Accumulate speed of sound samples and keep track of the number of voxels in the path. - Verarbeite einen Voxel in dem Bresenham Algorithmus - Addiere Schallgeschwindigkeits-Sample */ -__device__ __forceinline__ void processRayTracedVoxelTexture( - float const * currentVoxelFloat, ///< Bresenham speed of sound voxel float. - int & voxelCount, ///< This argument is written to. Number of voxels in the path so far. - float & totalSpeed, ///< This argument is written to. Speed of sound sample accumulator. - //float const * speedOfSoundField,///< Speed of sound field data containing samples. - cudaArray *deviceSpeedOfSoundFieldCuArray, ///< Pointer to cudaArray for SoSFieldData - int3 const & SOSGrid_XYZ ///< Size of SOS-Grid in XYZ - ) +__device__ __forceinline__ void processRayTracedVoxelTexture(float const *currentVoxelFloat, ///< Bresenham speed of sound voxel float. + int &voxelCount, ///< This argument is written to. Number of voxels in the path so far. + float &totalSpeed, ///< This argument is written to. Speed of sound sample accumulator. 
+ // float const * speedOfSoundField,///< Speed of sound field data containing samples. + cudaArray *deviceSpeedOfSoundFieldCuArray, ///< Pointer to cudaArray for SoSFieldData + int3 const &SOSGrid_XYZ ///< Size of SOS-Grid in XYZ +) { - #ifdef SaftUseHarmonicMean - totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); - #endif - - #ifdef debug_CudaRayTraceKernel - if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ)) - { - printf("pSOS currentVoxel [%d %d %d]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]); - //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample); - printf(" totalSpeed = %2.10f\n", totalSpeed); - //printf(" totalAttenuation = %2.10f\n", totalAttenuation); - printf(" voxelCount = %d\n", voxelCount); - } - #endif + totalSpeed += 1 / (float)tex3D(texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); } - -/////////////////////////////////////////////////////////////////////////////////////////// -// AscanIndex - Variante -// kernel version Z.316 -/////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////// -/** - Process a voxel in the Bresenham algorithm. Float Format - Accumulate speed of sound samples and keep track of the number of voxels in the path. - - Addiere lokale Schallgeschwindigkeits-Werte und Anzahl besuchter Voxel im Pfad - */ -// Aufruf: processRayTracedVoxelTextureSosAtt(currentVoxelFloat, voxelCount, totalSpeed, totalAttenuation, deviceSosAttFieldCuArray, SOSGrid_XYZ); - __device__ __forceinline__ void processRayTracedVoxelTextureSosAtt( - float const * currentVoxelFloat, ///< SOS-coordinates of current Voxel for Bresenham in float[3]. 
- float & voxelCount, ///< This argument is written to. Number of voxels in the path so far. // TODO: not necessary here to sum up because it is already known from Bresenham length - float & totalSpeed, ///< This argument is written to. Sum Up total SOS - float & totalAttenuation, ///< This argument is written to. Sum Up total Attenuation - //float const * speedOfSoundField, ///< Speed of sound field data containing samples. - cudaArray *deviceSosAttFieldCuArray, ///< Pointer to cudaArray for SoSATTFieldData // TODO: not necessary if access is used with texture memory - int3 const & SOSGrid_XYZ ///< Size of SOS-Grid in XYZ // TODO: only necessary for boundary-check. - ) + float const *currentVoxelFloat, ///< SOS-coordinates of current Voxel for Bresenham in float[3]. + float &voxelCount, ///< This argument is written to. Number of voxels in the path so far. // TODO: not necessary here to sum up because it is already known from Bresenham length + float &totalSpeed, ///< This argument is written to. Sum Up total SOS + float &totalAttenuation, ///< This argument is written to. Sum Up total Attenuation + cudaArray *deviceSosAttFieldCuArray, ///< Pointer to cudaArray for SoSATTFieldData // TODO: not necessary if access is used with texture memory + int3 const &SOSGrid_XYZ ///< Size of SOS-Grid in XYZ // TODO: only necessary for boundary-check. 
+) { - float2 SosAttValue; - - #ifdef SaftUseHarmonicMean - //totalSpeed += 1/speedOfSoundSample; // harmonisches Mittel -// #ifdef SaftTextureForBresenhamInterpolated -// #ifndef SOS_Version2 -// totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f ); -// #else -// totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f ); -// #endif -// #else - #ifndef SOS_Version2 - totalSpeed += 1/(float)tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2] ); - #else - //totalSpeed += 1/tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); - //totalAttenuation += tex3D( texRefAbsorptionField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); - - - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - totalSpeed += 1/tex3D( texRefSpeedOfSoundField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); - //totalAttenuation += tex3D( texRefAbsorptionField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); - #endif - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) -// if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ)) -// { -// //printf("pSOSATT pathPoint [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]); -// //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample); -// printf(". totalSpeed = %2.10f\n", totalSpeed); -// printf(". totalAttenuation = %2.10f\n", totalAttenuation); -// printf(". 
voxelCount = %d\n", voxelCount); -// } - SosAttValue = tex3D( texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); - totalSpeed += 1/SosAttValue.x; - totalAttenuation += SosAttValue.y; - - - //if (SosAttValue.y > 20) - // printf("!!!!!!!!!!!!!!!!! pSOSATT pathPoint [%f %f %f] = %f\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2], SosAttValue.y); - - -// if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ)) -// { -// //printf("pSOSATT pathPoint [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]); -// //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample); -// printf(".. totalSpeed = %2.10f\n", totalSpeed); -// printf(".. totalAttenuation = %2.10f\n", totalAttenuation); -// printf(".. voxelCount = %d\n", voxelCount); -// } - #endif - - - #endif -// #endif - #endif - - - #ifdef debug_CudaRayTraceKernel - if ((currentVoxelFloat[0] == DebugSosVoxelX) && (currentVoxelFloat[1] == DebugSosVoxelY) && (currentVoxelFloat[2] == DebugSosVoxelZ)) - { - printf("pSOSATT currentVoxel [%f %f %f]\n", currentVoxelFloat[0], currentVoxelFloat[1], currentVoxelFloat[2]); - //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample); - printf(" totalSpeed = %2.10f\n", totalSpeed); - printf(" totalAttenuation = %2.10f\n", totalAttenuation); - printf(" voxelCount = %d\n", voxelCount); - } - #endif - + float2 SosAttValue; + SosAttValue = tex3D(texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); + totalSpeed += 1 / SosAttValue.x; + totalAttenuation += SosAttValue.y; } +__device__ __forceinline__ void performRayTracedSpeedAdditionTexture(float &voxelCount, ///< This argument is written to. Number of voxels within the path traced. + float &totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced. 
+ float &totalAttenuation, + float3 const &point1f, ///< Vector array describing the Voxelcoordinates of emitters or receivers. + dim3 const &point2, ///< Vector array describing the Voxelcoordinates of Voxels. -/////////////////////////////////////////////////////////////////////////////////////////// -// aktuell genutze Variante fuer Teilpfade -/////////////////////////////////////////////////////////////////////////////////////////// + cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur -/** - - Dreidimensionale Version des Bresenham Line Algorithmus im Float-Format -*/ -__device__ __forceinline__ void performRayTracedSpeedAdditionTexture( - float & voxelCount, ///< This argument is written to. Number of voxels within the path traced. - -#ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - float & totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced. -#endif -#ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - float & totalSpeed, ///< This argument is written to. Sum of the speed of sound samples in the path traced. - float & totalAttenuation, -#endif - - float3 const & point1f, ///< Vector array describing the Voxelcoordinates of emitters or receivers. - dim3 const & point2, ///< Vector array describing the Voxelcoordinates of Voxels. - -#ifndef SaftTextureForBresenhamSosPaths - float const * deviceSpeedOfSoundField, ///< Array of speed of sound samples. Dimensions ordered by speed of indices, commencing with the fastest moving one: 1. x 2. y 3. z -#else - cudaArray *deviceSpeedOfSoundFieldCuArray, ///< CuArray fuer SOSFieldTextur -#endif - - int3 const & SOSGrid_XYZ, ///< Size of SOS-Grid in XYZ - float3 & sosOffset, - float & SOS_RESOLUTION, - float & IMAGE_RESOLUTION, - float3 regionOfInterestOffset, - - #ifdef SaftUseConstantMemforGeometry - int geometry ///< emitters=0 or receivers=1. 
- #else - float3 const * geometry ///< Vector array describing the positions of emitters or receivers. - #endif - ) + int3 const &SOSGrid_XYZ, ///< Size of SOS-Grid in XYZ + float3 &sosOffset, float &SOS_RESOLUTION, float &IMAGE_RESOLUTION, float3 regionOfInterestOffset, + int geometry ///< emitters=0 or receivers=1. +) { + voxelCount = 0.0f; + totalSpeed = 0.0f; + totalAttenuation = 0.0f; - voxelCount = 0.0f; - totalSpeed = 0.0f; - totalAttenuation = 0.0f; + // Voxel-Koordinaten ebenfalls in float umwandeln + float voxel1f[3] = {point1f.x, point1f.y, point1f.z}; // Point1 liegt im float3 Format vor enthaellt schon +0.5 - // Voxel-Koordinaten ebenfalls in float umwandeln - float voxel1f[3]= {point1f.x,point1f.y,point1f.z}; //Point1 liegt im float3 Format vor enthaellt schon +0.5 + // float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION; + float voxel2f[3]; - //float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION; - float voxel2f[3]; + voxel2f[0] = (float)point2.x; // + 0.5f; + voxel2f[1] = (float)point2.y; // + 0.5f; + voxel2f[2] = (float)point2.z; // + 0.5f; - #ifndef SOS_Version2 - voxel2f[0] = (regionOfInterestOffset.x - sosOffset.x ) / SOS_RESOLUTION + 0.5f; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen - voxel2f[1] = (regionOfInterestOffset.y - sosOffset.y ) / SOS_RESOLUTION + 0.5f; - voxel2f[2] = (regionOfInterestOffset.z - sosOffset.z ) / SOS_RESOLUTION + 0.5f; - #endif + int greatestDistanceDim = 0; // Gibt die Richtung(X,Y oder Z) an mit der grueueten Entfernung + int slowDim1 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung; + int slowDim2 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung; + float greatestDistance_XYZ = 0.0; // Grueuete Distanz in die Richtung mit der grueueten Entfernung + int fastDirectionSteps = 0; // Schritte die gegangen werden. 
+ float m_XYZ[3]; + float pathPoint[3]; + if ((abs(voxel1f[0] - voxel2f[0]) <= abs(voxel1f[2] - voxel2f[2]))) // X Z + slowDim1 = 0; + slowDim2 = 1; + } + else if ((abs(voxel1f[2] - voxel2f[2]) <= abs(voxel1f[1] - voxel2f[1]))) // Z Y + slowDim1 = 0; + slowDim2 = 2; + } + else // Z=Y + { + greatestDistanceDim = 2; // => Z + slowDim1 = 0; + slowDim2 = 1; + } + else // X>Z + if ((abs(voxel1f[1] - voxel2f[1]) <= abs(voxel1f[0] - voxel2f[0]))) // Y X + slowDim1 = 1; + slowDim2 = 2; + } + else // Y>X + { + greatestDistanceDim = 1; // => Y + slowDim1 = 0; + slowDim2 = 2; + } - #ifdef debug_CudaRayTraceKernel - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { -// printf("-> performRayTracedSpeedAdditionTexture\n"); -// printf(" point2 (Vol Pos im SoS) [%d %d %d]\n", point2.x , point2.y , point2.z); -// printf(" voxel1 (E/R Pos im SoS) [%13.6f %13.6f %13.6f]\n", voxel1f[0] , voxel1f[1] , voxel1f[2] ); // Voxel 1 = ER Position - printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]); - printf(" voxel1f = [%3i %3i %3i]\n", (int)voxel1f[0],(int)voxel1f[1],(int)voxel1f[2]); -// printf(" regionOfInterestOffset [%f %f %f]\n", regionOfInterestOffset.x , regionOfInterestOffset.y , regionOfInterestOffset.z ); // Voxel 2 = Volumen Position -// printf(" sosOffset [%f %f %f]\n", sosOffset.x , sosOffset.y , sosOffset.z ); -// printf(" SOS_RESOLUTION = %12.8f\n", SOS_RESOLUTION ); -// printf(" VoxelIncrement = %12.8f\n", VoxelIncrement ); -// //printf(" -> voxel2Start (Vol Pos SoS) [%f %f %f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 Start -> wird noch angepasst - } + // ist Steigung negativ? 
& Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. Voxel + if (voxel1f[greatestDistanceDim] >= voxel2f[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv + { + fastDirectionSteps = floor(voxel1f[greatestDistanceDim] + 0.5f) - floor(voxel2f[greatestDistanceDim] + 0.5f) + 1; + pathPoint[greatestDistanceDim] = voxel2f[greatestDistanceDim]; + pathPoint[slowDim1] = voxel2f[slowDim1]; + pathPoint[slowDim2] = voxel2f[slowDim2]; + } + else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ + { + fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f) - floor(voxel1f[greatestDistanceDim] + 0.5f) + 1; + pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim]; + pathPoint[slowDim1] = voxel1f[slowDim1]; + pathPoint[slowDim2] = voxel1f[slowDim2]; + } - #endif + // Steigung bestimmen + // Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float) + greatestDistance_XYZ = ((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float) + m_XYZ[greatestDistanceDim] = 1.0f; // Wegen Rundungsfehler bei der Division, die auftreten koennen. 
+ // m_XYZ[greatestDistanceDim] = (voxel2f[greatestDistanceDim] - voxel1f[greatestDistanceDim]) / greatestDistance_XYZ; + m_XYZ[slowDim1] = (voxel2f[slowDim1] - voxel1f[slowDim1]) / greatestDistance_XYZ; + m_XYZ[slowDim2] = (voxel2f[slowDim2] - voxel1f[slowDim2]) / greatestDistance_XYZ; - #ifndef SOS_Version2 - // Herausfinden wo genau der Voxel anfaengt/die Koordinaten des Voxels - voxel2f[0] = (float)point2.x + VoxelIncrement - fmod(((float)point2.x-voxel2f[0]), VoxelIncrement); - voxel2f[1] = (float)point2.y + VoxelIncrement - fmod(((float)point2.y-voxel2f[1]), VoxelIncrement); - voxel2f[2] = (float)point2.z + VoxelIncrement - fmod(((float)point2.z-voxel2f[2]), VoxelIncrement); - #else - //Vom Mittelpunkt ausgehen - voxel2f[0] = (float)point2.x;// + 0.5f; - voxel2f[1] = (float)point2.y;// + 0.5f; - voxel2f[2] = (float)point2.z;// + 0.5f; - #endif + int j = 0; + for (j = fastDirectionSteps; j > 0; j--) //(Alle Punkte innerhalb der Schleife berechnen) + { + processRayTracedVoxelTextureSosAtt(pathPoint, voxelCount, totalSpeed, totalAttenuation, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ); + pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] + m_XYZ[greatestDistanceDim]; + pathPoint[slowDim1] = pathPoint[slowDim1] + m_XYZ[slowDim1]; + pathPoint[slowDim2] = pathPoint[slowDim2] + m_XYZ[slowDim2]; + } - - #ifdef debug_CudaRayTraceKernel - - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { - printf(" -> voxel2Angepasst (Vol Pos SoS) [%13.6f %13.6f %13.6f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 nach anpassung - //printf("-> performRayTracedSpeedAdditionTexture\n"); -// printf(" voxel1 (E/R Pos im SoS) [%f %f %f]\n", voxel1f[0] , 
voxel1f[1] , voxel1f[2] ); // Voxel 1 = ER Position -// printf(" voxel2 (Vol Pos im SoS) [%f %f %f]\n", voxel2f[0] , voxel2f[1] , voxel2f[2] ); // Voxel 2 = Volumen Position -// printf(" regionOfInterestOffset [%f %f %f]\n", regionOfInterestOffset.x , regionOfInterestOffset.y , regionOfInterestOffset.z ); // Voxel 2 = Volumen Position -// printf(" sosOffset [%f %f %f]\n", sosOffset.x , sosOffset.y , sosOffset.z ); -// printf(" SOS_RESOLUTION = %f\n", SOS_RESOLUTION ); -// printf(" VoxelIncrement = %f\n", VoxelIncrement ); -// printf(" point2 (Vol Pos im SoS) [%f %f %f]\n", point2.x , point2.y , point2.z); - - } - - #endif - - int greatestDistanceDim = 0; // Gibt die Richtung(X,Y oder Z) an mit der grueueten Entfernung - int slowDim1 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung; - int slowDim2 = 0; // Gibt die Richtung(X,Y oder Z) der langsamen Richtung; - float greatestDistance_XYZ = 0.0; // Grueuete Distanz in die Richtung mit der grueueten Entfernung - #ifdef debug_CudaRayTraceKernel - bool m_positiv = 0; // Steigung positiv? - #endif - int fastDirectionSteps = 0; // Schritte die gegangen werden. - - #if defined(debug_CudaRayTraceKernel) || (! defined(SOS_Version2)) - float distance_XYZ[3]; - float mFastDirectionSteps_XYZ[3]; - float mDistance_XYZ[3]; - #endif - float m_XYZ[3]; - float pathPoint[3]; - #ifdef SOS_Version3 - float endPoint[3]; - #endif - - // Welcher Abstand in den Dimensionen XYZ ist der groesste? 
Um Richtung zu bestimmen - #ifndef SOS_Version2 - if ( (abs(voxel1f[0]-voxel2f[0]) <= abs(voxel1f[2]-voxel2f[2]))) // X Z - else if ( (abs(voxel1f[2]-voxel2f[2]) <= abs(voxel1f[1]-voxel2f[1]))) // Z Y - else // Z=Y - greatestDistanceDim = 2; // => Z - else // X>Z - if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y X - else // Y>X - greatestDistanceDim = 1; // => Y - #else - if ( (abs(voxel1f[0]-voxel2f[0]) <= abs(voxel1f[2]-voxel2f[2]))) // X Z - slowDim1 = 0; - slowDim2 = 1; - } - else if ( (abs(voxel1f[2]-voxel2f[2]) <= abs(voxel1f[1]-voxel2f[1]))) // Z Y - slowDim1 = 0; - slowDim2 = 2; - } - else // Z=Y - { - greatestDistanceDim = 2; // => Z - slowDim1 = 0; - slowDim2 = 1; - } - else // X>Z - if ( (abs(voxel1f[1]-voxel2f[1]) <= abs(voxel1f[0]-voxel2f[0]))) // Y X - slowDim1 = 1; - slowDim2 = 2; - } - else // Y>X - { - greatestDistanceDim = 1; // => Y - slowDim1 = 0; - slowDim2 = 2; - } - #endif - - #ifndef SOS_Version2 - #ifdef debug_CudaRayTraceKernel - greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float) - - distance_XYZ[0] = (voxel2f[0] - voxel1f[0]); // Abstand in X-Richtung - distance_XYZ[1] = (voxel2f[1] - voxel1f[1]); // Abstand in Y-Richtung - distance_XYZ[2] = (voxel2f[2] - voxel1f[2]); // Abstand in Z-Richtung - #endif - - pathPoint[0] = voxel1f[0]; // Voxel1f bzw. 
Emitter als Startpunkt nutzen - pathPoint[1] = voxel1f[1]; - pathPoint[2] = voxel1f[2]; - - #ifdef debug_CudaRayTraceKernel - - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { - // Ausgabe der berechneten Werte - printf(" \n"); - printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]); - printf(" voxel2f = [%3f %3f %3f]\n", voxel2f[0],voxel2f[1],voxel2f[2]); - printf(" greatestDistanceDim(X0 Y1 Z2) = %i\n", greatestDistanceDim); - printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ); - printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]); - //printf(" m_positiv = %i\n", m_positiv); - //printf(" fastDirectionSteps = %i\n", fastDirectionSteps); // Fehler wenn negativ! - printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]); - } - - #endif - - #ifdef debug_CudaRayTraceKernel - // Steigung bestimmen - // Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float) - mDistance_XYZ[0] = distance_XYZ[0] / greatestDistance_XYZ; - mDistance_XYZ[1] = distance_XYZ[1] / greatestDistance_XYZ; - mDistance_XYZ[2] = distance_XYZ[2] / greatestDistance_XYZ; - #endif - - // ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. 
Voxel - if (voxel2f[greatestDistanceDim] >= pathPoint[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv - { - #ifdef debug_CudaRayTraceKernel - m_positiv = 1; - #endif - fastDirectionSteps = floor(voxel2f[greatestDistanceDim] ) - floor(pathPoint[greatestDistanceDim] ) + 1; - } - else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ - { - #ifdef debug_CudaRayTraceKernel - m_positiv = 0; - #endif - fastDirectionSteps = floor(pathPoint[greatestDistanceDim] ) - floor(voxel2f[greatestDistanceDim] ) + 1; - } - - // Steigung bezogen auf die Anzahl der noetigen ganzen Schritte - mFastDirectionSteps_XYZ[0] = distance_XYZ[0]/(float)(fastDirectionSteps-1); - mFastDirectionSteps_XYZ[1] = distance_XYZ[1]/(float)(fastDirectionSteps-1); - mFastDirectionSteps_XYZ[2] = distance_XYZ[2]/(float)(fastDirectionSteps-1); - - m_XYZ[0] = mFastDirectionSteps_XYZ[0] ; // Steigung bezogen auf die Anzahl der nuetigen ganzen Schritte in jede Richtung nutzen - m_XYZ[1] = mFastDirectionSteps_XYZ[1] ; - m_XYZ[2] = mFastDirectionSteps_XYZ[2] ; - #else - - // ist Steigung negativ? & Bestimmen der fastDirectionSteps vom diskretisierten Startpunkt zum Endpunkt bzw. 
Voxel - if (voxel1f[greatestDistanceDim] >= voxel2f[greatestDistanceDim]) // voxel2f > voxel1f Endpukt > Startpkt -> Steigung positiv - { - fastDirectionSteps = floor(voxel1f[greatestDistanceDim] + 0.5f ) - floor(voxel2f[greatestDistanceDim] + 0.5f ) + 1; - - pathPoint[greatestDistanceDim] = voxel2f[greatestDistanceDim]; - pathPoint[slowDim1] = voxel2f[slowDim1]; - pathPoint[slowDim2] = voxel2f[slowDim2]; - - #ifdef SOS_Version3 - endPoint[0] = voxel2f[0]; - endPoint[1] = voxel1f[1]; - endPoint[2] = voxel1f[2]; - #endif - } - else // voxel2f < voxel1f Endpukt < Startpkt -> Steigung negativ - { - fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f ) - floor(voxel1f[greatestDistanceDim] + 0.5f ) + 1; - - pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim]; - pathPoint[slowDim1] = voxel1f[slowDim1]; - pathPoint[slowDim2] = voxel1f[slowDim2]; - - #ifdef SOS_Version3 - endPoint[0] = voxel2f[0]; - endPoint[1] = voxel2f[1]; - endPoint[2] = voxel2f[2]; - #endif - } - - - #ifdef debug_CudaRayTraceKernel - greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float) - - distance_XYZ[0] = (voxel2f[0] - voxel1f[0]); // Abstand in X-Richtung - distance_XYZ[1] = (voxel2f[1] - voxel1f[1]); // Abstand in Y-Richtung - distance_XYZ[2] = (voxel2f[2] - voxel1f[2]); // Abstand in Z-Richtung - - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { - // Ausgabe der berechneten Werte - printf(" \n"); - printf(" voxel1f = [%3f %3f %3f]\n", voxel1f[0],voxel1f[1],voxel1f[2]); - printf(" voxel2f = [%3f %3f %3f]\n", voxel2f[0],voxel2f[1],voxel2f[2]); - printf(" 
greatestDistanceDim(X0 Y1 Z2) = %i\n", greatestDistanceDim); - printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ); - printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]); - //printf(" m_positiv = %i\n", m_positiv); - //printf(" fastDirectionSteps = %i\n", fastDirectionSteps); // Fehler wenn negativ! - printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]); - } - - // Steigung bestimmen - // Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float) - mDistance_XYZ[0] = distance_XYZ[0] / greatestDistance_XYZ; - mDistance_XYZ[1] = distance_XYZ[1] / greatestDistance_XYZ; - mDistance_XYZ[2] = distance_XYZ[2] / greatestDistance_XYZ; - #endif - - // Steigung bestimmen - // Auf Gesamtentfernung bezogene Steigung in jede Richtung (Float) - greatestDistance_XYZ=((float)voxel2f[greatestDistanceDim] - (float)voxel1f[greatestDistanceDim]); // Groessten Abstand in XY oder Z-Richtung ermitteln (Float) - m_XYZ[greatestDistanceDim] = 1.0f; // Wegen Rundungsfehler bei der Division, die auftreten koennen. 
- //m_XYZ[greatestDistanceDim] = (voxel2f[greatestDistanceDim] - voxel1f[greatestDistanceDim]) / greatestDistance_XYZ; - m_XYZ[slowDim1] = (voxel2f[slowDim1] - voxel1f[slowDim1]) / greatestDistance_XYZ; - m_XYZ[slowDim2] = (voxel2f[slowDim2] - voxel1f[slowDim2]) / greatestDistance_XYZ; - #endif - - - #ifdef debug_CudaRayTraceKernel - - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { - - // Ausgabe der berechneten Werte - //printf(" greatestDistance_XYZ = %f\n", greatestDistance_XYZ); - //printf(" distance_XYZ = [%3f %3f %3f]\n", distance_XYZ[0],distance_XYZ[1],distance_XYZ[2]); - //printf(" m_positiv = %i\n", m_positiv); - printf(" fastDirectionSteps -1 = %i\n", (fastDirectionSteps-1)); - printf(" pathPoint = [%3f %3f %3f]\n", pathPoint[0],pathPoint[1],pathPoint[2]); - printf(" mFastDirectionSteps_XYZ = [%3f %3f %3f]\n", mFastDirectionSteps_XYZ[0],mFastDirectionSteps_XYZ[1],mFastDirectionSteps_XYZ[2]); - printf(" m_XYZ = [%3f %3f %3f]\n", m_XYZ[0],m_XYZ[1],m_XYZ[2]); - - // Punkte auf Pfad ablaufen -// printf(" \n"); -// printf(" Float Bresenham mit Anfangspkt- und Endpktkorrektur Texture\n"); -// printf(" =========================================================================================================\n"); -// printf(" Step | currentVoxel | X | Y | Z || SoSvalue | delta_SoSvalue\n"); -// printf(" ---------------------------------------------------------------------------------------------------------\n"); - } - #endif - - - - #ifndef SOS_Version2 - // Startpunkt (voxel1f) festlegen - pathPoint[0] = voxel1f[0]; - pathPoint[1] = voxel1f[1]; - pathPoint[2] = voxel1f[2]; - #endif - - // Innere schleife fuer den Bresenham ohne Endpunkt - #ifndef 
SOS_Version2 - int j = 1; - for (j=1; j<=(fastDirectionSteps-1); j++) //(Start und Endpunkt werden ausserhalb der Schleife festgelegt) - #else - #ifndef SOS_Version3 - int j=0; - for (j=fastDirectionSteps; j>0; j--) //(Alle Punkte innerhalb der Schleife berechnen) - #else - int j = 1; - for (j=1; j<=(fastDirectionSteps-1); j++) //(Endpunkt wird ausserhalb der Schleife festgelegt) - #endif - #endif - { - //processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ); - - #ifndef SaftTextureForBresenhamSosPaths - // AuskommentiertprocessRayTracedVoxelFloat(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundField, SOSGrid_XYZ); - //processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, SOSGrid_XYZ); - #else - //processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ); - - #ifdef SaftUseSosAttFloat1 // Nutze getrennte Texturen fuer beide Volumen (Sos+Att) - processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ); - #endif - - #ifdef SaftUseSosAttFloat2 // Nutze nur eine Textur fuer beide Volumen (Sos+Att) - processRayTracedVoxelTextureSosAtt(pathPoint, voxelCount, totalSpeed, totalAttenuation, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ); - - #ifdef debug_CudaRayTraceKernel - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhaengig - //if ((pathPoint[0] == DebugSosVoxelX) && (pathPoint[1] == DebugSosVoxelY) && (pathPoint[2] == DebugSosVoxelZ) && ((int)voxel1f[0] == ER_PositionX) && ((int)voxel1f[1] == ER_PositionY) && ((int)voxel1f[2] == ER_PositionZ)) // Auch von ER-Position - //if ((pathPoint[0] == DebugSosVoxelX) && (pathPoint[1] == DebugSosVoxelY) && (pathPoint[2] == DebugSosVoxelZ)) // Nur von SOS_XYZ-Position - //if ( ((int)voxel1f[0] == ER_PositionX) && ((int)voxel1f[1] == 
ER_PositionY) && ((int)voxel1f[2] == ER_PositionZ) ) // Nur von ER-Position - { - printf("pSOSATT currentVoxel [%f %f %f] - j(%i)\n", pathPoint[0], pathPoint[1], pathPoint[2], j); - //printf(" speedOfSoundSample = %2.10f\n", speedOfSoundSample); - - float2 SosAttValue = tex3D( texRefSosAttField, pathPoint[0] + 0.5f, pathPoint[1] + 0.5f, pathPoint[2] + 0.5f); - //totalSpeed += SosAttValue.x; - //totalAttenuation += SosAttValue.y; - - printf(" totalSpeed = %2.10f\n", SosAttValue.x, totalSpeed); - printf(" totalAttenuation + %2.10f = %2.10f\n", SosAttValue.y, totalAttenuation); - printf(" voxelCount = %d\n", voxelCount); - } - #endif - -// SosAttValue = tex3D( texRefSosAttField, currentVoxelFloat[0] + 0.5f, currentVoxelFloat[1] + 0.5f, currentVoxelFloat[2] + 0.5f); -// totalSpeed += SosAttValue.x; -// totalAttenuation += SosAttValue.y; - #endif - - #endif - - #ifdef debug_CudaRayTraceKernel - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhaengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { - //printf(" Step %3i| [%3i %3i %3i] | % 11.6f | % 11.6f | % 11.6f || % 13.6f | % 13.6f \n", j, (int)floor(pathPoint[0]), (int)floor(pathPoint[1]), (int)floor(pathPoint[2]), pathPoint[0], pathPoint[1], pathPoint[2], totalSpeed, speedOfSoundField[((int)floor(pathPoint[2]) * SOSGrid_XYZ.y + (int)floor(pathPoint[1])) * SOSGrid_XYZ.x + (int)floor(pathPoint[0])]); - printf(" Step %3i| [%3i %3i %3i] | % 17.12f % 17.12f % 17.12f || % 13.6f | % 13.6f \n", j, (int)floor(pathPoint[0]+0.5f), (int)floor(pathPoint[1]+0.5f), (int)floor(pathPoint[2]+0.5f), pathPoint[0], pathPoint[1], pathPoint[2], voxelCount/totalSpeed, (float)tex3D( texRefSpeedOfSoundField, pathPoint[0] + 0.5f, pathPoint[1] + 0.5f, pathPoint[2] + 0.5f ) ); - } - 
#endif - - #ifndef SOS_Version2 - pathPoint[0] = pathPoint[0] + m_XYZ[0]; - pathPoint[1] = pathPoint[1] + m_XYZ[1]; - pathPoint[2] = pathPoint[2] + m_XYZ[2]; - #else - pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] + m_XYZ[greatestDistanceDim]; - pathPoint[slowDim1] = pathPoint[slowDim1] + m_XYZ[slowDim1]; - pathPoint[slowDim2] = pathPoint[slowDim2] + m_XYZ[slowDim2]; - #endif - } - - #if (!defined (SOS_Version2)) || defined(SOS_Version3) - // Endpunkt - - #ifndef SaftTextureForBresenhamSosPaths - // AuskommentiertprocessRayTracedVoxelFloat(endPoint, voxelCount, totalSpeed, deviceSpeedOfSoundField, SOSGrid_XYZ); - //processRayTracedVoxelTexture(pathPoint, voxelCount, totalSpeed, speedOfSoundField, SOSGrid_XYZ); - #else - processRayTracedVoxelTexture(endPoint, voxelCount, totalSpeed, deviceSpeedOfSoundFieldCuArray, SOSGrid_XYZ); - // TODO: Wieder einen ganzen Schritt entfernen und nur Distanz des Voxels dazufuegen --> dazu erst voxelCount auf float umstellen - voxelCount--; - pathPoint[greatestDistanceDim] = pathPoint[greatestDistanceDim] - m_XYZ[greatestDistanceDim]; - pathPoint[slowDim1] = pathPoint[slowDim1] - m_XYZ[slowDim1]; - pathPoint[slowDim2] = pathPoint[slowDim2] - m_XYZ[slowDim2]; - voxelCount += sqrtf( SQR(pathPoint[1]-endPoint[1]) + SQR(pathPoint[2]-endPoint[2]) + SQR(pathPoint[3]-endPoint[3]) ); - #endif - - #ifdef debug_CudaRayTraceKernel - if ((point2.x == OutputSOSPositionX) && (point2.y == OutputSOSPositionY) && (point2.z == OutputSOSPositionZ)) // Nur von VolumeOutput abhuengig - // if ((point2.x == OutputPositionX) && (point2.y == OutputPositionY) && (point2.z == OutputPositionZ) && (voxel1.x == ER_PositionX) && (voxel1.y == ER_PositionY) && (voxel1.z == ER_PositionZ)) // Auch von ER-Position - { - //printf(" Step %3i| [%3i %3i %3i] | % 11.6f | % 11.6f | % 11.6f || % 13.6f | % 13.6f \n", j, (int)floor(voxel2f[0]), (int)floor(voxel2f[1]), (int)floor(voxel2f[2]), voxel2f[0], voxel2f[1], voxel2f[2], totalSpeed, 
speedOfSoundField[((int)floor(voxel2f[2]) * SOSGrid_XYZ.y + (int)floor(voxel2f[1])) * SOSGrid_XYZ.x + (int)floor(voxel2f[0])]); - printf("endPkt Step %3i| [%3i %3i %3i] | % 17.12f % 17.12f % 17.12f || % 13.6f | % 13.6f \n", j, (int)floor(endPoint[0]+0.5f), (int)floor(endPoint[1]+0.5f), (int)floor(endPoint[2]+0.5f), endPoint[0], endPoint[1], endPoint[2], voxelCount/totalSpeed, (float)tex3D( texRefSpeedOfSoundField, endPoint[0] + 0.5f, endPoint[1] + 0.5f, endPoint[2] + 0.5f ) ); - // printf("<- performRayTracedSpeedAdditionTexture\n"); - } - #endif - #else - // keinen Endpunkt extra dazurechnen. - #endif - - // Anzahl der Besuchten Voxel liegt von Anfang an fest, daher nicht noetig einzeln aufzuaddieren! - voxelCount=fastDirectionSteps; + // Anzahl der Besuchten Voxel liegt von Anfang an fest, daher nicht noetig einzeln aufzuaddieren! + voxelCount = fastDirectionSteps; } diff --git a/SAFT_TOFI/src/kernel/saftKernel.cu b/SAFT_TOFI/src/kernel/saftKernel.cu new file mode 100644 index 0000000..e672168 --- /dev/null +++ b/SAFT_TOFI/src/kernel/saftKernel.cu @@ -0,0 +1,561 @@ +#include "saftKernel.cuh" + +texture texRefAscans; // Schritt 1. 
Textur anlegen + +// Texture for loading AscanIndexes without ATT-Correction (float) +texture texTableAscanIndexFloat1_0; +texture texTableAscanIndexFloat1_1; +texture texTableAscanIndexFloat1_2; +texture texTableAscanIndexFloat1_3; + +// Texture for loading AscanIndexes with ATT-Correction (float2) +texture texTableAscanIndexFloat2_0; +texture texTableAscanIndexFloat2_1; +texture texTableAscanIndexFloat2_2; +texture texTableAscanIndexFloat2_3; + +__global__ void saftKernelAscanIndex_SOS_ATT( // Version SoSATT-Korrektur + + int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction + float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction + int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture + int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures) + + int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens + float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids + float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume + float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates + + int const blockIndexOffset, ///< + int const speedOfSoundZLayer, ///< + dim3 const gridDimensions, ///< + dim3 const blockDimensions, ///< + float const debugMode, ///< + float const debugModeParameter, ///< + int const *deviceSAFT_VARIANT, ///< + double *output + +) +{ + ///////////////////////////////////////////////////////////////////////////////////////// + // 1. Determine which Voxel is to be calculated in this Kernel + ///////////////////////////////////////////////////////////////////////////////////////// + + dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread? 
+ (threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y)); + + // Index of Block for this Thread + unsigned long long int blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + + dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0. + ((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index + (blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index + (blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index + ); + + // If Voxel is outside the reconstructed Image leave Kernel + if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) + return; + ///////////////////////////////////////////////////////////////////////////////////////// + // 2. 
Determine + ///////////////////////////////////////////////////////////////////////////////////////// + // - SOSvoxel in which voxel is located + // - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths + // - Variable declarations + ///////////////////////////////////////////////////////////////////////////////////////// + + // Memory-Index for this Thread for Output-Array of this Voxel + unsigned long long int memoryIndex = + (((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) * + (unsigned long long int)IMAGE_SIZE_XYZ.x + + (unsigned long long int)regionOfInterestVoxel.x); + float3 SosVoxelf; // SoS-Voxel Koordinates in float + // Determine SOS-Voxel-Position + SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + // printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); + + // TexturIndex for access on Texturmemory depending of Voxel + float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on Texturmemory + float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f. 
+ float const TexturIndexY = SosVoxelf.y + 0.5f; + float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths + + float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue + + __syncthreads(); + + if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0 + { + voxelValue = 0.0; + } + else + { + voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig + } + + __syncthreads(); + + if (TableAscanIndexAllocationCount == 1) + { + float2 currentSOSVoxel_AscanIndexAttValues; + + float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; + + // #pragma unroll 2 + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s + { + TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + __syncthreads(); + voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an + } + } + else if (TableAscanIndexAllocationCount == 2) + { + float2 currentSOSVoxel_AscanIndexAttValues; + + // #pragma unroll 3 + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + { + // 1ten Teil mit selben Index laden + TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + + voxelValue += currentSOSVoxel_AscanIndexAttValues.y * + tex2D(texRefAscans, 
currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an + + // 2ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexAttValues = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += currentSOSVoxel_AscanIndexAttValues.y * + tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f); + } + } + else if (TableAscanIndexAllocationCount == 3) + { + + float2 currentSOSVoxel_AscanIndexAttValues_0; + float2 currentSOSVoxel_AscanIndexAttValues_1; + float2 currentSOSVoxel_AscanIndexAttValues_2; + + float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; + float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; + float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f; + + // #pragma unroll 2 + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s + { + TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + // load TOF-Index from Textur 0-3 + currentSOSVoxel_AscanIndexAttValues_0 = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexAttValues_1 = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexAttValues_2 = tex3D(texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + __syncthreads(); + voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an + voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); + 
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); + } + } + else if (TableAscanIndexAllocationCount == 4) + { + float2 currentSOSVoxel_AscanIndexAttValues_0; + float2 currentSOSVoxel_AscanIndexAttValues_1; + float2 currentSOSVoxel_AscanIndexAttValues_2; + float2 currentSOSVoxel_AscanIndexAttValues_3; + + float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; + float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; + float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f; + float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f; + + // #pragma unroll 2 + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s + { + TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + // syncthreads(); + // load TOF-Index from Textur 0-3 + currentSOSVoxel_AscanIndexAttValues_0 = tex3D(texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexAttValues_1 = tex3D(texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexAttValues_2 = tex3D(texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexAttValues_3 = tex3D(texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + __syncthreads(); + voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an + voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); + voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * 
tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); + voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D(texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i); + } + } + + __syncthreads(); + output[memoryIndex] = (double)voxelValue; +} + +__global__ void saftKernelAscanIndex_SOS( // Version SoS-Korrektur + + int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction + float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction + int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture + int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures) + + int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens + float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids + float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume + float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates + + int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output + +) +{ + ///////////////////////////////////////////////////////////////////////////////////////// + // 1. Determine which Voxel is to be calculated in this Kernel + ///////////////////////////////////////////////////////////////////////////////////////// + + dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread? + (threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y)); + + // Index of Block for this Thread + long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + + dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0. 
+ ((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index + (blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index + (blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index + ); + + // If Voxel is outside the reconstructed Image leave Kernel + if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) + return; + ///////////////////////////////////////////////////////////////////////////////////////// + // 2. Determine + ///////////////////////////////////////////////////////////////////////////////////////// + // - SOSvoxel in which voxel is located + // - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths + // - Variable declarations + ///////////////////////////////////////////////////////////////////////////////////////// + + // Memory-Index for this Thread for Output-Array of this Voxel + long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x); + float3 SosVoxelf; // SoS-Voxel Koordinates in float + // Determine SOS-Voxel-Position + SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + // printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); + + // TexturIndex for access on Texturmemory depending of Voxel + float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on 
Texturmemory + float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f. + float const TexturIndexY = SosVoxelf.y + 0.5f; + float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths + float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue + + float currentSOSVoxel_AscanIndexValues; + + ///////////////////////////////////////////////////////////////////////////////////////// + // 3. SAFT-Algorithmus + ///////////////////////////////////////////////////////////////////////////////////////// + // - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen + ///////////////////////////////////////////////////////////////////////////////////////// + + // Vorgehen Ascanindexvariante + // 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver + // Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z). + // Xmax = 128 + // Ymax = 128 + // Zmax = 2 + // => X = x + // => Y = y + // => Z = Zmax*RecNr+z + // 2. Lade an dieser Stelle den Interpolierten Index. + // 2.a Über Texturmemory + // 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D + // 3. 
Lade Ascan-Sample an diesem Index und summiere auf + + __syncthreads(); + if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0 + { + voxelValue = 0.0f; + } + else + { + voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig + } + + __syncthreads(); + // float VoxelAscanIndex2 = 0.0f; + // float voxelValue2 = 0.0f; + + if (TableAscanIndexAllocationCount == 1) + { + // #pragma unroll 3 + // for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + // for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.6GV/s + { + TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + // nutze immer nur 1tes Surface + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan a + } + } + else if (TableAscanIndexAllocationCount == 2) + { + // #pragma unroll 3 + // for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + { + TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + // 1ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, 
TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an + + // 2ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f); + } + } + else if (TableAscanIndexAllocationCount == 3) + { + // #pragma unroll 3 + // for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + { + TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + // 1ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an + + // 2ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f); + + // 3ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2 * maxAscanIndexArraysInTexture + 0.5f); + } + } + + else if 
(TableAscanIndexAllocationCount == 4) + { + float currentSOSVoxel_AscanIndexValues_0; + float currentSOSVoxel_AscanIndexValues_1; + float currentSOSVoxel_AscanIndexValues_2; + float currentSOSVoxel_AscanIndexValues_3; + + float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; + float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; + float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f; + float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f; + + // #pragma unroll 4 + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s + { + TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + // syncthreads(); + // load TOF-Index from Textur 0-3 + currentSOSVoxel_AscanIndexValues_0 = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexValues_1 = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexValues_2 = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexValues_3 = tex3D(texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i); + + // if (((int)ascanIndex_i & 31) == 0) __syncthreads(); + } + } + else + { + // Do nothing due to only 4 are defined + } + __syncthreads(); + output[memoryIndex] = 
(double)voxelValue; +} + +__global__ void saftKernelAscanIndex( // Version ohne-Korrektur + + int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction + float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction + int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture + int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures) + + int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens + float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids + float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume + float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates + + int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output + +) +{ + ///////////////////////////////////////////////////////////////////////////////////////// + // 1. Determine which Voxel is to be calculated in this Kernel + ///////////////////////////////////////////////////////////////////////////////////////// + + dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread? + (threadIdx.x / blockDimensions.y) % blockDimensions.x, threadIdx.x % blockDimensions.y, threadIdx.x / (blockDimensions.x * blockDimensions.y)); + + // Index of Block for this Thread + long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + + dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0. 
+ ((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, // Medium speed index + (blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, // Fastest index + (blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z // Slowest index + ); + + // If Voxel is outside the reconstructed Image leave Kernel + if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) + return; + + ///////////////////////////////////////////////////////////////////////////////////////// + // 2. Determine + ///////////////////////////////////////////////////////////////////////////////////////// + // - SOSvoxel in which voxel is located + // - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths + // - Variable declarations + ///////////////////////////////////////////////////////////////////////////////////////// + + // Memory-Index for this Thread for Output-Array of this Voxel + long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x); + float3 SosVoxelf; // SoS-Voxel Koordinates in float + // Determine SOS-Voxel-Position + SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); // Hier Addition der SOSVoxel im SoS-Grid durchfuehren + // printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); + + // TexturIndex for access on Texturmemory depending of Voxel + float TexturIndexZ_AscanIndex = 0.0f; // Z-Index for access on 
Texturmemory + float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f. + float const TexturIndexY = SosVoxelf.y + 0.5f; + float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths + float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue + + float currentSOSVoxel_AscanIndexValues; + + ///////////////////////////////////////////////////////////////////////////////////////// + // 3. SAFT-Algorithmus + ///////////////////////////////////////////////////////////////////////////////////////// + // - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen + ///////////////////////////////////////////////////////////////////////////////////////// + + // Vorgehen Ascanindexvariante + // 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver + // Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z). + // Xmax = 128 + // Ymax = 128 + // Zmax = 2 + // => X = x + // => Y = y + // => Z = Zmax*RecNr+z + // 2. Lade an dieser Stelle den Interpolierten Index. + // 2.a Über Texturmemory + // 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D + // 3. 
Lade Ascan-Sample an diesem Index und summiere auf + + __syncthreads(); + if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0 + { + voxelValue = 0.0f; + } + else + { + voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig + } + + __syncthreads(); + // float VoxelAscanIndex2 = 0.0f; + // float voxelValue2 = 0.0f; + + if (TableAscanIndexAllocationCount == 1) + { + // #pragma unroll 3 + // for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + { + TexturIndexZ_AscanIndex = 2 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + // nutze immer nur 1tes Surface + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an + } + } + else if (TableAscanIndexAllocationCount == 2) + { + // #pragma unroll 3 + // for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + { + TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + // 1ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, 
ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an + + // 2ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f); + } + } + else if (TableAscanIndexAllocationCount == 3) + { + // #pragma unroll 3 + // for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.6GV/s + { + TexturIndexZ_AscanIndex = 2.0 * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + + // 1ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an + + // 2ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture + 0.5f); + + // 3ten Teil mit selben Index laden + currentSOSVoxel_AscanIndexValues = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2 * maxAscanIndexArraysInTexture + 0.5f); + } + } + + else if (TableAscanIndexAllocationCount == 4) + { + float currentSOSVoxel_AscanIndexValues_0; + float 
currentSOSVoxel_AscanIndexValues_1; + float currentSOSVoxel_AscanIndexValues_2; + float currentSOSVoxel_AscanIndexValues_3; + + float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; + float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; + float Offset_2 = (float)ascanIndexBatchOffset + 2.0f * maxAscanIndexArraysInTexture + 0.5f; + float Offset_3 = (float)ascanIndexBatchOffset + 3.0f * maxAscanIndexArraysInTexture + 0.5f; + + // #pragma unroll 2 + for (float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i += 1.0f) // bis zu 60.8GV/s + { + TexturIndexZ_AscanIndex = 2.0f * ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + //__syncthreads(); + // load TOF-Index from Textur 0-3 + currentSOSVoxel_AscanIndexValues_0 = tex3D(texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexValues_1 = tex3D(texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexValues_2 = tex3D(texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + currentSOSVoxel_AscanIndexValues_3 = tex3D(texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); + + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i); + voxelValue += tex2D(texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i); + } + } + __syncthreads(); + output[memoryIndex] = (double)voxelValue; +} \ No newline at end of file diff --git a/SAFT_TOFI/src/kernel/saftKernel.cuh b/SAFT_TOFI/src/kernel/saftKernel.cuh index 3c7a060..63f8ae4 100644 --- a/SAFT_TOFI/src/kernel/saftKernel.cuh 
+++ b/SAFT_TOFI/src/kernel/saftKernel.cuh @@ -1,2048 +1,91 @@ #include + #include "saft.hpp" // printf() is only supported // for devices of compute capability 2.0 and above #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200) - #define printf(f, ...) ((void)(f, __VA_ARGS__),0) +#define printf(f, ...) ((void)(f, __VA_ARGS__), 0) #endif +#define OutputVolumeX 128 // Coordinates in the output volume +#define OutputVolumeY 128 +#define OutputVolumeZ 4 +#define DebugSoSVoxelX 64 // Coordinates in the SoS voxel grid +#define DebugSoSVoxelY 64 +#define DebugSoSVoxelZ 0 -#define OutputVolumeX 128 // Koordinaten im Outputvolume -#define OutputVolumeY 128 -#define OutputVolumeZ 4 +#define CorrectionBorder 1e15 // Border Value to restart Calculation -#define DebugSoSVoxelX 64 // Koordinaten in SoSVoxel -#define DebugSoSVoxelY 64 -#define DebugSoSVoxelZ 0 +#define SQR(X) ((X) * (X)) -#define CorrectionBorder 1e15 // Border Value to restart Calculation +extern texture<float, cudaTextureType2D, cudaReadModeElementType> texRefAscans; // Step 1: declare the A-scan texture (fetched with tex2D) +// Texture for loading AscanIndexes without ATT-Correction (float) +extern texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_0; +extern texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_1; +extern texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_2; +extern texture<float, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat1_3; -#define SQR(X) ((X)*(X)) +// Texture for loading AscanIndexes with ATT-Correction (float2) +extern texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_0; +extern texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_1; +extern texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_2; +extern texture<float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexFloat2_3; +__global__ void saftKernelAscanIndex_SOS_ATT( // Variant with SoS and ATT correction -texture< float, cudaTextureType2D, cudaReadModeElementType> texRefAscans; // Schritt 1. 
Textur anlegen + int const ascanIndexBatchOffset, ///< Offset of A-scans when more than one A-scan batch is used for the reconstruction + float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction + int const maxAscanIndexArraysInTexture, ///< Maximum number of A-scans in one texture + int const TableAscanIndexAllocationCount, ///< Number of textures in use (currently limited to a maximum of 4) + int3 const IMAGE_SIZE_XYZ, ///< XYZ size of the output volume + float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids + float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume + float const VoxelIncrement, ///< Step width of one voxel in SOS coordinates -//Texture for loading AscanIndexes without ATT-Correction (float) -texture texTableAscanIndexFloat1_0; -texture texTableAscanIndexFloat1_1; -texture texTableAscanIndexFloat1_2; -texture texTableAscanIndexFloat1_3; + int const blockIndexOffset, ///< NOTE(review): the pre-refactor body adds this to blockIdx.z for the linear block index and subtracts it from the voxel z for the output index; confirm + int const speedOfSoundZLayer, ///< NOTE(review): the pre-refactor body subtracts this from the SOS voxel z to form the texture z offset; confirm + dim3 const gridDimensions, ///< NOTE(review): the pre-refactor body uses this to split the linear block index into x/y/z block coordinates; confirm + dim3 const blockDimensions, ///< NOTE(review): the pre-refactor body uses this to split threadIdx.x into a voxel position inside the block; confirm + float const debugMode, ///< Not referenced in the visible pre-refactor body; purpose not shown here + float const debugModeParameter, ///< Not referenced in the visible pre-refactor body; purpose not shown here + int const *deviceSAFT_VARIANT, ///< Not referenced in the visible pre-refactor body; purpose not shown here + double *output ///< Accumulated voxel values; the pre-refactor body reloads the stored value first when ascanIndexBatchOffset != 0 -//Texture for loading AscanIndexes with ATT-Correction (float2) -texture texTableAscanIndexFloat2_0; -texture texTableAscanIndexFloat2_1; -texture texTableAscanIndexFloat2_2; -texture texTableAscanIndexFloat2_3; +); +__global__ void saftKernelAscanIndex_SOS( // Variant with SoS correction + int const ascanIndexBatchOffset, ///< Offset of A-scans when more than one A-scan batch is used for the reconstruction + float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction + int const maxAscanIndexArraysInTexture, ///< Maximum number of A-scans in one texture + int const TableAscanIndexAllocationCount, ///< Number of textures in use (currently limited to a maximum of 4) + int3 const IMAGE_SIZE_XYZ, ///< XYZ size of the output volume + float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids + float const IMAGE_RESOLUTION, ///< 
Resolution of Output-Volume + float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates + int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output +); +__global__ void saftKernelAscanIndex( // Version ohne-Korrektur + int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction + float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction + int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture + int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures) - // ######################################################################################################################### 3DVolume Mode with SOS- and ATT-Correction - /** - Synthetic Aperture Focusing Technique AscanIndex kernel - with SOS- and ATT-Correction - */ + int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens + float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids + float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume + float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates - // Source: const __restrict__ - // See http://www.acceleware.com/blog/constant-cache-vs-read-only-cache - // See http://acceleware.com/blog/constant-cache-vs-read-only-cache-part-2 + int const blockIndexOffset, int const speedOfSoundZLayer, dim3 const gridDimensions, dim3 const blockDimensions, double *output - __global__ void saftKernelAscanIndex_SOS_ATT( // Version SoSATT-Korrektur - - int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction - float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction - int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture - int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently 
limited to maximum 4 Textures) - - int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens - float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids - float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume - float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates - - int const blockIndexOffset, ///< - int const speedOfSoundZLayer, ///< - dim3 const gridDimensions, ///< - dim3 const blockDimensions, ///< - - //#ifdef debug_CudaSAFTAscanIndexKernel - float const debugMode, ///< - float const debugModeParameter, ///< - int const * deviceSAFT_VARIANT, ///< - //#endif - - double * output - - ) - - { - - ///////////////////////////////////////////////////////////////////////////////////////// - // 1. Determine which Voxel is to be calculated in this Kernel - ///////////////////////////////////////////////////////////////////////////////////////// - - dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread? - (threadIdx.x / blockDimensions.y) % blockDimensions.x, - threadIdx.x % blockDimensions.y, - threadIdx.x / (blockDimensions.x * blockDimensions.y) - ); - - // Index of Block for this Thread - unsigned long long int blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; - - dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0. - ((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, //Medium speed index - (blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, //Fastest index - (blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z //Slowest index - ); - - // If Voxel is outside the reconstructed Image leave Kernel - if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) - return; - ///////////////////////////////////////////////////////////////////////////////////////// - // 2. 
Determine - ///////////////////////////////////////////////////////////////////////////////////////// - // - SOSvoxel in which voxel is located - // - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths - // - Variable declarations - ///////////////////////////////////////////////////////////////////////////////////////// - - - // Memory-Index for this Thread for Output-Array of this Voxel - unsigned long long int memoryIndex = (((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) * (unsigned long long int)IMAGE_SIZE_XYZ.x + (unsigned long long int)regionOfInterestVoxel.x); - float3 SosVoxelf; //SoS-Voxel Koordinates in float - // Determine SOS-Voxel-Position - SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - //printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); - - //TexturIndex for access on Texturmemory depending of Voxel - float TexturIndexZ_AscanIndex = 0.0f; //Z-Index for access on Texturmemory - float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f. - float const TexturIndexY = SosVoxelf.y + 0.5f; - float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths - - - float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue - - ///////////////////////////////////////////////////////////////////////////////////////// - // 3. 
SAFT-Algorithmus - ///////////////////////////////////////////////////////////////////////////////////////// - // - Index aus Textur lesen und fuer zugriff auf A-scan nutzen - ///////////////////////////////////////////////////////////////////////////////////////// - - - - // Vorgehen Ascanindexvariante - // 1. Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver - // Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z). - // Xmax = 128 - // Ymax = 128 - // Zmax = 2 - // => X = x - // => Y = y - // => Z = Zmax*RecNr+z - // 2. Lade an dieser Stelle den Interpolierten Index. - // 2.a Über Texturmemory - // 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D - // 3. Lade Ascan-Sample an diesem Index und summiere auf - - - __syncthreads(); - - if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0 - { - voxelValue = 0.0; - } - else - { - voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig - } - - __syncthreads(); - -#define useSameLoop // Zugriff auf Texturen optimieren. 
Durch Abfragen von beiden Texturen in der Schleife Overhead verringern // TITAN (Matlab/Kernel) 53.8 / 61.3 - #ifdef useSameLoop - #define tryOptimize_SOSATT - //#define addTexturIndexZ_AscanIndexInLoop - #define useSync_SOS_ATT - - if ( TableAscanIndexAllocationCount == 1){ - #ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data - float2 currentSOSVoxel_AscanIndexAttValues; - - float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - - #ifdef useSync_SOS_ATT - __syncthreads(); - #endif - - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - } - #endif - - } - else if (TableAscanIndexAllocationCount == 2){ - #ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data - float2 currentSOSVoxel_AscanIndexAttValues; - - //#pragma unroll 3 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - // 1ten Teil mit selben Index laden - TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - // 2ten Teil mit selben Index laden - 
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - } - #endif - } - else if (TableAscanIndexAllocationCount == 3){ - #ifdef tryOptimize_SOSATT - - float2 currentSOSVoxel_AscanIndexAttValues_0; - float2 currentSOSVoxel_AscanIndexAttValues_1; - float2 currentSOSVoxel_AscanIndexAttValues_2; - - - float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - // load TOF-Index from Textur 0-3 - currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexAttValues_2 = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync_SOS_ATT - __syncthreads(); - #endif - - voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); - voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, 
currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); - } - #endif - } - - else if (TableAscanIndexAllocationCount == 4){ - - #ifdef tryOptimize_SOSATT - - float2 currentSOSVoxel_AscanIndexAttValues_0; - float2 currentSOSVoxel_AscanIndexAttValues_1; - float2 currentSOSVoxel_AscanIndexAttValues_2; - float2 currentSOSVoxel_AscanIndexAttValues_3; - - - float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - //syncthreads(); - // load TOF-Index from Textur 0-3 - currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexAttValues_2 = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexAttValues_3 = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync_SOS_ATT - __syncthreads(); - #endif - - voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); - voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( 
texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); - voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i); - } - - - #endif - - } - - - #endif - - __syncthreads(); - output[memoryIndex] = (double)voxelValue; - } - - - - - - - - - // ######################################################################################################################### 3DVolume Mode with SOS- and No ATT-Correction - /** - Synthetic Aperture Focusing Technique AscanIndex kernel - with SOS- but no ATT-Correction - */ - - // Try const __restrict__ - // See http://www.acceleware.com/blog/constant-cache-vs-read-only-cache - // See http://acceleware.com/blog/constant-cache-vs-read-only-cache-part-2 - - __global__ void saftKernelAscanIndex_SOS( // Version SoS-Korrektur - - int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction - float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction - int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture - int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures) - - int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens - float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids - float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume - float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates - - int const blockIndexOffset, - int const speedOfSoundZLayer, - dim3 const gridDimensions, - dim3 const blockDimensions, - - #ifdef debug_CudaSAFTAscanIndexKernel - float const debugMode, - float const debugModeParameter, - int const * deviceSAFT_VARIANT, - #endif - - double * output - - ) - - { - - ///////////////////////////////////////////////////////////////////////////////////////// - // 1. 
Determine which Voxel is to be calculated in this Kernel - ///////////////////////////////////////////////////////////////////////////////////////// - - dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread? - (threadIdx.x / blockDimensions.y) % blockDimensions.x, - threadIdx.x % blockDimensions.y, - threadIdx.x / (blockDimensions.x * blockDimensions.y) - ); - - // Index of Block for this Thread - long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; - - dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0. - ((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, //Medium speed index - (blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, //Fastest index - (blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z //Slowest index - ); - - // If Voxel is outside the reconstructed Image leave Kernel - if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) - return; - - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - printf("\n==== saftKernelAscanIndex_SOS Kernel ================================ 1 =====\n"); - printf("\n=============================================================================\n"); - printf(" => regionOfInterestVoxel [%d %d %d] Start bei 0\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf("\n=============================================================================\n"); - } - #endif - ///////////////////////////////////////////////////////////////////////////////////////// - // 2. 
Determine - ///////////////////////////////////////////////////////////////////////////////////////// - // - SOSvoxel in which voxel is located - // - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths - // - Variable declarations - ///////////////////////////////////////////////////////////////////////////////////////// - - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == 1) && (regionOfInterestVoxel.y == 1)) - { - // printf(" Z2[%d] blockIndexOffset[%d]\n", regionOfInterestVoxel.z, blockIndexOffset); - } - #endif - - - // Memory-Index for this Thread for Output-Array of this Voxel - long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x); - float3 SosVoxelf; //SoS-Voxel Koordinates in float - // Determine SOS-Voxel-Position - SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - //printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); - - //TexturIndex for access on Texturmemory depending of Voxel - float TexturIndexZ_AscanIndex = 0.0f; //Z-Index for access on Texturmemory - float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f. 
- float const TexturIndexY = SosVoxelf.y + 0.5f; - float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths - - #ifdef debug_CudaSAFTAscanIndexKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - printf("\n==== SAFT (AscanIndex) Kernel Step 1 ==========================================\n"); - printf(" VoxelIncrement = %3.12f\n", VoxelIncrement ); - printf(" TableAscanIndexAllocationCount = %i\n", TableAscanIndexAllocationCount ); - printf("--------------------------------------------------------------------------------\n"); - printf(" => regionOfInterestVoxel [%d %d %d]\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" => TexturIndexXYZ [%3.3f %3.3f %3.3f]\n", TexturIndexX, TexturIndexY, SosVoxelTextureZ); - printf(" SosVoxelStartPos(SoS) [%3.12f %3.12f %3.12f]\n", SosVoxelStartPosition.x, SosVoxelStartPosition.y, SosVoxelStartPosition.z); - printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - printf("================================================================================\n"); - - } - #endif - - float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - float voxelValue_old = 0.0; // only for debugging necessary - #endif - - - float currentSOSVoxel_AscanIndexValues; - - ///////////////////////////////////////////////////////////////////////////////////////// - // 3. SAFT-Algorithmus - ///////////////////////////////////////////////////////////////////////////////////////// - // - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen - ///////////////////////////////////////////////////////////////////////////////////////// - - // Vorgehen Ascanindexvariante - // 1. 
Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver - // Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z). - // Xmax = 128 - // Ymax = 128 - // Zmax = 2 - // => X = x - // => Y = y - // => Z = Zmax*RecNr+z - // 2. Lade an dieser Stelle den Interpolierten Index. - // 2.a Über Texturmemory - // 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D - // 3. Lade Ascan-Sample an diesem Index und summiere auf - - __syncthreads(); - if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0 - { - voxelValue = 0.0f; - } - else - { - voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig - } - - __syncthreads(); - - -// Acess on TextureMemory --> max 4 Textures are currently Supported because there is no Texture Array -//#define useOneLoop_SOS // Simple Access on Textures with one loop and calculation every time which Texture is used // TITAN (Matlab/Kernel) 30.8 / 32.9 -#define useSameLoop_SOS // Optimized Access on Textures. Reduce Overhead without branching within one Loop // TITAN (Matlab/Kernel) 54.0 / 60.8 - // Need cleaning of Ascanindex-Data - // Bei halber Nutzung der Textur TITAN (M/K) 27.3 / 37.9 -//#define useWhileLoop_SOS // Zugriff auf AscanIndex-Texturen innerhalb einer While-Schleife // TITAN (Matlab/Kernel) 22.6 / 28.7 -//#define useFourLoops_SOS // Die vier Schleifen bzw. 
Texturen nacheinander durchlaufen // TITAN (Matlab/Kernel) 39.5 / 42.8 - - - - #ifdef useOneLoop_SOS - //#pragma unroll 2 - //for(float ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s - for(int ascanIndex_i = 0; ascanIndex_i < (int)aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s - //for(float ascanIndex_i = 0.0f; ascanIndex_i < 4096.0f; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 40 GV/s //2: 30GV/s, 4: 34GV/s, 8: 34GV/s, 16: 32GV/s - { - - TexturIndexZ_AscanIndex = 2.0f * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // Determine number of current used texture memory for this A-Scan - if ( ascanIndex_i < maxAscanIndexArraysInTexture ){ - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( ascanIndex_i < 2*maxAscanIndexArraysInTexture ) { - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( ascanIndex_i < 3*maxAscanIndexArraysInTexture ){ - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( ascanIndex_i < 4*maxAscanIndexArraysInTexture ){ - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues.x - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - } - #endif - - #ifdef useSameLoop_SOS // Nur eine Schleife voll durchlaufen. 
Immer beide Texturen lesen, dabei aber am Ende immer nuller laden - - //float VoxelAscanIndex2 = 0.0f; - //float voxelValue2 = 0.0f; - - if ( TableAscanIndexAllocationCount == 1){ - - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - //for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // nutze immer nur 1tes Surface - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - AIndexTextures=1: TexturIndexXYT, TexturIndexZ_AscanIndex [%6.3f %6.3f %6.3f]\n " - "VoxelAscanIndex(tof_idx %30.24f, att %19.12f) , voxelValue = %9.4f\n", - ascanIndex_i, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex, - currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y, voxelValue); - //printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - //voxelValue_old = voxelValue; - } - #endif - } - - - } - else if (TableAscanIndexAllocationCount == 2){ - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; 
ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - TexturIndexZ_AscanIndex = 2.0*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - } - } - else if (TableAscanIndexAllocationCount == 3){ - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - TexturIndexZ_AscanIndex = 2.0*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - 
voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f); - } - - } - - else if (TableAscanIndexAllocationCount == 4){ - - #define tryOptimize_SOS - #ifdef tryOptimize_SOS - - float currentSOSVoxel_AscanIndexValues_0; - float currentSOSVoxel_AscanIndexValues_1; - float currentSOSVoxel_AscanIndexValues_2; - float currentSOSVoxel_AscanIndexValues_3; - - - float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#define addTexturIndexZ_AscanIndexInLoop - //#define useSync_SOS - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - //#pragma unroll 4 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - //syncthreads(); - // load TOF-Index from Textur 0-3 - currentSOSVoxel_AscanIndexValues_0 = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexValues_1 = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, 
TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexValues_2 = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexValues_3 = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync_SOS - __syncthreads(); - #endif - - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i); - - //if (((int)ascanIndex_i & 31) == 0) __syncthreads(); - } - #else - //#pragma unroll 2 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - __syncthreads(); - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - //syncthreads(); - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - 
- __syncthreads(); - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2.0f*maxAscanIndexArraysInTexture +0.5f); - - //syncthreads(); - // 4ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 3.0f*maxAscanIndexArraysInTexture +0.5f); - } - #endif - - - } - else - { - // Do nothing due to only 4 are defined - } - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("<<<< VoxelAscanIndex(sos %19.12f, att %19.12f) , voxelValue = %9.4f\n", currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue); - //printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - //voxelValue_old = voxelValue; - } - #endif - - #endif - - #ifdef useFourLoops_SOS - if (TableAscanIndexAllocationCount == 4){ - - - #define addTexturIndexZ_AscanIndexInLoop - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - //#define useSync - - float Offset = (float)ascanIndexBatchOffset + 0.5f; - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop 
- TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); // i gibt Index fuer Ascan an - } - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - Offset = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); - } - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - Offset = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer 
Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); - } - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - Offset = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 4ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); - } - } - #endif - - #ifdef useWhileLoop_SOS - - float ascanIndex_i = 0.0; - int compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll - - //float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - float IndexTextureOffset = SosVoxelTextureZ; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - - do{ - - if ( ascanIndex_i >= compareIndexNextTexture){ - compareIndexNextTexture += maxAscanIndexArraysInTexture; - //IndexTextureOffset += maxAscanIndexArraysInTexture; - 
IndexTextureOffset -= 2.0f *maxAscanIndexArraysInTexture; - } - - // Calculate the Z-Index for storing the AscanIndex value - // TexturIndexZ_AscanIndex = 2.0f *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ; - TexturIndexZ_AscanIndex = 2.0f *ascanIndex_i + IndexTextureOffset; - - if ( compareIndexNextTexture == maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 2*maxAscanIndexArraysInTexture) { - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 4*maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - //currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex; - //currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor; - #define Interpolation_Textur_Standard - //#define Interpolation_Cosinus - //#define Interpolation_Spline - - #ifdef Interpolation_Textur_Standard - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - #endif 
- - #ifdef Interpolation_Cosinus - float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x); - float mu2 = (1-cos(mu*3.14159265359f))/2; - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (y1*(1-mu2)+y2*mu2); - #endif - - #ifdef Interpolation_Spline - float y0 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y3 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 2.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x); - float mu2 = mu*mu; - float a0 = y3 - y2 - y0 + y1; - float a1 = y0 - y1 - a0; - float a2 = y2 - y0; - float a3 = y1; - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (a0*mu*mu2+a1*mu2+a2*mu+a3); - #endif - - //#ifdef debug_CudaSAFTAscanIndexKernelDataAccess -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) -// { -// printf("ascanIndex_i(%.0f) - %.0f: VoxelAscanIndex(idx %30.24f) , voxelValue = %9.4f\n", ascanIndex_i, (compareIndexNextTexture), currentSOSVoxel_AscanIndexAttValues.x, voxelValue); -// //voxelValue_old = voxelValue; -// } - //#endif - - - -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ -// -// if ((ascanIndex_i <= 10)) 
// Anfang -// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte -// //if ((ascanIndex_i >= 1400)) // Ende -// //if ((ascanIndex_i >= 2800)) // Ende -// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture)); -// -// } - - ascanIndex_i += 1.0f; - }while(ascanIndex_i < (int)aScanWindowSize); - - - #endif - -// #ifdef GTX_Kepler -// __syncthreads(); -// #endif - #ifdef debug_CudaSAFTAscanIndexKernel - //printf(" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> debugModeParameter %f\n", debugModeParameter); - switch ( (int)(debugMode) ) - { - case 0: output[memoryIndex] = (double)voxelValue; break; - case 1: output[memoryIndex] = (double)blockIndex; break; - case 2: output[memoryIndex] = (double)memoryIndex; break; - - case 3: output[memoryIndex] = (double)regionOfInterestVoxel.x; break; // Coordinates in Voxel - case 4: output[memoryIndex] = (double)regionOfInterestVoxel.y; break; - case 5: output[memoryIndex] = (double)regionOfInterestVoxel.z; break; - - case 6: output[memoryIndex] = (double)SosVoxelf.x; break;// Coordinates in SOS-Voxel (float) - case 7: output[memoryIndex] = (double)SosVoxelf.y; break; - case 8: output[memoryIndex] = (double)SosVoxelf.z; break; - - case 9: output[memoryIndex] = (double)TexturIndexX; break;// Coordinates in SOS-Voxel (float) - case 10: output[memoryIndex] = (double)TexturIndexY; break; - case 11: output[memoryIndex] = (double)SosVoxelTextureZ; break; - case 12: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; - - case 13: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexValues; break; // VoxelAscanIndex - //case 14: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.y; break; // VoxelAttenuation - - case 20: 
output[memoryIndex] = (double)speedOfSoundZLayer; break; - case 21: output[memoryIndex] = (double)TableAscanIndexAllocationCount; break; - - - default: output[memoryIndex] = (double)0; break; - } - #else - __syncthreads(); - output[memoryIndex] = (double)voxelValue; - #endif - - #ifdef debug_CudaSAFTKernel - if (debugMode != 0) - { - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - //printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - //if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){ - printf("\n saftKernel: debugMode [%f]\n", debugMode); - //printf(" => voxelValue = %f\n", voxelValue); - //printf(" SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" (current)speedOfSoundZLayer %d\n", speedOfSoundZLayer); - printf(" maxFeasibleSosZLayerCount [%i]\n", maxFeasibleSosZLayerCount); - - printf(" SosVoxelf [%3.15f %3.15f %3.15f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - - // SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - printf(" SosVoxelTextureZ [%3.15f]\n", SosVoxelTextureZ); - - // TexturIndexZEmitter = maxFeasibleSosZLayerCount * (currentEmitterIndex-1) + SosVoxelTextureZ; // Index für Zugriff auf Textur - // TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % 
maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur - printf(" TexturIndexXYZ Em/Rec [%3.15f %3.15f %3.15f/%3.15f]\n\n", TexturIndexX, TexturIndexY, TexturIndexZEmitter, TexturIndexZReceiver); - - - switch (deviceSAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - //switch ( debugMode >= 60 ? 60 : debugMode >= 30 ? 30 : 0 ) // drei Bereiche - { - case 0: // mit Textur - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur - printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver); - break; - case 1: // mit Textur interpoliert - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Index für Zugriff auf Textur - printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) + 0.5f = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver); - break; - } - - // // Speicher in Texturformat - // // Indexberechnung für Einsatz des Texturmemorys - // float xmax = SOSGrid_XYZ.x; - // float ymax = SOSGrid_XYZ.y; - // float zmax = (float)maxFeasibleSosZLayerCount; - // float i_x = SosVoxel.x; - // float i_y = SosVoxel.y; - // float i_z = (float)(int)(SosVoxelTextureZ); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - - //Index = xmax*(ymax*(zmax*geometryIndexCounter+i_z)+i_y)+i_x; - printf(" currentEmitterIndex %d\n", currentEmitterIndex); - printf(" currentReceiverIndex %d\n", currentReceiverIndex); - 
printf(" (currentReceiverIndex-1) mod maxSoSReceiverArrayForTexture) %d\n", ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture)); - - printf(" EmIndex = xmax(%i)*(ymax(%i)*( TexturIndexZEmitter(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZEmitter, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZEmitter )+(int)TexturIndexY)+(int)TexturIndexX); - printf(" RecIndex = xmax(%i)*(ymax(%i)*( TexturIndexZReceiver(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZReceiver, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZReceiver)+(int)TexturIndexY)+(int)TexturIndexX); - - - printf(" emitterPosition [%3.15f %3.15f %3.15f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount); - printf(" 1/emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum); - - printf(" receiverPosition [%3.15f %3.15f %3.15f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount); - printf(" 1/receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum); - - printf(" => averageSpeed %f\n\n", averageSpeed); - // printf(" emitterPosition [%f %f %f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - // printf(" emitterDistance %f\n", emitterDistance); - // printf(" receiverPosition [%f %f %f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - // printf(" receiverDistance %f\n", receiverDistance); - // printf(" sampleTime %f\n", sampleTime); - // printf(" totalDistance %f\n\n", totalDistance); - // - // printf(" analyticAverageSpeed %f\n", analyticAverageSpeed); - // printf(" analyticTotalTime %f\n", analyticTotalTime); - // printf(" analyticSampleTime %f\n", analyticSampleTime); - // printf(" S_Wasser(%f) S_Kugel(%f) \n", S_Wasser, S_Kugel); 
- // printf(" cWasser(%f) cKugel(%f) \n", cWasser, cKugel); - // printf(" sampleRate %.10f\n", sampleRate); - printf("\n"); - - // printf(" => voxelValue = %3.12f\n", voxelValue); - - - //} - } - } - #endif - - } - - - - // ######################################################################################################################### 3DVolume Mode with SOS- and No ATT-Correction - /** - Synthetic Aperture Focusing Technique AscanIndex kernel - with SOS- but no ATT-Correction - */ - - // Try const __restrict__ - // See http://www.acceleware.com/blog/constant-cache-vs-read-only-cache - // See http://acceleware.com/blog/constant-cache-vs-read-only-cache-part-2 - - __global__ void saftKernelAscanIndex( // Version ohne-Korrektur - - int const ascanIndexBatchOffset, ///< Offset of AScans if more then one AScan-Batch is used for Reconstruction - float const aScanWindowSize, ///< Amount of AScans used here for Reconstruction - int const maxAscanIndexArraysInTexture, ///< Maximum amount in A-Scans in one Texture - int const TableAscanIndexAllocationCount, ///< Used amount of Textures (currently limited to maximum 4 Textures) - - int3 const IMAGE_SIZE_XYZ, ///< XYZ des Outputvolumens - float3 const SosVoxelStartPosition, ///< Offset of SOS-Grids - float const IMAGE_RESOLUTION, ///< Resolution of Output-Volume - float const VoxelIncrement, ///< Step-Width of one Voxel in SOS-Korrdinates - - int const blockIndexOffset, - int const speedOfSoundZLayer, - dim3 const gridDimensions, - dim3 const blockDimensions, - - #ifdef debug_CudaSAFTAscanIndexKernel - float const debugMode, - float const debugModeParameter, - int const * deviceSAFT_VARIANT, - #endif - - double * output - - ) - - { - - ///////////////////////////////////////////////////////////////////////////////////////// - // 1. 
Determine which Voxel is to be calculated in this Kernel - ///////////////////////////////////////////////////////////////////////////////////////// - - dim3 blockVoxel( // Which Block(xyz) corresponds to this Thread? - (threadIdx.x / blockDimensions.y) % blockDimensions.x, - threadIdx.x % blockDimensions.y, - threadIdx.x / (blockDimensions.x * blockDimensions.y) - ); - - // Index of Block for this Thread - long blockIndex = ((blockIndexOffset + blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; - - dim3 regionOfInterestVoxel( // Which Voxel corresponds to this Thread? Start with 0. - ((blockIndex / gridDimensions.y) % gridDimensions.x) * blockDimensions.x + blockVoxel.x, //Medium speed index - (blockIndex % gridDimensions.y) * blockDimensions.y + blockVoxel.y, //Fastest index - (blockIndex / (gridDimensions.x * gridDimensions.y)) * blockDimensions.z + blockVoxel.z //Slowest index - ); - - // If Voxel is outside the reconstructed Image leave Kernel - if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) - return; - - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - printf("\n==== saftKernelAscanIndex Kernel ==================================== 1 =====\n"); - printf("\n=============================================================================\n"); - printf(" => regionOfInterestVoxel [%d %d %d] Start bei 0\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf("\n=============================================================================\n"); - } - #endif - ///////////////////////////////////////////////////////////////////////////////////////// - // 2. 
Determine - ///////////////////////////////////////////////////////////////////////////////////////// - // - SOSvoxel in which voxel is located - // - Index of OutputVolume, and tables of Emitter, Receiver coordinates and SOSpaths - // - Variable declarations - ///////////////////////////////////////////////////////////////////////////////////////// - - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == 1) && (regionOfInterestVoxel.y == 1)) - { - //printf(" Z2[%d] blockIndexOffset[%d]\n", regionOfInterestVoxel.z, blockIndexOffset); - } - #endif - - - // Memory-Index for this Thread for Output-Array of this Voxel - long memoryIndex = ((IMAGE_SIZE_XYZ.y * (regionOfInterestVoxel.z - blockIndexOffset) + regionOfInterestVoxel.y) * IMAGE_SIZE_XYZ.x + regionOfInterestVoxel.x); - float3 SosVoxelf; //SoS-Voxel Koordinates in float - // Determine SOS-Voxel-Position - SosVoxelf.x = (SosVoxelStartPosition.x + (VoxelIncrement * regionOfInterestVoxel.x)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - SosVoxelf.y = (SosVoxelStartPosition.y + (VoxelIncrement * regionOfInterestVoxel.y)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - SosVoxelf.z = (SosVoxelStartPosition.z + (VoxelIncrement * regionOfInterestVoxel.z)); //Hier Addition der SOSVoxel im SoS-Grid durchfuehren - //printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); - - //TexturIndex for access on Texturmemory depending of Voxel - float TexturIndexZ_AscanIndex = 0.0f; //Z-Index for access on Texturmemory - float const TexturIndexX = SosVoxelf.x + 0.5f; // Due to Access over Texturmemory +0.5f. 
- float const TexturIndexY = SosVoxelf.y + 0.5f; - float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths - - #ifdef debug_CudaSAFTAscanIndexKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - printf("\n==== SAFT (AscanIndex) Kernel Step 1 ==========================================\n"); - printf(" VoxelIncrement = %3.12f\n", VoxelIncrement ); - printf(" TableAscanIndexAllocationCount = %i\n", TableAscanIndexAllocationCount ); - printf("--------------------------------------------------------------------------------\n"); - printf(" => regionOfInterestVoxel [%d %d %d]\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" => TexturIndexXYZ [%3.3f %3.3f %3.3f]\n", TexturIndexX, TexturIndexY, SosVoxelTextureZ); - printf(" SosVoxelStartPos(SoS) [%3.12f %3.12f %3.12f]\n", SosVoxelStartPosition.x, SosVoxelStartPosition.y, SosVoxelStartPosition.z); - printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - printf("================================================================================\n"); - } - #endif - - float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - float voxelValue_old = 0.0; // only for debugging necessary - #endif - float currentSOSVoxel_AscanIndexValues; - - ///////////////////////////////////////////////////////////////////////////////////////// - // 3. SAFT-Algorithmus - ///////////////////////////////////////////////////////////////////////////////////////// - // - Index aus Textur lesen und fuer zugriff auf A-Scan nutzen - ///////////////////////////////////////////////////////////////////////////////////////// - - // Vorgehen Ascanindexvariante - // 1. 
Bestimme Koordinaten in SOS-Koordinaten für festen Emitter und 1413 Receiver - // Index = X+Xmax*Y+Xmax*Ymax*(Zmax*RecNr+Z). - // Xmax = 128 - // Ymax = 128 - // Zmax = 2 - // => X = x - // => Y = y - // => Z = Zmax*RecNr+z - // 2. Lade an dieser Stelle den Interpolierten Index. - // 2.a Über Texturmemory - // 2.b über alle 8 benachbarten Voxel oder 64 bei tricubic-Interpolation in 3D - // 3. Lade Ascan-Sample an diesem Index und summiere auf - - __syncthreads(); - if (ascanIndexBatchOffset == 0) // Initialisierung beim ersten Kernelaufruf sprich ascanIndexBatchOffset == 0 - { - voxelValue = 0.0f; - } - else - { - voxelValue = (float)output[memoryIndex]; // Alle anderen Kernelaufrufe muessen zuerst den alten Wert laden, ist bei mehreren Durchlaeufen noetig - } - - //#ifdef GTX_Kepler - __syncthreads(); - //#endif - -// Acess on TextureMemory --> max 4 Textures are currently Supported because there is no Texture Array -//#define useOneLoop_SOS // Simple Access on Textures with one loop and calculation every time which Texture is used // TITAN (Matlab/Kernel) 30.8 / 32.9 -#define useSameLoop_SOS // Optimized Access on Textures. Reduce Overhead without branching within one Loop // TITAN (Matlab/Kernel) 54.0 / 60.8 - // Need cleaning of Ascanindex-Data - // Bei halber Nutzung der Textur TITAN (M/K) 27.3 / 37.9 -//#define useWhileLoop_SOS // Zugriff auf AscanIndex-Texturen innerhalb einer While-Schleife // TITAN (Matlab/Kernel) 22.6 / 28.7 -//#define useFourLoops_SOS // Die vier Schleifen bzw. 
Texturen nacheinander durchlaufen // TITAN (Matlab/Kernel) 39.5 / 42.8 - - - - #ifdef useOneLoop_SOS - //#pragma unroll 2 - //for(float ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s - for(int ascanIndex_i = 0; ascanIndex_i < (int)aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s - //for(float ascanIndex_i = 0.0f; ascanIndex_i < 4096.0f; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 40 GV/s //2: 30GV/s, 4: 34GV/s, 8: 34GV/s, 16: 32GV/s - { - - TexturIndexZ_AscanIndex = 2.0f * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // Determine number of current used texture memory for this A-Scan - if ( ascanIndex_i < maxAscanIndexArraysInTexture ){ - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( ascanIndex_i < 2*maxAscanIndexArraysInTexture ) { - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( ascanIndex_i < 3*maxAscanIndexArraysInTexture ){ - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( ascanIndex_i < 4*maxAscanIndexArraysInTexture ){ - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues.x - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - } - #endif - - #ifdef useSameLoop_SOS // Nur eine Schleife voll durchlaufen. 
Immer beide Texturen lesen, dabei aber am Ende immer nuller laden - - //float VoxelAscanIndex2 = 0.0f; - //float voxelValue2 = 0.0f; - - if ( TableAscanIndexAllocationCount == 1){ - - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // nutze immer nur 1tes Surface - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - AIndexTextures=1: TexturIndexXYT, TexturIndexZ_AscanIndex [%6.3f %6.3f %6.3f]\n " - "VoxelAscanIndex(tof_idx %30.24f, att %19.12f) , voxelValue = %9.4f\n", - ascanIndex_i, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex, - currentSOSVoxel_AscanIndexAttValues.x, currentSOSVoxel_AscanIndexAttValues.y, voxelValue); - //printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - //voxelValue_old = voxelValue; - } - #endif - } - - - } - else if (TableAscanIndexAllocationCount == 2){ - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // 
GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - TexturIndexZ_AscanIndex = 2.0*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - } - } - else if (TableAscanIndexAllocationCount == 3){ - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - TexturIndexZ_AscanIndex = 2.0*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + 
ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f); - } - - } - - else if (TableAscanIndexAllocationCount == 4){ - - #define tryOptimize_SOS - #ifdef tryOptimize_SOS - - float currentSOSVoxel_AscanIndexValues_0; - float currentSOSVoxel_AscanIndexValues_1; - float currentSOSVoxel_AscanIndexValues_2; - float currentSOSVoxel_AscanIndexValues_3; - - - float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#define addTexturIndexZ_AscanIndexInLoop - //#define useSync - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - //__syncthreads(); - // load TOF-Index from Textur 0-3 - currentSOSVoxel_AscanIndexValues_0 = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexValues_1 = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexValues_2 = tex3D( 
texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexValues_3 = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_0 - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_1 - 0.5f, Offset_1 + ascanIndex_i); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_2 - 0.5f, Offset_2 + ascanIndex_i); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues_3 - 0.5f, Offset_3 + ascanIndex_i); - } - #else - //#pragma unroll 2 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - __syncthreads(); - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - //__syncthreads(); - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - __syncthreads(); - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, 
TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 2.0f*maxAscanIndexArraysInTexture +0.5f); - - //__syncthreads(); - // 4ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 3.0f*maxAscanIndexArraysInTexture +0.5f); - } - #endif - - - } - else - { - // Do nothing due to only 4 are defined - } - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("<<<< VoxelAscanIndex(sos %19.12f, att %19.12f) , voxelValue = %9.4f\n", currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue); - //printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - //voxelValue_old = voxelValue; - } - #endif - - #endif - - #ifdef useFourLoops_SOS - if (TableAscanIndexAllocationCount == 4){ - - - #define addTexturIndexZ_AscanIndexInLoop - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - //#define useSync - - float Offset = (float)ascanIndexBatchOffset + 0.5f; - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + 
SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 1ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); // i gibt Index fuer Ascan an - } - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - Offset = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); - } - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - Offset = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur 
interpoliert - #endif - - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); - } - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - Offset = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - // 4ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexValues - 0.5f, Offset + ascanIndex_i); - } - } - #endif - - #ifdef useWhileLoop_SOS - - float ascanIndex_i = 0.0; - int compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll - - //float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - float IndexTextureOffset = SosVoxelTextureZ; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - - do{ - - if ( ascanIndex_i >= compareIndexNextTexture){ - compareIndexNextTexture += maxAscanIndexArraysInTexture; - //IndexTextureOffset += maxAscanIndexArraysInTexture; - IndexTextureOffset -= 2.0f *maxAscanIndexArraysInTexture; - } - - // Calculate the Z-Index for storing the AscanIndex value - // 
TexturIndexZ_AscanIndex = 2.0f *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ; - TexturIndexZ_AscanIndex = 2.0f *ascanIndex_i + IndexTextureOffset; - - if ( compareIndexNextTexture == maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 2*maxAscanIndexArraysInTexture) { - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 4*maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - //currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex; - //currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor; - #define Interpolation_Textur_Standard - //#define Interpolation_Cosinus - //#define Interpolation_Spline - - #ifdef Interpolation_Textur_Standard - voxelValue += tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)ascanIndexBatchOffset + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - #endif - - #ifdef Interpolation_Cosinus - float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, 
ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x); - float mu2 = (1-cos(mu*3.14159265359f))/2; - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (y1*(1-mu2)+y2*mu2); - #endif - - #ifdef Interpolation_Spline - float y0 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float y3 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 2.5f, ascanIndexBatchOffset + ascanIndex_i + 0.5f); - float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x); - float mu2 = mu*mu; - float a0 = y3 - y2 - y0 + y1; - float a1 = y0 - y1 - a0; - float a2 = y2 - y0; - float a3 = y1; - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (a0*mu*mu2+a1*mu2+a2*mu+a3); - #endif - - //#ifdef debug_CudaSAFTAscanIndexKernelDataAccess -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) -// { -// printf("ascanIndex_i(%.0f) - %.0f: VoxelAscanIndex(idx %30.24f) , voxelValue = %9.4f\n", ascanIndex_i, (compareIndexNextTexture), currentSOSVoxel_AscanIndexAttValues.x, voxelValue); -// //voxelValue_old = voxelValue; -// } - //#endif - - - -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ -// -// if ((ascanIndex_i <= 10)) // Anfang -// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte -// //if ((ascanIndex_i >= 1400)) 
// Ende -// //if ((ascanIndex_i >= 2800)) // Ende -// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture)); -// -// } - - ascanIndex_i += 1.0f; - }while(ascanIndex_i < (int)aScanWindowSize); - - - #endif - -// #ifdef GTX_Kepler -// __syncthreads(); -// #endif - #ifdef debug_CudaSAFTAscanIndexKernel - //printf(" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> debugModeParameter %f\n", debugModeParameter); - switch ( (int)(debugMode) ) - { - case 0: output[memoryIndex] = (double)voxelValue; break; - case 1: output[memoryIndex] = (double)blockIndex; break; - case 2: output[memoryIndex] = (double)memoryIndex; break; - - case 3: output[memoryIndex] = (double)regionOfInterestVoxel.x; break; // Coordinates in Voxel - case 4: output[memoryIndex] = (double)regionOfInterestVoxel.y; break; - case 5: output[memoryIndex] = (double)regionOfInterestVoxel.z; break; - - case 6: output[memoryIndex] = (double)SosVoxelf.x; break;// Coordinates in SOS-Voxel (float) - case 7: output[memoryIndex] = (double)SosVoxelf.y; break; - case 8: output[memoryIndex] = (double)SosVoxelf.z; break; - - case 9: output[memoryIndex] = (double)TexturIndexX; break;// Coordinates in SOS-Voxel (float) - case 10: output[memoryIndex] = (double)TexturIndexY; break; - case 11: output[memoryIndex] = (double)SosVoxelTextureZ; break; - case 12: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; - - case 13: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexValues; break; // VoxelAscanIndex - //case 14: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.y; break; // VoxelAttenuation - - case 20: output[memoryIndex] = (double)speedOfSoundZLayer; break; - case 21: output[memoryIndex] = 
(double)TableAscanIndexAllocationCount; break; - - - default: output[memoryIndex] = (double)0; break; - } - #else - __syncthreads(); - output[memoryIndex] = (double)voxelValue; - #endif - - #ifdef debug_CudaSAFTKernel - if (debugMode != 0) - { - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - //printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - //if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){ - printf("\n saftKernel: debugMode [%f]\n", debugMode); - //printf(" => voxelValue = %f\n", voxelValue); - //printf(" SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" (current)speedOfSoundZLayer %d\n", speedOfSoundZLayer); - printf(" maxFeasibleSosZLayerCount [%i]\n", maxFeasibleSosZLayerCount); - - printf(" SosVoxelf [%3.15f %3.15f %3.15f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - - // SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - printf(" SosVoxelTextureZ [%3.15f]\n", SosVoxelTextureZ); - - // TexturIndexZEmitter = maxFeasibleSosZLayerCount * (currentEmitterIndex-1) + SosVoxelTextureZ; // Index für Zugriff auf Textur - // TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur - printf(" TexturIndexXYZ Em/Rec 
[%3.15f %3.15f %3.15f/%3.15f]\n\n", TexturIndexX, TexturIndexY, TexturIndexZEmitter, TexturIndexZReceiver); - - - switch (deviceSAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - //switch ( debugMode >= 60 ? 60 : debugMode >= 30 ? 30 : 0 ) // drei Bereiche - { - case 0: // mit Textur - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur - printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver); - break; - case 1: // mit Textur interpoliert - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Index für Zugriff auf Textur - printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) + 0.5f = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver); - break; - } - - // // Speicher in Texturformat - // // Indexberechnung für Einsatz des Texturmemorys - // float xmax = SOSGrid_XYZ.x; - // float ymax = SOSGrid_XYZ.y; - // float zmax = (float)maxFeasibleSosZLayerCount; - // float i_x = SosVoxel.x; - // float i_y = SosVoxel.y; - // float i_z = (float)(int)(SosVoxelTextureZ); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - - //Index = xmax*(ymax*(zmax*geometryIndexCounter+i_z)+i_y)+i_x; - printf(" currentEmitterIndex %d\n", currentEmitterIndex); - printf(" currentReceiverIndex %d\n", currentReceiverIndex); - printf(" (currentReceiverIndex-1) mod maxSoSReceiverArrayForTexture) %d\n", ((currentReceiverIndex-1) % 
maxSoSReceiverArrayForTexture)); - - printf(" EmIndex = xmax(%i)*(ymax(%i)*( TexturIndexZEmitter(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZEmitter, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZEmitter )+(int)TexturIndexY)+(int)TexturIndexX); - printf(" RecIndex = xmax(%i)*(ymax(%i)*( TexturIndexZReceiver(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZReceiver, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZReceiver)+(int)TexturIndexY)+(int)TexturIndexX); - - - printf(" emitterPosition [%3.15f %3.15f %3.15f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount); - printf(" 1/emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum); - - printf(" receiverPosition [%3.15f %3.15f %3.15f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount); - printf(" 1/receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum); - - printf(" => averageSpeed %f\n\n", averageSpeed); - // printf(" emitterPosition [%f %f %f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - // printf(" emitterDistance %f\n", emitterDistance); - // printf(" receiverPosition [%f %f %f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - // printf(" receiverDistance %f\n", receiverDistance); - // printf(" sampleTime %f\n", sampleTime); - // printf(" totalDistance %f\n\n", totalDistance); - // - // printf(" analyticAverageSpeed %f\n", analyticAverageSpeed); - // printf(" analyticTotalTime %f\n", analyticTotalTime); - // printf(" analyticSampleTime %f\n", analyticSampleTime); - // printf(" S_Wasser(%f) S_Kugel(%f) \n", S_Wasser, S_Kugel); - // printf(" cWasser(%f) cKugel(%f) \n", cWasser, cKugel); - // printf(" sampleRate %.10f\n", 
sampleRate); - printf("\n"); - - // printf(" => voxelValue = %3.12f\n", voxelValue); - - - //} - } - } - #endif - - } - -// ====================================================================================================================================================================================== -/** - Proxy function to launch the SAFT kernel. -*/ -void SAFTHandler::performSAFT( - int aScanIndex, ///< The A-scan index is increased by the A-scan batch size in every iteration. It describes the offset into the A-scan samples the SAFT kernel is operating with. - size_t aScanWindowSize, ///< A-scan batch size in terms of number of samples within one window. - int3 IMAGE_SIZE_XYZ, ///< Bildbereichsgroesse/ROI in Voxel - int3 SOSGrid_XYZ, ///< SoSGridgroesse in Voxel - int blockIndexOffset, ///< Additional offset added to the z component of the block index, required because of the adjustments for partial reconstruction in different z-layers. - int outputWindowVoxelCount, ///< Number of Voxels in the output window. - int speedOfSoundZLayer, ///< current SoS z-layer Offset in the speed of sound grid. - int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the z-layers of the active zone of reconstruction in the region of interest. - int maxFeasibleSosZLayerCount, - int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated - dim3 const & windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than the full grid dimensions and only represents the current reconstruction window. - dim3 const & gridDimensions, ///< Full grid dimensions of the reconstruction. - dim3 const & blockDimensions, ///< Block dimensions to be used with the SAFT kernel. - float * deviceSpeedOfSoundField, ///< Pointer to SoSGrid. - cudaArray *deviceAScansCuArray - // cudaStream_t stream ///< Stream to execute the SAFT kernel on. 
- ) -{ - - - #ifdef debug_OutputFunctions - printf("==> SAFTHandler::performSAFT - Start\n"); - #endif - - dim3 - reducedGridDimensions, - reducedBlockDimensions; - - reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions); -// printf( "actual windowGridDimensions x,y,z: %i %i %i\n", windowGridDimensions.x, windowGridDimensions.y, windowGridDimensions.z); -// printf( " -> reducedGridDimensions x,y,z: %i %i %i\n", reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z); -// printf( "actual blockDimensions x,y,z: %i %i %i\n", blockDimensions.x, blockDimensions.y, blockDimensions.z); -// printf( " -> reducedBlockDimensions x,y,z: %i %i %i\n", reducedBlockDimensions.x, reducedBlockDimensions.y, reducedBlockDimensions.z); - - // cudaFuncCachePreferShared: shared memory is 48 KB - // cudaFuncCachePreferL1: shared memory is 16 - // cudaFuncCachePreferNone: no preference - #ifdef SaftPreferL1SharedMem // cudaFuncCachePreferL1: shared memory is 16 KB - CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT , cudaFuncCachePreferL1)); - CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS , cudaFuncCachePreferL1)); - CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex , cudaFuncCachePreferL1)); - #endif - - // Texture Memory Adressing-mode // http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf -> 3.2.11.1. Texture Memory S. 
42 - // cudaAddressModeClamp - Return values at the boarders if out-of range - default - // cudaAddressModeBorder - Return 0 if out-of range - // cudaAddressModeMirror - Mirror the values - For normalized coordinates - // cudaAddressModeWrap - Repeating the values - For normalized coordinates - - // Texturmemory fuer Ascans - cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0,0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur - texRefAscans.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben - texRefAscans.addressMode[1] = cudaAddressModeBorder; - //texRefAscans.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - //texRefAscans.addressMode[1] = cudaAddressModeClamp; - - #if !(defined(Interpolation_Cosinus) || defined(Interpolation_Spline)) - if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1){ - texRefAscans.filterMode = cudaFilterModeLinear; // Lineare Interpolation - } - else{ - texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor - } - #else - texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor - #endif - - texRefAscans.normalized = 0; - CUDA_CHECK(cudaBindTextureToArray ( &texRefAscans, deviceAScansCuArray, &texChannelDesc )); - - - if (ATTMode_3DVolume == false){ // ========= 3DVolume Mode without ATT-Correction - - cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc(); // Should do the same - - // AscanIndex Path Tables ------------------------------------------------------ - texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeClamp; - //texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben - //texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeBorder; - 
//texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeBorder; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat1_0.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat1_0.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat1_0.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat )); - - if (TableAscanIndexAllocationCount>1){ // TODO: mit Arrays flexibel programmieren!!! - texTableAscanIndexFloat1_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat1_1.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat1_1.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat1_1.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat1_1.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat1_1.normalized = 0; - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat )); - - } - - if (TableAscanIndexAllocationCount>2){ - texTableAscanIndexFloat1_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat1_2.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat1_2.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat1_2.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat1_2.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat1_2.normalized = 0; - - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat )); - - } - - if 
(TableAscanIndexAllocationCount>3){ - texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeClamp; - //texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben - //texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeBorder; - //texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeBorder; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat1_3.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat1_3.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat1_3.normalized = 0; - - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat )); - - } - } - else if (ATTMode_3DVolume == true){ // ========= 3DVolume Mode with ATT-Correction - - cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc(); // Should do the same - - // AscanIndex Path Tables ------------------------------------------------------ - texTableAscanIndexFloat2_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat2_0.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat2_0.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat2_0.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat2_0.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat2_0.normalized = 0; - - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat )); - - - if (TableAscanIndexAllocationCount>1){ // TODO: mit Arrays flexibel 
programmieren!!! - texTableAscanIndexFloat2_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat2_1.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat2_1.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat2_1.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat2_1.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat2_1.normalized = 0; - - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat )); - - } - - if (TableAscanIndexAllocationCount>2){ - texTableAscanIndexFloat2_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat2_2.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat2_2.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat2_2.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat2_2.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat2_2.normalized = 0; - - - CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat )); - - } - - if (TableAscanIndexAllocationCount>3){ - texTableAscanIndexFloat2_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben - texTableAscanIndexFloat2_3.addressMode[1] = cudaAddressModeClamp; - texTableAscanIndexFloat2_3.addressMode[2] = cudaAddressModeClamp; - switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - { - case 0: texTableAscanIndexFloat2_3.filterMode = cudaFilterModePoint; break; - case 1: texTableAscanIndexFloat2_3.filterMode = cudaFilterModeLinear; break; - } - texTableAscanIndexFloat2_3.normalized = 0; - - - 
CUDA_CHECK(cudaBindTextureToArray ( &texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat )); - - } - } - //SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction] = 1; - //printf("\n\n --- SAFT (AscanIndex) --- SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction] = %i\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]); - - - // Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition - float VoxelIncrement = IMAGE_RESOLUTION/SOS_RESOLUTION; - float3 SosVoxelStartPosition; - SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x ) / SOS_RESOLUTION; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen - SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y ) / SOS_RESOLUTION; - SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z ) / SOS_RESOLUTION; - //printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); - - // Anzahl der Teiltabellen, bereits hier berechnen oder übergeben - int TableAscanIndexAllocationCount = ceil((float)aScanWindowSize/(float)maxAscanIndexArraysInTexture); // float is important due to ceiling - //printf( "TableAscanIndexAllocationCount = (%i/%i) = %i = %i\n", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount, ceil(aScanWindowSize/maxAscanIndexArraysInTexture)); - - - // Call of 3 SAFT Versions - AscanIndex-Varainten - // ##################################################################################################################################################################### - // ==================================================== Blockmode with SOS-value per Ascan - // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction - // ==================================================== 3DVolume Mode with SOS- and ATT-Correction - - if 
((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false)){ // ==================================================== Blockmode with SOS-value per Ascan - //#ifdef debug_OutputInfo - printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n"); - //#endif - -// saftKernelAscanIndex <<>>( // , 0, stream>>>( -// -// aScanIndex, -// -// (float) aScanWindowSize, // maxAscanIndexArraysInTexture -// maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture -// TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen -// -// IMAGE_SIZE_XYZ, -// -// SosVoxelStartPosition, -// IMAGE_RESOLUTION, -// VoxelIncrement, -// -// blockIndexOffset, -// speedOfSoundZLayer, -// -// gridDimensions, -// blockDimensions, -// -// #ifdef debug_CudaSAFTAscanIndexKernel -// debugMode, -// debugModeParameter, -// deviceSAFT_VARIANT, -// #endif -// -// deviceOutput -// -// ); - - } - else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false)){ // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction - #ifdef debug_OutputInfo - printf("--- SAFT (AscanIndex: %i, aScanWindowSize: %i, sosOffset: %i) --- use SoS-Grid Mode with SOS-Correction per Path but no ATT-Correction\n", aScanIndex, aScanWindowSize, sosOffset); - #endif - - - saftKernelAscanIndex_SOS<<>>( // , 0, stream>>>( - - aScanIndex, - - (float) aScanWindowSize, // maxAscanIndexArraysInTexture - maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture - TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen - - IMAGE_SIZE_XYZ, - - SosVoxelStartPosition, - IMAGE_RESOLUTION, - VoxelIncrement, - - blockIndexOffset, - speedOfSoundZLayer, - - gridDimensions, - blockDimensions, - - #ifdef debug_CudaSAFTAscanIndexKernel - debugMode, - debugModeParameter, - deviceSAFT_VARIANT, - #endif - - deviceOutput - - ); - } - else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)){ // 
==================================================== 3DVolume Mode with SOS- and ATT-Correction - #ifdef debug_OutputInfo - printf("--- SAFT (AscanIndex: %i, aScanWindowSize: %i, sosOffset: %i) --- use SoS-Grid Mode with SOS- and ATT-Correction per Path\n", aScanIndex, aScanWindowSize, sosOffset); - #endif - - saftKernelAscanIndex_SOS_ATT<<>>( // , 0, stream>>>( - aScanIndex, - - (float) aScanWindowSize, // maxAscanIndexArraysInTexture - maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture - TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen - - IMAGE_SIZE_XYZ, - SosVoxelStartPosition, - IMAGE_RESOLUTION, - VoxelIncrement, - - blockIndexOffset, - speedOfSoundZLayer, - gridDimensions, - blockDimensions, - - //#ifdef debug_CudaSAFTAscanIndexKernel - debugMode, - debugModeParameter, - deviceSAFT_VARIANT, - //#endif - - deviceOutput - - ); - } - - // Unbind Textures - CUDA_CHECK(cudaUnbindTexture( &texRefAscans )); - - if (ATTMode_3DVolume == false){ // ========= 3DVolume Mode without ATT-Correction - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat1_0 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat1_1 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat1_2 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat1_3 )); - } - else if (ATTMode_3DVolume == true){ // ========= 3DVolume Mode with ATT-Correction - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat2_0 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat2_1 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat2_2 )); - CUDA_CHECK(cudaUnbindTexture ( &texTableAscanIndexFloat2_3 )); - } - - - CUDA_CHECK(cudaGetLastError()); - - #ifdef debug_OutputFunctions - printf("<== SAFTHandler::performSAFT - End\n"); - #endif -} +); \ No newline at end of file diff --git a/SAFT_TOFI/src/kernel/saftPrivate.cu b/SAFT_TOFI/src/kernel/saftPrivate.cu new file mode 100644 index 0000000..1b67a8a --- /dev/null +++ 
b/SAFT_TOFI/src/kernel/saftPrivate.cu
@@ -0,0 +1,657 @@
+#include "precalculateSpeedOfSoundKernel.cuh"
+#include "rayTracing.cuh"
+#include "saftKernel.cuh"
+#include "saft.hpp"
+
+/// Launches the A-scan index pre-calculation for one batch of A-scans.
+///
+/// Binds the emitter path table and up to three receiver path tables as textures,
+/// binds up to four A-scan index arrays as output surfaces, launches the kernel
+/// variant chosen by SOSMode_3DVolume / ATTMode_3DVolume (no kernel is launched for
+/// SOS off with ATT on), checks the launch and unbinds the path-table textures.
+/// Launch layout: grid (SOSGrid_XYZ.y, maxFeasibleSosZLayerCount, 1), block (SOSGrid_XYZ.x, 1, 1).
+///
+/// @param ascanIndex_i              Offset of the A-scan index batch, forwarded to the kernel.
+/// @param aScanWindowSize           Number of A-scans in the batch, forwarded to the kernel.
+/// @param currentSpeedOfSoundZLayer First SoS-grid z-layer to process, forwarded to the kernel.
+/// @param maxFeasibleSosZLayerCount Number of SoS-grid z-layers to process; also the grid y-size.
+void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
+{
+    cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 32, 32,
+                                                                                             cudaChannelFormatKindFloat); // Step 2.1: create and describe
+                                                                                                                          // the output channel - float4
+    // Both Emitter Path Tables
+    // --------------------------------------------------------
+    texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // describe the texture reference
+    texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp;
+    texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp;
+    switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
+    {
+    case 0:
+        texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint;
+        break;
+    case 1:
+        texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear;
+        break;
+    }
+    texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0;
+    CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth));
+
+    // Texture memory for the receiver SoS path tables
+    // ===================================================================================================================
+    // Both Receiver Path Tables
+    // ------------------------------------------------------
+    texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[0] = cudaAddressModeClamp; // describe the texture reference
+    texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[1] = cudaAddressModeClamp;
+    texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[2] = cudaAddressModeClamp;
+    switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
+    {
+    case 0:
+        texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModePoint;
+        break;
+    case 1:
+        texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModeLinear;
+        break;
+    }
+    texTableVoxelToReceiverPathSosBoth0_preprocess.normalized = 0;
+
+    CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth0_preprocess, deviceTableVoxelToRecPathSosBothCuArray[0], &texChannelDescTableVoxelToEmRecPathSosBoth));
+
+    if (TableVoxelToReceiverPathSosAllocationCount > 1)
+    { // TODO: implement flexibly with arrays, if possible!
+        texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[0] = cudaAddressModeClamp; // describe the texture reference
+        texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[1] = cudaAddressModeClamp;
+        texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[2] = cudaAddressModeClamp;
+        switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
+        {
+        case 0:
+            texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModePoint;
+            break;
+        case 1:
+            texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModeLinear;
+            break;
+        }
+        texTableVoxelToReceiverPathSosBoth1_preprocess.normalized = 0;
+
+        CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth1_preprocess, deviceTableVoxelToRecPathSosBothCuArray[1], &texChannelDescTableVoxelToEmRecPathSosBoth));
+    }
+
+    if (TableVoxelToReceiverPathSosAllocationCount > 2)
+    {
+        texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[0] = cudaAddressModeClamp; // describe the texture reference
+        texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[1] = cudaAddressModeClamp;
+        texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[2] = cudaAddressModeClamp;
+        switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
+        {
+        case 0:
+            texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModePoint;
+            break;
+        case 1:
+            texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModeLinear;
+            break;
+        }
+        texTableVoxelToReceiverPathSosBoth2_preprocess.normalized = 0;
+
+        CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth2_preprocess, deviceTableVoxelToRecPathSosBothCuArray[2], &texChannelDescTableVoxelToEmRecPathSosBoth));
+    }
+
+    dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1);
+    dim3 blocksPerGrid(1, 1, 1);
+    blocksPerGrid.x = SOSGrid_XYZ.y;
+    blocksPerGrid.y = maxFeasibleSosZLayerCount;
+    blocksPerGrid.z = 1;
+
+    // Step 2. Prepare the output texture (surface) for AscanIndex; bind errors are checked like the texture binds
+
+    if (TableAscanIndexAllocationCount > 0)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]));
+    }
+    if (TableAscanIndexAllocationCount > 1)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]));
+    }
+    if (TableAscanIndexAllocationCount > 2)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]));
+    }
+    if (TableAscanIndexAllocationCount > 3)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]));
+    }
+
+    // Step 3. Run the kernel with #threads: SOS.x*SOS.y. Inside, 1024/2048
+    // A-scans are always processed and written to the AscanIndex texture
+
+    if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
+    { // ====================================================
+        // Blockmode with SOS-value per Ascan
+
+        precalculateAscanIndex_usePathsKernel<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (for multiple calls)
+            aScanWindowSize, // current number of A-scans that can be
+                             // pre-calculated at most
+            deviceSosAttFieldCuArray,
+            currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
+                                       ///< grid the pre-calculation is performed
+                                       ///< for.
+            maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
+                                       ///< sound grid the pre-calculation is
+                                       ///< performed for.
+            // currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em
+            // for which the AscanIndex is calculated
+
+            maxSoSReceiverArrayForTexture,
+
+            deviceEmitterIndex_block,  // memory address of the emitter index data
+            deviceReceiverIndex_block, // memory address of the receiver index data
+
+            TableAscanIndexAllocationCount, ///< number of required A-scan blocks
+                                            ///< of size 2048/4096
+            maxAscanIndexArraysInTexture,   ///< maximum number of Em/Rec in one
+                                            ///< CUDA array (fixed; used to
+                                            ///< determine which texture is
+                                            ///< used)
+
+            deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
+                                                 ///< the path from transducer to
+                                                 ///< voxel.
+
+            SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
+            deviceSAFT_VARIANT);
+    }
+    else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
+    { // ====================================================
+        // 3DVolume Mode with SOS-Correction no ATT-Correction
+
+        precalculateAscanIndex_usePathsKernel_SOS<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (for multiple calls)
+            aScanWindowSize, // current number of A-scans that can be
+                             // pre-calculated at most
+            deviceSosAttFieldCuArray,
+            currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
+                                       ///< grid the pre-calculation is performed
+                                       ///< for.
+            maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
+                                       ///< sound grid the pre-calculation is
+                                       ///< performed for.
+            // currentEmIndexUsedForAscanIndexCalculation, ///< current
+            // Index of Em for which the AscanIndex is calculated
+
+            maxSoSReceiverArrayForTexture,
+
+            deviceEmitterIndex_block,  // memory address of the emitter index data
+            deviceReceiverIndex_block, // memory address of the receiver index data
+
+            TableAscanIndexAllocationCount, ///< number of required A-scan blocks
+                                            ///< of size 2048/4096
+            maxAscanIndexArraysInTexture,   ///< maximum number of Em/Rec in one
+                                            ///< CUDA array (fixed; used to
+                                            ///< determine which texture is
+                                            ///< used)
+
+            deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
+                                                 ///< the path from transducer to
+                                                 ///< voxel.
+
+            SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
+            deviceSAFT_VARIANT);
+    }
+    else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
+    { // ====================================================
+        // 3DVolume Mode with SOS- and ATT-Correction
+
+        precalculateAscanIndex_usePathsKernel_SOS_ATT<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (for multiple calls)
+            aScanWindowSize, // current number of A-scans that can be
+                             // pre-calculated at most
+            deviceSosAttFieldCuArray,
+            currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
+                                       ///< grid the pre-calculation is performed
+                                       ///< for.
+            maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
+                                       ///< sound grid the pre-calculation is
+                                       ///< performed for.
+            // currentEmIndexUsedForAscanIndexCalculation, ///< current
+            // Index of Em for which the AscanIndex is calculated
+            maxSoSReceiverArrayForTexture,
+            deviceEmitterIndex_block,       // memory address of the emitter index data
+            deviceReceiverIndex_block,      // memory address of the receiver index data
+            TableAscanIndexAllocationCount, ///< number of required A-scan blocks
+                                            ///< of size 2048/4096
+            maxAscanIndexArraysInTexture,   ///< maximum number of Em/Rec in one
+                                            ///< CUDA array (fixed; used to
+                                            ///< determine which texture is
+                                            ///< used)
+            deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
+                                                 ///< the path from transducer to
+                                                 ///< voxel.
+            SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
+            deviceSAFT_VARIANT);
+    }
+
+    CUDA_CHECK(cudaGetLastError());
+
+    // ==================================================== cudaUnbindTexture
+    // Unbind texture memory of the emitter SoS path tables
+    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToEmitterPathSosBoth_preprocess));
+    // Unbind texture memory of the receiver SoS path tables
+    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth0_preprocess));
+    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth1_preprocess));
+    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth2_preprocess));
+}
+
+/// Launches precalculateAverageSpeedOfSoundKernel for a range of SoS-grid z-layers.
+///
+/// Binds deviceSosAttFieldCuArray to texRefSosAttField, binds the output surfaces
+/// selected by deviceListGeometry, launches the kernel, checks the launch and
+/// unbinds the texture again.
+/// Launch layout: grid (SOSGrid_XYZ.y, sosZLayerCount, 1), block (SOSGrid_XYZ.x, 1, 1).
+///
+/// @param firstZLayer                 Forwarded to the kernel.
+/// @param sosZLayerCount              Forwarded to the kernel; also the grid y-size.
+/// @param deviceListGeometry          0 binds the emitter path-table surface, 1 binds the receiver ones.
+/// @param geometryElementCount        Forwarded to the kernel.
+/// @param deviceVoxelCountOutputFloat Forwarded to the kernel.
+/// @param deviceSpeedOfSoundSumOutput Forwarded to the kernel.
+void SAFTHandler::precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceListGeometry, int geometryElementCount, float *deviceVoxelCountOutputFloat,
+                                                  float *deviceSpeedOfSoundSumOutput)
+{
+    dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
+                         1); // max. 512 or 1024 threads are specified and
+    // dim3 threadsPerBlock (SOSGrid_XYZ.x,SOSGrid_XYZ.y,1); // max. 512 or
+    // 1024 threads are specified and
+    dim3 blocksPerGrid(1, 1, 1); // max. 65,535 blocks in the grid
+                                 // are computed. Initialization
+    blocksPerGrid.x = SOSGrid_XYZ.y;
+    blocksPerGrid.y = sosZLayerCount;
+    blocksPerGrid.z = 1;
+
+    cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0,
+                                                                            cudaChannelFormatKindFloat); // Step 2.1: create and
+                                                                                                         // describe the output channel
+
+    texRefSosAttField.addressMode[0] = cudaAddressModeClamp; // describe the texture reference
+    texRefSosAttField.addressMode[1] = cudaAddressModeClamp;
+    texRefSosAttField.addressMode[2] = cudaAddressModeClamp;
+
+    if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1)
+    {
+        texRefSosAttField.filterMode = cudaFilterModeLinear; // linear interpolation
+    }
+    else
+    {
+        texRefSosAttField.filterMode = cudaFilterModePoint; // Nearest Neighbor
+    }
+    texRefSosAttField.normalized = 0;
+
+    CUDA_CHECK(cudaBindTextureToArray(&texRefSosAttField, deviceSosAttFieldCuArray,
+                                      &texChannelDescSosAttField)); // Step 4.1: bind the 3D array to
+                                                                    // texture memory
+
+    if (deviceListGeometry == 0)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray));
+    }
+
+    if (deviceListGeometry == 1)
+    {
+        if (TableVoxelToReceiverPathSosAllocationCount > 0)
+        {
+            CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]));
+        }
+        if (TableVoxelToReceiverPathSosAllocationCount > 1)
+        {
+            CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]));
+        }
+        if (TableVoxelToReceiverPathSosAllocationCount > 2)
+        {
+            CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]));
+        }
+    }
+
+    precalculateAverageSpeedOfSoundKernel<<<blocksPerGrid, threadsPerBlock>>>(deviceSosAttFieldCuArray, firstZLayer, sosZLayerCount, deviceListGeometry, geometryElementCount,
+        maxSoSReceiverArrayForTexture, // maximum number of receivers in one
+                                       // CUDA array
+
+        // deviceVoxelCountOutput,
+        deviceVoxelCountOutputFloat, deviceSpeedOfSoundSumOutput,
+        // regionOfInterestOffset,
+        SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter);
+    CUDA_CHECK(cudaGetLastError());
+
+    CUDA_CHECK(cudaUnbindTexture(&texRefSosAttField));
+}
+
+/// Launches fillCuArrayKernel, handing it useValue as the fill value (kernel body not shown here).
+///
+/// Binds up to four of the given arrays as output surfaces and launches the kernel.
+/// Launch layout: grid (SOSGrid_XYZ.y, maxFeasibleSosZLayerCount, 1), block (SOSGrid_XYZ.x, 1, 1).
+///
+/// @param useValue                            Fill value, forwarded to the kernel.
+/// @param deviceTextureAscanIndexFloatCuArray Arrays to fill.
+/// @param TableAscanIndexAllocationCount      Number of arrays to bind (at most four are bound).
+void SAFTHandler::fillCuArray(float useValue,
+                              cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
+                              int TableAscanIndexAllocationCount)
+{
+    dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
+                         1); // determine necessary amount of threads
+                             // // max. 512 or 1024
+    dim3 blocksPerGrid(1, 1,
+                       1); // determine necessary amount of blocks in grid // max. 65,535
+    blocksPerGrid.x = SOSGrid_XYZ.y;
+    blocksPerGrid.y = maxFeasibleSosZLayerCount;
+    blocksPerGrid.z = 1;
+
+    // Step 1. Prepare the output texture (surface) for AscanIndex
+    if (TableAscanIndexAllocationCount > 0)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]));
+    }
+    if (TableAscanIndexAllocationCount > 1)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]));
+    }
+    if (TableAscanIndexAllocationCount > 2)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]));
+    }
+    if (TableAscanIndexAllocationCount > 3)
+    {
+        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]));
+    }
+
+    // Step 2. Run the kernel with #threads: SOS.x*SOS.y. Inside, 1024/2048 A-scans
+    // are always processed and written to the AscanIndex texture
+    fillCuArrayKernel<<<blocksPerGrid, threadsPerBlock>>>(useValue,
+        deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS
+                                             ///< samples in the path
+                                             ///< from transducer to
+                                             ///< voxel.
+        maxAscanIndexArraysInTexture,
+        TableAscanIndexAllocationCount, ///< Amount of Surfaces in
+                                        ///< the Array of cuArrays
+        maxFeasibleSosZLayerCount, ATTMode_3DVolume, debugMode, debugModeParameter);
+
+    CUDA_CHECK(cudaGetLastError());
+}
+
+void SAFTHandler::performSAFT(
+    int aScanIndex, ///< The A-scan index is increased by the A-scan batch size in every iteration. It describes the offset into the A-scan samples the SAFT kernel is operating with.
+    size_t aScanWindowSize, ///< A-scan batch size in terms of number of samples within one window.
+    int3 IMAGE_SIZE_XYZ, ///< Bildbereichsgroesse/ROI in Voxel
+    int3 SOSGrid_XYZ, ///< SoSGridgroesse in Voxel
+    int blockIndexOffset, ///< Additional offset added to the z component of the block index, required because of the adjustments for partial reconstruction in different z-layers.
+    int outputWindowVoxelCount, ///< Number of Voxels in the output window.
+    int speedOfSoundZLayer, ///< current SoS z-layer Offset in the speed of sound grid.
+    int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the z-layers of the active zone of reconstruction in the region of interest.
+    int maxFeasibleSosZLayerCount,
+    int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
+    dim3 const &windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than the full grid dimensions and only represents the current reconstruction window.
+    dim3 const &gridDimensions, ///< Full grid dimensions of the reconstruction.
+    dim3 const &blockDimensions, ///< Block dimensions to be used with the SAFT kernel.
+    float *deviceSpeedOfSoundField, ///< Pointer to SoSGrid.
+    cudaArray *deviceAScansCuArray
+    // cudaStream_t stream ///< Stream to execute the SAFT kernel on.
+) +{ + + dim3 reducedGridDimensions, reducedBlockDimensions; + + reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions); + + CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT, cudaFuncCachePreferL1)); + CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS, cudaFuncCachePreferL1)); + CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex, cudaFuncCachePreferL1)); + + // Texture Memory Adressing-mode // http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf -> 3.2.11.1. Texture Memory S. 42 + // cudaAddressModeClamp - Return values at the boarders if out-of range - default + // cudaAddressModeBorder - Return 0 if out-of range + // cudaAddressModeMirror - Mirror the values - For normalized coordinates + // cudaAddressModeWrap - Repeating the values - For normalized coordinates + + // Texturmemory fuer Ascans + cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur + texRefAscans.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben + texRefAscans.addressMode[1] = cudaAddressModeBorder; + + if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1) + { + texRefAscans.filterMode = cudaFilterModeLinear; // Lineare Interpolation + } + else + { + texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor + } + + texRefAscans.normalized = 0; + CUDA_CHECK(cudaBindTextureToArray(&texRefAscans, deviceAScansCuArray, &texChannelDesc)); + + if (ATTMode_3DVolume == false) + { // ========= 3DVolume Mode without ATT-Correction + + cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc(); // Should do the same + + // AscanIndex Path Tables ------------------------------------------------------ + texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeClamp; + 
texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat1_0.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat1_0.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat1_0.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat)); + + if (TableAscanIndexAllocationCount > 1) + { // TODO: mit Arrays flexibel programmieren!!! + texTableAscanIndexFloat1_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat1_1.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat1_1.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat1_1.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat1_1.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat1_1.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat)); + } + + if (TableAscanIndexAllocationCount > 2) + { + texTableAscanIndexFloat1_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat1_2.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat1_2.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat1_2.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat1_2.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat1_2.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat)); 
+ } + + if (TableAscanIndexAllocationCount > 3) + { + texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat1_3.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat1_3.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat1_3.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat)); + } + } + else if (ATTMode_3DVolume == true) + { // ========= 3DVolume Mode with ATT-Correction + + cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc(); // Should do the same + + // AscanIndex Path Tables ------------------------------------------------------ + texTableAscanIndexFloat2_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat2_0.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat2_0.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat2_0.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat2_0.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat2_0.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat)); + + if (TableAscanIndexAllocationCount > 1) + { // TODO: mit Arrays flexibel programmieren!!! 
+ texTableAscanIndexFloat2_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat2_1.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat2_1.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat2_1.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat2_1.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat2_1.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat)); + } + + if (TableAscanIndexAllocationCount > 2) + { + texTableAscanIndexFloat2_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat2_2.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat2_2.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat2_2.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat2_2.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat2_2.normalized = 0; + + CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat)); + } + + if (TableAscanIndexAllocationCount > 3) + { + texTableAscanIndexFloat2_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben + texTableAscanIndexFloat2_3.addressMode[1] = cudaAddressModeClamp; + texTableAscanIndexFloat2_3.addressMode[2] = cudaAddressModeClamp; + switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) + { + case 0: + texTableAscanIndexFloat2_3.filterMode = cudaFilterModePoint; + break; + case 1: + texTableAscanIndexFloat2_3.filterMode = cudaFilterModeLinear; + break; + } + texTableAscanIndexFloat2_3.normalized = 0; + + 
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat)); + } + } + // Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition + float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION; + float3 SosVoxelStartPosition; + SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x) / SOS_RESOLUTION; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen + SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y) / SOS_RESOLUTION; + SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z) / SOS_RESOLUTION; + // printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z); + + // Anzahl der Teiltabellen, bereits hier berechnen oder übergeben + int TableAscanIndexAllocationCount = ceil((float)aScanWindowSize / (float)maxAscanIndexArraysInTexture); // float is important due to ceiling + // printf( "TableAscanIndexAllocationCount = (%i/%i) = %i = %i\n", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount, + // ceil(aScanWindowSize/maxAscanIndexArraysInTexture)); + + // Call of 3 SAFT Versions - AscanIndex-Varainten + // ##################################################################################################################################################################### + // ==================================================== Blockmode with SOS-value per Ascan + // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction + // ==================================================== 3DVolume Mode with SOS- and ATT-Correction + + if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false)) + { // ==================================================== Blockmode with SOS-value per Ascan + printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n"); + } + else if 
((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false)) + { + // ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction + saftKernelAscanIndex_SOS<<>>( // , 0, stream>>>( + + aScanIndex, + + (float)aScanWindowSize, // maxAscanIndexArraysInTexture + maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture + TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen + IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput); + } + else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)) + { // ==================================================== 3DVolume Mode with SOS- and ATT-Correction + + saftKernelAscanIndex_SOS_ATT<<>>( // , 0, stream>>>( + aScanIndex, + (float)aScanWindowSize, // maxAscanIndexArraysInTexture + maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture + TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen + IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, + blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, + debugMode, debugModeParameter, deviceSAFT_VARIANT, + deviceOutput + + ); + } + + // Unbind Textures + CUDA_CHECK(cudaUnbindTexture(&texRefAscans)); + + if (ATTMode_3DVolume == false) + { // ========= 3DVolume Mode without ATT-Correction + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_0)); + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_1)); + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_2)); + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_3)); + } + else if (ATTMode_3DVolume == true) + { // ========= 3DVolume Mode with ATT-Correction + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_0)); + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_1)); + CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_2)); + 
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3)); + } + CUDA_CHECK(cudaGetLastError()); +} \ No newline at end of file diff --git a/SAFT_TOFI/src/saft.cu b/SAFT_TOFI/src/saft.cu index 1e80524..78fe0da 100644 --- a/SAFT_TOFI/src/saft.cu +++ b/SAFT_TOFI/src/saft.cu @@ -9,8 +9,7 @@ - Das wird gemacht, weil CUDA keine externen Referenzen unterstützt, um Daten von anderen Compilierungs Einheiten zu referenzieren. */ -#include "kernel/constantMemory.cuh" // Deklaration der Daten, die sich im Constant-Memory befinden Geometriedaten -#include "kernel/rayTracing.cuh" // GPU-Code für Bresenham -#include "kernel/precalculateSpeedOfSoundKernel.cuh" // GPU-Code Partitionierung für Bresenham. Ruft den Bresenham auf. -#include "kernel/saftKernel.cuh" // GPU-Kernel für SAFT +// #include "kernel/rayTracing.cuh" // GPU-Code für Bresenham +// #include "kernel/precalculateSpeedOfSoundKernel.cuh" // GPU-Code Partitionierung für Bresenham. Ruft den Bresenham auf. +// #include "kernel/saftKernel.cuh" // GPU-Kernel für SAFT