Files
URDepends/SAFT_TOFI/src/kernel/saftPrivate.cu

618 lines
38 KiB
Plaintext

#include "precalculateSpeedOfSoundKernel.cuh"
#include "rayTracing.cuh"
#include "saftKernel.cuh"
#include "saft.hpp"
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
{
cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 32, 32,
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und
// beschreiben - Float4
// Both Emitter Path Tables
// --------------------------------------------------------
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth));
// Texturmemory fuer Receiver - SosPathsTables
// ===================================================================================================================
// Both Receiver Path Tables
// ------------------------------------------------------
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToReceiverPathSosBoth0_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth0_preprocess, deviceTableVoxelToRecPathSosBothCuArray[0], &texChannelDescTableVoxelToEmRecPathSosBoth));
if (TableVoxelToReceiverPathSosAllocationCount > 1)
{ // TODO: mit Arrays flexibel programmieren, wenn moeglich!!!
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToReceiverPathSosBoth1_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth1_preprocess, deviceTableVoxelToRecPathSosBothCuArray[1], &texChannelDescTableVoxelToEmRecPathSosBoth));
}
if (TableVoxelToReceiverPathSosAllocationCount > 2)
{
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[1] = cudaAddressModeClamp;
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModePoint;
break;
case 1:
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModeLinear;
break;
}
texTableVoxelToReceiverPathSosBoth2_preprocess.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth2_preprocess, deviceTableVoxelToRecPathSosBothCuArray[2], &texChannelDescTableVoxelToEmRecPathSosBoth));
}
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1);
dim3 blocksPerGrid(1, 1, 1);
blocksPerGrid.x = SOSGrid_XYZ.y;
blocksPerGrid.y = maxFeasibleSosZLayerCount;
blocksPerGrid.z = 1;
// Step 2. Bereite Output-Textur fuer AscanIndex vor
if (TableAscanIndexAllocationCount > 0)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
}
if (TableAscanIndexAllocationCount > 1)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
}
if (TableAscanIndexAllocationCount > 2)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
}
if (TableAscanIndexAllocationCount > 3)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
}
// Step 3. Fuehre Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden
// immer 1024/2048 A-Scans durchlaufen und in AscanIndex-Textur geschrieben
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
{ // ====================================================
// Blockmode with SOS-value per Ascan
precalculateAscanIndex_usePathsKernel<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
// vorberechnet werden können
deviceSosAttFieldCuArray,
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
///< grid the pre-calculation is performed
///< for.
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
///< sound grid the pre-calculation is
///< performed for.
// currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em
// for which the AscanIndex is calculated
maxSoSReceiverArrayForTexture,
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
///< der Groesse 2048/4096
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
///< CUDA Array (fest definiert fuer
///< bestimmung welche Textur genutzt
///< wird)
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
///< the path from transducer to
///< voxel.
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
deviceSAFT_VARIANT);
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
{ // ====================================================
// 3DVolume Mode with SOS-Correction no ATT-Correction
precalculateAscanIndex_usePathsKernel_SOS<<<blocksPerGrid,
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
// vorberechnet werden können
deviceSosAttFieldCuArray,
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
///< grid the pre-calculation is performed
///< for.
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
///< sound grid the pre-calculation is
///< performed for.
// currentEmIndexUsedForAscanIndexCalculation, ///< current
// Index of Em for which the AscanIndex is calculated
maxSoSReceiverArrayForTexture,
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
///< der Groesse 2048/4096
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
///< CUDA Array (fest definiert fuer
///< bestimmung welche Textur genutzt
///< wird)
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
///< the path from transducer to
///< voxel.
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
deviceSAFT_VARIANT);
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
{ // ====================================================
// 3DVolume Mode with SOS- and ATT-Correction
precalculateAscanIndex_usePathsKernel_SOS_ATT<<<blocksPerGrid,
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
// vorberechnet werden können
deviceSosAttFieldCuArray,
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
///< grid the pre-calculation is performed
///< for.
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
///< sound grid the pre-calculation is
///< performed for.
// currentEmIndexUsedForAscanIndexCalculation, ///< current
// Index of Em for which the AscanIndex is calculated
maxSoSReceiverArrayForTexture,
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
///< der Groesse 2048/4096
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
///< CUDA Array (fest definiert fuer
///< bestimmung welche Textur genutzt
///< wird)
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
///< the path from transducer to
///< voxel.
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
deviceSAFT_VARIANT);
}
CUDA_CHECK(cudaGetLastError());
// ==================================================== cudaUnbindTexture
// Texturmemory fuer Emitter - SosPathsTables entbinden
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToEmitterPathSosBoth_preprocess));
// Texturmemory fuer Receiver - SosPathsTables entbinden
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth0_preprocess));
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth1_preprocess));
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth2_preprocess));
}
void SAFTHandler::precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceListGeometry, int geometryElementCount)
{
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
1); // max. 512 oder 1024 Threads werden vorgegeben und
// dim3 threadsPerBlock (SOSGrid_XYZ.x,SOSGrid_XYZ.y,1); // max. 512 oder
// 1024 Threads werden vorgegeben und
dim3 blocksPerGrid(1, 1, 1); // max. 65.535 Bloecke im Grid
// berechnet. Initialisierung
blocksPerGrid.x = SOSGrid_XYZ.y;
blocksPerGrid.y = sosZLayerCount;
blocksPerGrid.z = 1;
cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0,
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal
// anlegen und beschreiben
texRefSosAttField.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texRefSosAttField.addressMode[1] = cudaAddressModeClamp;
texRefSosAttField.addressMode[2] = cudaAddressModeClamp;
if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1)
{
texRefSosAttField.filterMode = cudaFilterModeLinear; // Lineare Interpolation
}
else
{
texRefSosAttField.filterMode = cudaFilterModePoint; // Nearest Neighbor
}
texRefSosAttField.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texRefSosAttField, deviceSosAttFieldCuArray,
&texChannelDescSosAttField)); // Schritt 4.1 3DArray an Texturmemory
// binden
if (deviceListGeometry == 0)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray);
}
if (deviceListGeometry == 1)
{
if (TableVoxelToReceiverPathSosAllocationCount > 0)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]);
}
if (TableVoxelToReceiverPathSosAllocationCount > 1)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]);
}
if (TableVoxelToReceiverPathSosAllocationCount > 2)
{
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]);
}
}
precalculateAverageSpeedOfSoundKernel<<<blocksPerGrid, threadsPerBlock>>>(firstZLayer, sosZLayerCount, deviceListGeometry, geometryElementCount,
maxSoSReceiverArrayForTexture,
sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaUnbindTexture(&texRefSosAttField));
}
void SAFTHandler::fillCuArray(float useValue,
cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
int TableAscanIndexAllocationCount)
{
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
1); // determine neccessary amount of threads
// // max. 512 oder 1024
dim3 blocksPerGrid(1, 1,
1); // determine neccessary amount of blocks in grid // max. 65.535
blocksPerGrid.x = SOSGrid_XYZ.y;
blocksPerGrid.y = maxFeasibleSosZLayerCount;
blocksPerGrid.z = 1;
// Step 1. Bereite Output-Textur fuer AscanIndex vor
if (TableAscanIndexAllocationCount > 0)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
}
if (TableAscanIndexAllocationCount > 1)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
}
if (TableAscanIndexAllocationCount > 2)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
}
if (TableAscanIndexAllocationCount > 3)
{
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
}
// Step 2. Fuere Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden immer
// 1024/2048 A-Scans durchgegangen und in AscanIndex-Textur geschrieben
fillCuArrayKernel<<<blocksPerGrid, threadsPerBlock>>>(useValue,
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS
///< samples in the path
///< from transducer to
///< voxel.
maxAscanIndexArraysInTexture,
TableAscanIndexAllocationCount, ///< Amount of Surfaces in
///< the Array of cuArrays
maxFeasibleSosZLayerCount, ATTMode_3DVolume, debugMode, debugModeParameter);
CUDA_CHECK(cudaGetLastError());
}
void SAFTHandler::performSAFT(
int aScanIndex, ///< The A-scan index is increased by the A-scan batch size in every iteration. It describes the offset into the A-scan samples the SAFT kernel is operating with.
size_t aScanWindowSize, ///< A-scan batch size in terms of number of samples within one window.
int3 IMAGE_SIZE_XYZ, ///< Bildbereichsgroesse/ROI in Voxel
int3 SOSGrid_XYZ, ///< SoSGridgroesse in Voxel
int blockIndexOffset, ///< Additional offset added to the z component of the block index, required because of the adjustments for partial reconstruction in different z-layers.
int outputWindowVoxelCount, ///< Number of Voxels in the output window.
int speedOfSoundZLayer, ///< current SoS z-layer Offset in the speed of sound grid.
int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the z-layers of the active zone of reconstruction in the region of interest.
int maxFeasibleSosZLayerCount,
int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
dim3 const &windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than the full grid dimensions and only represents the current reconstruction window.
dim3 const &gridDimensions, ///< Full grid dimensions of the reconstruction.
dim3 const &blockDimensions, ///< Block dimensions to be used with the SAFT kernel.
float *deviceSpeedOfSoundField, ///< Pointer to SoSGrid.
cudaArray *deviceAScansCuArray
// cudaStream_t stream ///< Stream to execute the SAFT kernel on.
)
{
dim3 reducedGridDimensions, reducedBlockDimensions;
reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions);
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT, cudaFuncCachePreferL1));
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS, cudaFuncCachePreferL1));
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex, cudaFuncCachePreferL1));
// Texture Memory Adressing-mode // http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf -> 3.2.11.1. Texture Memory S. 42
// cudaAddressModeClamp - Return values at the boarders if out-of range - default
// cudaAddressModeBorder - Return 0 if out-of range
// cudaAddressModeMirror - Mirror the values - For normalized coordinates
// cudaAddressModeWrap - Repeating the values - For normalized coordinates
// Texturmemory fuer Ascans
cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur
texRefAscans.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben
texRefAscans.addressMode[1] = cudaAddressModeBorder;
if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1)
{
texRefAscans.filterMode = cudaFilterModeLinear; // Lineare Interpolation
}
else
{
texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor
}
texRefAscans.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texRefAscans, deviceAScansCuArray, &texChannelDesc));
if (ATTMode_3DVolume == false)
{ // ========= 3DVolume Mode without ATT-Correction
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float>(); // Should do the same
// AscanIndex Path Tables ------------------------------------------------------
texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_0.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_0.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_0.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
if (TableAscanIndexAllocationCount > 1)
{ // TODO: mit Arrays flexibel programmieren!!!
texTableAscanIndexFloat1_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_1.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_1.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_1.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_1.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_1.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 2)
{
texTableAscanIndexFloat1_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_2.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_2.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_2.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_2.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_2.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 3)
{
texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat1_3.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat1_3.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat1_3.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
}
}
else if (ATTMode_3DVolume == true)
{ // ========= 3DVolume Mode with ATT-Correction
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float2>(); // Should do the same
// AscanIndex Path Tables ------------------------------------------------------
texTableAscanIndexFloat2_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_0.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_0.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_0.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_0.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_0.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
if (TableAscanIndexAllocationCount > 1)
{ // TODO: mit Arrays flexibel programmieren!!!
texTableAscanIndexFloat2_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_1.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_1.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_1.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_1.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_1.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 2)
{
texTableAscanIndexFloat2_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_2.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_2.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_2.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_2.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_2.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
}
if (TableAscanIndexAllocationCount > 3)
{
texTableAscanIndexFloat2_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
texTableAscanIndexFloat2_3.addressMode[1] = cudaAddressModeClamp;
texTableAscanIndexFloat2_3.addressMode[2] = cudaAddressModeClamp;
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
{
case 0:
texTableAscanIndexFloat2_3.filterMode = cudaFilterModePoint;
break;
case 1:
texTableAscanIndexFloat2_3.filterMode = cudaFilterModeLinear;
break;
}
texTableAscanIndexFloat2_3.normalized = 0;
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
}
}
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
float3 SosVoxelStartPosition;
SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x) / SOS_RESOLUTION; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen
SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y) / SOS_RESOLUTION;
SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z) / SOS_RESOLUTION;
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
// Anzahl der Teiltabellen, bereits hier berechnen oder übergeben
int TableAscanIndexAllocationCount = ceil((float)aScanWindowSize / (float)maxAscanIndexArraysInTexture); // float is important due to ceiling
// printf( "TableAscanIndexAllocationCount = (%i/%i) = %i = %i\n", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount,
// ceil(aScanWindowSize/maxAscanIndexArraysInTexture));
// Call of 3 SAFT Versions - AscanIndex-Varainten
// #####################################################################################################################################################################
// ==================================================== Blockmode with SOS-value per Ascan
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
{ // ==================================================== Blockmode with SOS-value per Ascan
printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n");
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
{
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
aScanIndex,
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
}
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
aScanIndex,
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement,
blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions,
debugMode, debugModeParameter, deviceSAFT_VARIANT,
deviceOutput
);
}
// Unbind Textures
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
if (ATTMode_3DVolume == false)
{ // ========= 3DVolume Mode without ATT-Correction
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_0));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_1));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_2));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_3));
}
else if (ATTMode_3DVolume == true)
{ // ========= 3DVolume Mode with ATT-Correction
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_0));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_1));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_2));
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
}
CUDA_CHECK(cudaGetLastError());
}