618 lines
38 KiB
Plaintext
618 lines
38 KiB
Plaintext
#include "precalculateSpeedOfSoundKernel.cuh"
|
|
#include "rayTracing.cuh"
|
|
#include "saftKernel.cuh"
|
|
#include "saft.hpp"
|
|
|
|
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer, int maxFeasibleSosZLayerCount)
|
|
{
|
|
cudaChannelFormatDesc texChannelDescTableVoxelToEmRecPathSosBoth = cudaCreateChannelDesc(32, 32, 32, 32,
|
|
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal anlegen und
|
|
// beschreiben - Float4
|
|
// Both Emitter Path Tables
|
|
// --------------------------------------------------------
|
|
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[1] = cudaAddressModeClamp;
|
|
texTableVoxelToEmitterPathSosBoth_preprocess.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableVoxelToEmitterPathSosBoth_preprocess.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableVoxelToEmitterPathSosBoth_preprocess.normalized = 0;
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToEmitterPathSosBoth_preprocess, deviceTableVoxelToEmPathSosBothCuArray, &texChannelDescTableVoxelToEmRecPathSosBoth));
|
|
|
|
// Texturmemory fuer Receiver - SosPathsTables
|
|
// ===================================================================================================================
|
|
// Both Receiver Path Tables
|
|
// ------------------------------------------------------
|
|
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[1] = cudaAddressModeClamp;
|
|
texTableVoxelToReceiverPathSosBoth0_preprocess.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableVoxelToReceiverPathSosBoth0_preprocess.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableVoxelToReceiverPathSosBoth0_preprocess.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth0_preprocess, deviceTableVoxelToRecPathSosBothCuArray[0], &texChannelDescTableVoxelToEmRecPathSosBoth));
|
|
|
|
if (TableVoxelToReceiverPathSosAllocationCount > 1)
|
|
{ // TODO: mit Arrays flexibel programmieren, wenn moeglich!!!
|
|
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[1] = cudaAddressModeClamp;
|
|
texTableVoxelToReceiverPathSosBoth1_preprocess.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableVoxelToReceiverPathSosBoth1_preprocess.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableVoxelToReceiverPathSosBoth1_preprocess.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth1_preprocess, deviceTableVoxelToRecPathSosBothCuArray[1], &texChannelDescTableVoxelToEmRecPathSosBoth));
|
|
}
|
|
|
|
if (TableVoxelToReceiverPathSosAllocationCount > 2)
|
|
{
|
|
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[1] = cudaAddressModeClamp;
|
|
texTableVoxelToReceiverPathSosBoth2_preprocess.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableVoxelToReceiverPathSosBoth2_preprocess.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableVoxelToReceiverPathSosBoth2_preprocess.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableVoxelToReceiverPathSosBoth2_preprocess, deviceTableVoxelToRecPathSosBothCuArray[2], &texChannelDescTableVoxelToEmRecPathSosBoth));
|
|
}
|
|
|
|
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1);
|
|
dim3 blocksPerGrid(1, 1, 1);
|
|
blocksPerGrid.x = SOSGrid_XYZ.y;
|
|
blocksPerGrid.y = maxFeasibleSosZLayerCount;
|
|
blocksPerGrid.z = 1;
|
|
|
|
// Step 2. Bereite Output-Textur fuer AscanIndex vor
|
|
|
|
if (TableAscanIndexAllocationCount > 0)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
|
|
}
|
|
if (TableAscanIndexAllocationCount > 1)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
|
|
}
|
|
if (TableAscanIndexAllocationCount > 2)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
|
|
}
|
|
if (TableAscanIndexAllocationCount > 3)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
|
|
}
|
|
|
|
// Step 3. Fuehre Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden
|
|
// immer 1024/2048 A-Scans durchlaufen und in AscanIndex-Textur geschrieben
|
|
|
|
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
|
{ // ====================================================
|
|
// Blockmode with SOS-value per Ascan
|
|
|
|
precalculateAscanIndex_usePathsKernel<<<blocksPerGrid, threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
|
|
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
|
|
// vorberechnet werden können
|
|
deviceSosAttFieldCuArray,
|
|
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
|
|
///< grid the pre-calculation is performed
|
|
///< for.
|
|
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
|
|
///< sound grid the pre-calculation is
|
|
///< performed for.
|
|
// currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em
|
|
// for which the AscanIndex is calculated
|
|
|
|
maxSoSReceiverArrayForTexture,
|
|
|
|
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
|
|
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
|
|
|
|
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
|
|
///< der Groesse 2048/4096
|
|
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
|
|
///< CUDA Array (fest definiert fuer
|
|
///< bestimmung welche Textur genutzt
|
|
///< wird)
|
|
|
|
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
|
|
///< the path from transducer to
|
|
///< voxel.
|
|
|
|
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
|
|
deviceSAFT_VARIANT);
|
|
}
|
|
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
|
{ // ====================================================
|
|
// 3DVolume Mode with SOS-Correction no ATT-Correction
|
|
|
|
precalculateAscanIndex_usePathsKernel_SOS<<<blocksPerGrid,
|
|
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
|
|
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
|
|
// vorberechnet werden können
|
|
deviceSosAttFieldCuArray,
|
|
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
|
|
///< grid the pre-calculation is performed
|
|
///< for.
|
|
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
|
|
///< sound grid the pre-calculation is
|
|
///< performed for.
|
|
// currentEmIndexUsedForAscanIndexCalculation, ///< current
|
|
// Index of Em for which the AscanIndex is calculated
|
|
|
|
maxSoSReceiverArrayForTexture,
|
|
|
|
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
|
|
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
|
|
|
|
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
|
|
///< der Groesse 2048/4096
|
|
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
|
|
///< CUDA Array (fest definiert fuer
|
|
///< bestimmung welche Textur genutzt
|
|
///< wird)
|
|
|
|
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
|
|
///< the path from transducer to
|
|
///< voxel.
|
|
|
|
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
|
|
deviceSAFT_VARIANT);
|
|
}
|
|
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
|
{ // ====================================================
|
|
// 3DVolume Mode with SOS- and ATT-Correction
|
|
|
|
precalculateAscanIndex_usePathsKernel_SOS_ATT<<<blocksPerGrid,
|
|
threadsPerBlock>>>(ascanIndex_i, ///< Offset of AscanIndex batch (bei mehreren Aufrufen)
|
|
aScanWindowSize, // aktuelle Anzahl der Ascans, die maximal
|
|
// vorberechnet werden können
|
|
deviceSosAttFieldCuArray,
|
|
currentSpeedOfSoundZLayer, ///< First z-layer in the speed of sound
|
|
///< grid the pre-calculation is performed
|
|
///< for.
|
|
maxFeasibleSosZLayerCount, ///< Number of z-layers in the speed of
|
|
///< sound grid the pre-calculation is
|
|
///< performed for.
|
|
// currentEmIndexUsedForAscanIndexCalculation, ///< current
|
|
// Index of Em for which the AscanIndex is calculated
|
|
maxSoSReceiverArrayForTexture,
|
|
deviceEmitterIndex_block, // Speicheradresse fuer EmitterIndexdaten
|
|
deviceReceiverIndex_block, // Speicheradresse fuer ReceiverIndexdaten
|
|
TableAscanIndexAllocationCount, ///< Anzahl der benoetigten AscanBlocks
|
|
///< der Groesse 2048/4096
|
|
maxAscanIndexArraysInTexture, ///< maximale Anzahl an Em/Rec in einem
|
|
///< CUDA Array (fest definiert fuer
|
|
///< bestimmung welche Textur genutzt
|
|
///< wird)
|
|
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS samples in
|
|
///< the path from transducer to
|
|
///< voxel.
|
|
SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter,
|
|
deviceSAFT_VARIANT);
|
|
}
|
|
|
|
CUDA_CHECK(cudaGetLastError());
|
|
|
|
// ==================================================== cudaUnbindTexture
|
|
// Texturmemory fuer Emitter - SosPathsTables entbinden
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToEmitterPathSosBoth_preprocess));
|
|
// Texturmemory fuer Receiver - SosPathsTables entbinden
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth0_preprocess));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth1_preprocess));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth2_preprocess));
|
|
}
|
|
|
|
void SAFTHandler::precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceListGeometry, int geometryElementCount)
|
|
{
|
|
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
|
|
1); // max. 512 oder 1024 Threads werden vorgegeben und
|
|
// dim3 threadsPerBlock (SOSGrid_XYZ.x,SOSGrid_XYZ.y,1); // max. 512 oder
|
|
// 1024 Threads werden vorgegeben und
|
|
dim3 blocksPerGrid(1, 1, 1); // max. 65.535 Bloecke im Grid
|
|
// berechnet. Initialisierung
|
|
blocksPerGrid.x = SOSGrid_XYZ.y;
|
|
blocksPerGrid.y = sosZLayerCount;
|
|
blocksPerGrid.z = 1;
|
|
|
|
cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0,
|
|
cudaChannelFormatKindFloat); // Schritt 2.1 Output-Kanal
|
|
// anlegen und beschreiben
|
|
|
|
texRefSosAttField.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texRefSosAttField.addressMode[1] = cudaAddressModeClamp;
|
|
texRefSosAttField.addressMode[2] = cudaAddressModeClamp;
|
|
|
|
if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1)
|
|
{
|
|
texRefSosAttField.filterMode = cudaFilterModeLinear; // Lineare Interpolation
|
|
}
|
|
else
|
|
{
|
|
texRefSosAttField.filterMode = cudaFilterModePoint; // Nearest Neighbor
|
|
}
|
|
texRefSosAttField.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texRefSosAttField, deviceSosAttFieldCuArray,
|
|
&texChannelDescSosAttField)); // Schritt 4.1 3DArray an Texturmemory
|
|
// binden
|
|
|
|
if (deviceListGeometry == 0)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray);
|
|
}
|
|
|
|
if (deviceListGeometry == 1)
|
|
{
|
|
if (TableVoxelToReceiverPathSosAllocationCount > 0)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0, deviceTableVoxelToRecPathSosBothCuArray[0]);
|
|
}
|
|
if (TableVoxelToReceiverPathSosAllocationCount > 1)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1, deviceTableVoxelToRecPathSosBothCuArray[1]);
|
|
}
|
|
if (TableVoxelToReceiverPathSosAllocationCount > 2)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2, deviceTableVoxelToRecPathSosBothCuArray[2]);
|
|
}
|
|
}
|
|
|
|
precalculateAverageSpeedOfSoundKernel<<<blocksPerGrid, threadsPerBlock>>>(firstZLayer, sosZLayerCount, deviceListGeometry, geometryElementCount,
|
|
maxSoSReceiverArrayForTexture,
|
|
sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter);
|
|
CUDA_CHECK(cudaGetLastError());
|
|
|
|
CUDA_CHECK(cudaUnbindTexture(&texRefSosAttField));
|
|
}
|
|
|
|
void SAFTHandler::fillCuArray(float useValue,
|
|
cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
|
|
int TableAscanIndexAllocationCount)
|
|
{
|
|
dim3 threadsPerBlock(SOSGrid_XYZ.x, 1,
|
|
1); // determine neccessary amount of threads
|
|
// // max. 512 oder 1024
|
|
dim3 blocksPerGrid(1, 1,
|
|
1); // determine neccessary amount of blocks in grid // max. 65.535
|
|
blocksPerGrid.x = SOSGrid_XYZ.y;
|
|
blocksPerGrid.y = maxFeasibleSosZLayerCount;
|
|
blocksPerGrid.z = 1;
|
|
|
|
// Step 1. Bereite Output-Textur fuer AscanIndex vor
|
|
if (TableAscanIndexAllocationCount > 0)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]);
|
|
}
|
|
if (TableAscanIndexAllocationCount > 1)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]);
|
|
}
|
|
if (TableAscanIndexAllocationCount > 2)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]);
|
|
}
|
|
if (TableAscanIndexAllocationCount > 3)
|
|
{
|
|
cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]);
|
|
}
|
|
|
|
// Step 2. Fuere Kernel aus mit #Threads: SOS.x*SOS.y . Innerhalb werden immer
|
|
// 1024/2048 A-Scans durchgegangen und in AscanIndex-Textur geschrieben
|
|
fillCuArrayKernel<<<blocksPerGrid, threadsPerBlock>>>(useValue,
|
|
deviceTextureAscanIndexFloatCuArray, ///< Out: Sum of SoS
|
|
///< samples in the path
|
|
///< from transducer to
|
|
///< voxel.
|
|
maxAscanIndexArraysInTexture,
|
|
TableAscanIndexAllocationCount, ///< Amount of Surfaces in
|
|
///< the Array of cuArrays
|
|
maxFeasibleSosZLayerCount, ATTMode_3DVolume, debugMode, debugModeParameter);
|
|
|
|
CUDA_CHECK(cudaGetLastError());
|
|
}
|
|
|
|
void SAFTHandler::performSAFT(
|
|
int aScanIndex, ///< The A-scan index is increased by the A-scan batch size in every iteration. It describes the offset into the A-scan samples the SAFT kernel is operating with.
|
|
size_t aScanWindowSize, ///< A-scan batch size in terms of number of samples within one window.
|
|
int3 IMAGE_SIZE_XYZ, ///< Bildbereichsgroesse/ROI in Voxel
|
|
int3 SOSGrid_XYZ, ///< SoSGridgroesse in Voxel
|
|
int blockIndexOffset, ///< Additional offset added to the z component of the block index, required because of the adjustments for partial reconstruction in different z-layers.
|
|
int outputWindowVoxelCount, ///< Number of Voxels in the output window.
|
|
int speedOfSoundZLayer, ///< current SoS z-layer Offset in the speed of sound grid.
|
|
int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the z-layers of the active zone of reconstruction in the region of interest.
|
|
int maxFeasibleSosZLayerCount,
|
|
int currentEmIndexUsedForAscanIndexCalculation, ///< current Index of Em for which the AscanIndex is calculated
|
|
dim3 const &windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than the full grid dimensions and only represents the current reconstruction window.
|
|
dim3 const &gridDimensions, ///< Full grid dimensions of the reconstruction.
|
|
dim3 const &blockDimensions, ///< Block dimensions to be used with the SAFT kernel.
|
|
float *deviceSpeedOfSoundField, ///< Pointer to SoSGrid.
|
|
cudaArray *deviceAScansCuArray
|
|
// cudaStream_t stream ///< Stream to execute the SAFT kernel on.
|
|
)
|
|
{
|
|
|
|
dim3 reducedGridDimensions, reducedBlockDimensions;
|
|
|
|
reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions);
|
|
|
|
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT, cudaFuncCachePreferL1));
|
|
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS, cudaFuncCachePreferL1));
|
|
CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex, cudaFuncCachePreferL1));
|
|
|
|
// Texture Memory Adressing-mode // http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf -> 3.2.11.1. Texture Memory S. 42
|
|
// cudaAddressModeClamp - Return values at the boarders if out-of range - default
|
|
// cudaAddressModeBorder - Return 0 if out-of range
|
|
// cudaAddressModeMirror - Mirror the values - For normalized coordinates
|
|
// cudaAddressModeWrap - Repeating the values - For normalized coordinates
|
|
|
|
// Texturmemory fuer Ascans
|
|
cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Beschreibung des RueckgabeFormats der Textur
|
|
texRefAscans.addressMode[0] = cudaAddressModeBorder; // Texturreferenz beschreiben
|
|
texRefAscans.addressMode[1] = cudaAddressModeBorder;
|
|
|
|
if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1)
|
|
{
|
|
texRefAscans.filterMode = cudaFilterModeLinear; // Lineare Interpolation
|
|
}
|
|
else
|
|
{
|
|
texRefAscans.filterMode = cudaFilterModePoint; // Nearest Neighbor
|
|
}
|
|
|
|
texRefAscans.normalized = 0;
|
|
CUDA_CHECK(cudaBindTextureToArray(&texRefAscans, deviceAScansCuArray, &texChannelDesc));
|
|
|
|
if (ATTMode_3DVolume == false)
|
|
{ // ========= 3DVolume Mode without ATT-Correction
|
|
|
|
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float>(); // Should do the same
|
|
|
|
// AscanIndex Path Tables ------------------------------------------------------
|
|
texTableAscanIndexFloat1_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat1_0.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat1_0.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat1_0.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat1_0.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat1_0.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
|
|
|
|
if (TableAscanIndexAllocationCount > 1)
|
|
{ // TODO: mit Arrays flexibel programmieren!!!
|
|
texTableAscanIndexFloat1_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat1_1.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat1_1.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat1_1.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat1_1.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat1_1.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
|
|
}
|
|
|
|
if (TableAscanIndexAllocationCount > 2)
|
|
{
|
|
texTableAscanIndexFloat1_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat1_2.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat1_2.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat1_2.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat1_2.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat1_2.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
|
|
}
|
|
|
|
if (TableAscanIndexAllocationCount > 3)
|
|
{
|
|
texTableAscanIndexFloat1_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat1_3.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat1_3.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat1_3.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat1_3.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat1_3.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
|
}
|
|
}
|
|
else if (ATTMode_3DVolume == true)
|
|
{ // ========= 3DVolume Mode with ATT-Correction
|
|
|
|
cudaChannelFormatDesc texChannelDescTableAscanIndexFloat = cudaCreateChannelDesc<float2>(); // Should do the same
|
|
|
|
// AscanIndex Path Tables ------------------------------------------------------
|
|
texTableAscanIndexFloat2_0.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat2_0.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat2_0.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat2_0.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat2_0.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat2_0.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0], &texChannelDescTableAscanIndexFloat));
|
|
|
|
if (TableAscanIndexAllocationCount > 1)
|
|
{ // TODO: mit Arrays flexibel programmieren!!!
|
|
texTableAscanIndexFloat2_1.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat2_1.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat2_1.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat2_1.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat2_1.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat2_1.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1], &texChannelDescTableAscanIndexFloat));
|
|
}
|
|
|
|
if (TableAscanIndexAllocationCount > 2)
|
|
{
|
|
texTableAscanIndexFloat2_2.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat2_2.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat2_2.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat2_2.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat2_2.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat2_2.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2], &texChannelDescTableAscanIndexFloat));
|
|
}
|
|
|
|
if (TableAscanIndexAllocationCount > 3)
|
|
{
|
|
texTableAscanIndexFloat2_3.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
|
texTableAscanIndexFloat2_3.addressMode[1] = cudaAddressModeClamp;
|
|
texTableAscanIndexFloat2_3.addressMode[2] = cudaAddressModeClamp;
|
|
switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
|
{
|
|
case 0:
|
|
texTableAscanIndexFloat2_3.filterMode = cudaFilterModePoint;
|
|
break;
|
|
case 1:
|
|
texTableAscanIndexFloat2_3.filterMode = cudaFilterModeLinear;
|
|
break;
|
|
}
|
|
texTableAscanIndexFloat2_3.normalized = 0;
|
|
|
|
CUDA_CHECK(cudaBindTextureToArray(&texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3], &texChannelDescTableAscanIndexFloat));
|
|
}
|
|
}
|
|
// Vorberechnung der Koordinaten --> schnelleres Bestimmen der Voxelposition
|
|
float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
|
|
float3 SosVoxelStartPosition;
|
|
SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x) / SOS_RESOLUTION; // Start des Bildes im SOS-Grid aus Positionsdaten bestimmen
|
|
SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y) / SOS_RESOLUTION;
|
|
SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z) / SOS_RESOLUTION;
|
|
// printf("\n\n SosVoxelStartPosition [%f %f %f]\n",SosVoxelStartPosition.x,SosVoxelStartPosition.y,SosVoxelStartPosition.z);
|
|
|
|
// Anzahl der Teiltabellen, bereits hier berechnen oder übergeben
|
|
int TableAscanIndexAllocationCount = ceil((float)aScanWindowSize / (float)maxAscanIndexArraysInTexture); // float is important due to ceiling
|
|
// printf( "TableAscanIndexAllocationCount = (%i/%i) = %i = %i\n", aScanWindowSize, maxAscanIndexArraysInTexture, TableAscanIndexAllocationCount,
|
|
// ceil(aScanWindowSize/maxAscanIndexArraysInTexture));
|
|
|
|
// Call of 3 SAFT Versions - AscanIndex-Varainten
|
|
// #####################################################################################################################################################################
|
|
// ==================================================== Blockmode with SOS-value per Ascan
|
|
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
|
// ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
|
|
|
if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false))
|
|
{ // ==================================================== Blockmode with SOS-value per Ascan
|
|
printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n");
|
|
}
|
|
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false))
|
|
{
|
|
// ==================================================== 3DVolume Mode with SOS-Correction no ATT-Correction
|
|
saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
|
|
|
aScanIndex,
|
|
|
|
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
|
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
|
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
|
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
|
|
}
|
|
else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true))
|
|
{ // ==================================================== 3DVolume Mode with SOS- and ATT-Correction
|
|
|
|
saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
|
|
aScanIndex,
|
|
(float)aScanWindowSize, // maxAscanIndexArraysInTexture
|
|
maxAscanIndexArraysInTexture, // maxSoSReceiverArrayForTexture
|
|
TableAscanIndexAllocationCount, // Anzahl der genutzten Teiltabellen
|
|
IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement,
|
|
blockIndexOffset, speedOfSoundZLayer, gridDimensions, blockDimensions,
|
|
debugMode, debugModeParameter, deviceSAFT_VARIANT,
|
|
deviceOutput
|
|
|
|
);
|
|
}
|
|
|
|
// Unbind Textures
|
|
CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
|
|
|
|
if (ATTMode_3DVolume == false)
|
|
{ // ========= 3DVolume Mode without ATT-Correction
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_0));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_1));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_2));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_3));
|
|
}
|
|
else if (ATTMode_3DVolume == true)
|
|
{ // ========= 3DVolume Mode with ATT-Correction
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_0));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_1));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_2));
|
|
CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
|
|
}
|
|
CUDA_CHECK(cudaGetLastError());
|
|
} |