#include "precalculateSpeedOfSoundKernel.cuh"
#include "rayTracing.cuh"
#include "saftKernel.cuh"
#include "saft.hpp"

namespace {

/// Configure a 3D texture reference for un-normalized, clamped access and bind
/// it to a CUDA array.
///
/// @param texRef               Texture reference to configure and bind.
/// @param cuArray              CUDA array to bind the texture reference to.
/// @param channelDesc          Channel format descriptor passed to the bind call.
/// @param interpolationVariant 0 selects point filtering, 1 selects linear
///                             filtering; any other value leaves the filter
///                             mode of the texture reference untouched.
/// @return Result of cudaBindTextureToArray, so the caller can route it through
///         its own error check.
template <typename TextureRef>
cudaError_t bindClampedVolumeTexture(TextureRef &texRef,
                                     cudaArray_const_t cuArray,
                                     const cudaChannelFormatDesc &channelDesc,
                                     int interpolationVariant)
{
    texRef.addressMode[0] = cudaAddressModeClamp;
    texRef.addressMode[1] = cudaAddressModeClamp;
    texRef.addressMode[2] = cudaAddressModeClamp;

    switch (interpolationVariant) {
    case 0:
        texRef.filterMode = cudaFilterModePoint;
        break;
    case 1:
        texRef.filterMode = cudaFilterModeLinear;
        break;
    default:
        // Unknown variant: keep whatever filter mode is currently set.
        break;
    }

    texRef.normalized = 0;
    return cudaBindTextureToArray(&texRef, cuArray, &channelDesc);
}

} // namespace

/// Launch the A-scan index pre-calculation that uses the previously computed
/// emitter/receiver path tables.
///
/// Binds the emitter and receiver path tables as textures, binds the A-scan
/// index arrays as output surfaces, launches the kernel variant selected by
/// SOSMode_3DVolume / ATTMode_3DVolume and unbinds the path-table textures.
///
/// Launch layout: SOSGrid_XYZ.x threads per block, a grid of
/// SOSGrid_XYZ.y x maxFeasibleSosZLayerCount blocks.
///
/// @param ascanIndex_i              Offset of the A-scan index batch (for repeated calls).
/// @param aScanWindowSize           Number of A-scans that are pre-calculated at most.
/// @param currentSpeedOfSoundZLayer First z-layer in the speed of sound grid the
///                                  pre-calculation is performed for.
/// @param maxFeasibleSosZLayerCount Number of z-layers in the speed of sound grid
///                                  the pre-calculation is performed for.
void SAFTHandler::precalculateAscanIndex_usePaths(int ascanIndex_i, int aScanWindowSize, int currentSpeedOfSoundZLayer,
                                                  int maxFeasibleSosZLayerCount)
{
    // Four 32-bit float channels (float4) for the path tables.
    const cudaChannelFormatDesc pathTableChannelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
    const int interpolationVariant = SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction];

    // Emitter path table.
    CUDA_CHECK(bindClampedVolumeTexture(texTableVoxelToEmitterPathSosBoth_preprocess,
                                        deviceTableVoxelToEmPathSosBothCuArray, pathTableChannelDesc,
                                        interpolationVariant));

    // Receiver path tables; table 0 is always bound, 1 and 2 only if allocated.
    CUDA_CHECK(bindClampedVolumeTexture(texTableVoxelToReceiverPathSosBoth0_preprocess,
                                        deviceTableVoxelToRecPathSosBothCuArray[0], pathTableChannelDesc,
                                        interpolationVariant));
    if (TableVoxelToReceiverPathSosAllocationCount > 1) {
        CUDA_CHECK(bindClampedVolumeTexture(texTableVoxelToReceiverPathSosBoth1_preprocess,
                                            deviceTableVoxelToRecPathSosBothCuArray[1], pathTableChannelDesc,
                                            interpolationVariant));
    }
    if (TableVoxelToReceiverPathSosAllocationCount > 2) {
        CUDA_CHECK(bindClampedVolumeTexture(texTableVoxelToReceiverPathSosBoth2_preprocess,
                                            deviceTableVoxelToRecPathSosBothCuArray[2], pathTableChannelDesc,
                                            interpolationVariant));
    }

    dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1);
    dim3 blocksPerGrid(1, 1, 1);
    blocksPerGrid.x = SOSGrid_XYZ.y;
    blocksPerGrid.y = maxFeasibleSosZLayerCount;
    blocksPerGrid.z = 1;

    // Prepare the output surfaces for the A-scan index tables.
    if (TableAscanIndexAllocationCount > 0) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]));
    }
    if (TableAscanIndexAllocationCount > 1) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]));
    }
    if (TableAscanIndexAllocationCount > 2) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]));
    }
    if (TableAscanIndexAllocationCount > 3) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]));
    }

    // Launch the kernel variant matching the active mode. All variants take the
    // same argument list. The combination "no SOS volume, ATT volume" launches
    // nothing.
    if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false)) {
        // Block mode with one SOS value per A-scan.
        precalculateAscanIndex_usePathsKernel<<<blocksPerGrid, threadsPerBlock>>>(
            ascanIndex_i,                        ///< Offset of the A-scan index batch
            aScanWindowSize,                     ///< Number of A-scans pre-calculated at most
            deviceSosAttFieldCuArray,
            currentSpeedOfSoundZLayer,           ///< First z-layer of the pre-calculation
            maxFeasibleSosZLayerCount,           ///< Number of z-layers of the pre-calculation
            maxSoSReceiverArrayForTexture,
            deviceEmitterIndex_block,            ///< Address of the emitter index data
            deviceReceiverIndex_block,           ///< Address of the receiver index data
            TableAscanIndexAllocationCount,      ///< Number of A-scan blocks required
            maxAscanIndexArraysInTexture,        ///< Max. number of Em/Rec per CUDA array; selects the texture used
            deviceTextureAscanIndexFloatCuArray, ///< Out: A-scan index tables
            SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode,
            debugModeParameter, deviceSAFT_VARIANT);
    } else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false)) {
        // 3D volume mode with SOS correction, no ATT correction.
        precalculateAscanIndex_usePathsKernel_SOS<<<blocksPerGrid, threadsPerBlock>>>(
            ascanIndex_i, aScanWindowSize, deviceSosAttFieldCuArray, currentSpeedOfSoundZLayer,
            maxFeasibleSosZLayerCount, maxSoSReceiverArrayForTexture, deviceEmitterIndex_block,
            deviceReceiverIndex_block, TableAscanIndexAllocationCount, maxAscanIndexArraysInTexture,
            deviceTextureAscanIndexFloatCuArray, SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION,
            SOS_RESOLUTION, debugMode, debugModeParameter, deviceSAFT_VARIANT);
    } else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)) {
        // 3D volume mode with SOS and ATT correction.
        precalculateAscanIndex_usePathsKernel_SOS_ATT<<<blocksPerGrid, threadsPerBlock>>>(
            ascanIndex_i, aScanWindowSize, deviceSosAttFieldCuArray, currentSpeedOfSoundZLayer,
            maxFeasibleSosZLayerCount, maxSoSReceiverArrayForTexture, deviceEmitterIndex_block,
            deviceReceiverIndex_block, TableAscanIndexAllocationCount, maxAscanIndexArraysInTexture,
            deviceTextureAscanIndexFloatCuArray, SOSGrid_XYZ, sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION,
            SOS_RESOLUTION, debugMode, debugModeParameter, deviceSAFT_VARIANT);
    }
    CUDA_CHECK(cudaGetLastError());

    // Unbind the emitter path table texture.
    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToEmitterPathSosBoth_preprocess));
    // Unbind the receiver path table textures.
    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth0_preprocess));
    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth1_preprocess));
    CUDA_CHECK(cudaUnbindTexture(&texTableVoxelToReceiverPathSosBoth2_preprocess));
}

/// Launch the pre-calculation of the path tables from the SOS/ATT field.
///
/// Binds the SOS/ATT field as input texture, binds the emitter table
/// (deviceListGeometry == 0) or the receiver tables (deviceListGeometry == 1)
/// as output surfaces, launches precalculateAverageSpeedOfSoundKernel and
/// unbinds the input texture.
///
/// Launch layout: SOSGrid_XYZ.x threads per block, a grid of
/// SOSGrid_XYZ.y x sosZLayerCount blocks.
///
/// @param firstZLayer          Forwarded to the kernel as first argument.
/// @param sosZLayerCount       Number of z-layers; also the y extent of the grid.
/// @param deviceListGeometry   0 binds the emitter output surface, 1 binds the
///                             receiver output surfaces.
/// @param geometryElementCount Forwarded to the kernel.
void SAFTHandler::precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceListGeometry,
                                                  int geometryElementCount)
{
    dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1); // max. 512 or 1024 threads per block
    dim3 blocksPerGrid(1, 1, 1);               // max. 65,535 blocks per grid dimension
    blocksPerGrid.x = SOSGrid_XYZ.y;
    blocksPerGrid.y = sosZLayerCount;
    blocksPerGrid.z = 1;

    // Two 32-bit float channels for the SOS/ATT field.
    cudaChannelFormatDesc texChannelDescSosAttField = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);

    texRefSosAttField.addressMode[0] = cudaAddressModeClamp;
    texRefSosAttField.addressMode[1] = cudaAddressModeClamp;
    texRefSosAttField.addressMode[2] = cudaAddressModeClamp;
    if (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing] == 1) {
        texRefSosAttField.filterMode = cudaFilterModeLinear; // linear interpolation
    } else {
        texRefSosAttField.filterMode = cudaFilterModePoint; // nearest neighbor
    }
    texRefSosAttField.normalized = 0;
    // Bind the 3D array to the texture reference.
    CUDA_CHECK(cudaBindTextureToArray(&texRefSosAttField, deviceSosAttFieldCuArray, &texChannelDescSosAttField));

    if (deviceListGeometry == 0) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToEmPathSosBoth, deviceTableVoxelToEmPathSosBothCuArray));
    }
    if (deviceListGeometry == 1) {
        if (TableVoxelToReceiverPathSosAllocationCount > 0) {
            CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth0,
                                              deviceTableVoxelToRecPathSosBothCuArray[0]));
        }
        if (TableVoxelToReceiverPathSosAllocationCount > 1) {
            CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth1,
                                              deviceTableVoxelToRecPathSosBothCuArray[1]));
        }
        if (TableVoxelToReceiverPathSosAllocationCount > 2) {
            CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefTableVoxelToRecPathSosBoth2,
                                              deviceTableVoxelToRecPathSosBothCuArray[2]));
        }
    }

    precalculateAverageSpeedOfSoundKernel<<<blocksPerGrid, threadsPerBlock>>>(
        firstZLayer, sosZLayerCount, deviceListGeometry, geometryElementCount, maxSoSReceiverArrayForTexture,
        sosOffset, regionOfInterestOffset, IMAGE_RESOLUTION, SOS_RESOLUTION, debugMode, debugModeParameter);
    CUDA_CHECK(cudaGetLastError());

    CUDA_CHECK(cudaUnbindTexture(&texRefSosAttField));
}

/// Launch fillCuArrayKernel on the A-scan index arrays.
///
/// Binds up to four arrays as output surfaces and launches the fill kernel.
///
/// Launch layout: SOSGrid_XYZ.x threads per block, a grid of
/// SOSGrid_XYZ.y x maxFeasibleSosZLayerCount blocks.
///
/// @param useValue                            Value forwarded to the fill kernel.
/// @param deviceTextureAscanIndexFloatCuArray CUDA arrays to fill.
/// @param TableAscanIndexAllocationCount      Number of arrays to bind (at most
///                                            four surfaces are available).
void SAFTHandler::fillCuArray(float useValue, cudaArray **deviceTextureAscanIndexFloatCuArray,
                              int TableAscanIndexAllocationCount)
{
    dim3 threadsPerBlock(SOSGrid_XYZ.x, 1, 1); // max. 512 or 1024 threads per block
    dim3 blocksPerGrid(1, 1, 1);               // max. 65,535 blocks per grid dimension
    blocksPerGrid.x = SOSGrid_XYZ.y;
    blocksPerGrid.y = maxFeasibleSosZLayerCount;
    blocksPerGrid.z = 1;

    // Step 1: prepare the output surfaces for the A-scan index tables.
    if (TableAscanIndexAllocationCount > 0) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat0, deviceTextureAscanIndexFloatCuArray[0]));
    }
    if (TableAscanIndexAllocationCount > 1) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat1, deviceTextureAscanIndexFloatCuArray[1]));
    }
    if (TableAscanIndexAllocationCount > 2) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat2, deviceTextureAscanIndexFloatCuArray[2]));
    }
    if (TableAscanIndexAllocationCount > 3) {
        CUDA_CHECK(cudaBindSurfaceToArray(outSurfRefAscanIndexFloat3, deviceTextureAscanIndexFloatCuArray[3]));
    }

    // Step 2: run the fill kernel.
    fillCuArrayKernel<<<blocksPerGrid, threadsPerBlock>>>(
        useValue,
        deviceTextureAscanIndexFloatCuArray, ///< Out: arrays to fill
        maxAscanIndexArraysInTexture,
        TableAscanIndexAllocationCount,      ///< Number of surfaces in the array of CUDA arrays
        maxFeasibleSosZLayerCount, ATTMode_3DVolume, debugMode, debugModeParameter);
    CUDA_CHECK(cudaGetLastError());
}

/// Bind all input textures, launch the SAFT reconstruction kernel for the
/// active SOS/ATT mode and unbind the textures again.
///
/// The kernel is launched with the grid/block dimensions produced by
/// reduceKernelDimensions(windowGridDimensions, blockDimensions, ...).
void SAFTHandler::performSAFT(
    int aScanIndex,          ///< The A-scan index is increased by the A-scan batch size in every iteration. It
                             ///< describes the offset into the A-scan samples the SAFT kernel is operating with.
    size_t aScanWindowSize,  ///< A-scan batch size in terms of number of samples within one window.
    int3 IMAGE_SIZE_XYZ,     ///< Image region / ROI size in voxels.
    int3 SOSGrid_XYZ,        ///< SoS grid size in voxels.
    int blockIndexOffset,    ///< Additional offset added to the z component of the block index, required because
                             ///< of the adjustments for partial reconstruction in different z-layers.
    int outputWindowVoxelCount,          ///< Number of voxels in the output window.
    int speedOfSoundZLayer,              ///< Current SoS z-layer offset in the speed of sound grid.
    int speedOfSoundVoxelsWithinZLayers, ///< Number of z-layers in the speed of sound grid touched by the
                                         ///< z-layers of the active zone of reconstruction in the region of
                                         ///< interest.
    int maxFeasibleSosZLayerCount,
    int currentEmIndexUsedForAscanIndexCalculation, ///< Current index of the emitter for which the A-scan index
                                                    ///< is calculated.
    dim3 const &windowGridDimensions, ///< Grid dimensions to be used to launch the SAFT kernel. It is smaller than
                                      ///< the full grid dimensions and only represents the current
                                      ///< reconstruction window.
    dim3 const &gridDimensions,       ///< Full grid dimensions of the reconstruction.
    dim3 const &blockDimensions,      ///< Block dimensions to be used with the SAFT kernel.
    float *deviceSpeedOfSoundField,   ///< Pointer to the SoS grid.
    cudaArray *deviceAScansCuArray    ///< CUDA array holding the A-scans; bound to texRefAscans.
    // cudaStream_t stream            ///< Stream to execute the SAFT kernel on.
)
{
    dim3 reducedGridDimensions, reducedBlockDimensions;
    reduceKernelDimensions(windowGridDimensions, blockDimensions, reducedGridDimensions, reducedBlockDimensions);

    CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS_ATT, cudaFuncCachePreferL1));
    CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex_SOS, cudaFuncCachePreferL1));
    CUDA_CHECK(cudaFuncSetCacheConfig(saftKernelAscanIndex, cudaFuncCachePreferL1));

    // Texture addressing modes (CUDA C Programming Guide, "Texture Memory"):
    //   cudaAddressModeClamp  - return the border values if out of range (default)
    //   cudaAddressModeBorder - return 0 if out of range
    //   cudaAddressModeMirror - mirror the values (normalized coordinates only)
    //   cudaAddressModeWrap   - repeat the values (normalized coordinates only)

    // Texture for the A-scans: one 32-bit float channel, out-of-range reads give 0.
    cudaChannelFormatDesc texChannelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    texRefAscans.addressMode[0] = cudaAddressModeBorder;
    texRefAscans.addressMode[1] = cudaAddressModeBorder;
    if (SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation] == 1) {
        texRefAscans.filterMode = cudaFilterModeLinear; // linear interpolation
    } else {
        texRefAscans.filterMode = cudaFilterModePoint; // nearest neighbor
    }
    texRefAscans.normalized = 0;
    CUDA_CHECK(cudaBindTextureToArray(&texRefAscans, deviceAScansCuArray, &texChannelDesc));

    // A-scan index tables. The channel descriptor is queried from the first
    // array itself so that it always matches the format the arrays were
    // allocated with. NOTE(review): assumes all A-scan index arrays share the
    // format of array 0 - confirm against the allocation code.
    const int interpolationVariant = SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction];
    cudaChannelFormatDesc texChannelDescTableAscanIndexFloat;
    CUDA_CHECK(cudaGetChannelDesc(&texChannelDescTableAscanIndexFloat, deviceTextureAscanIndexFloatCuArray[0]));

    // NOTE: TableAscanIndexAllocationCount here is the outer-scope value, not
    // the per-call table count computed further below.
    if (ATTMode_3DVolume == false) {
        // 3D volume mode without ATT correction.
        CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat1_0, deviceTextureAscanIndexFloatCuArray[0],
                                            texChannelDescTableAscanIndexFloat, interpolationVariant));
        if (TableAscanIndexAllocationCount > 1) {
            CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat1_1, deviceTextureAscanIndexFloatCuArray[1],
                                                texChannelDescTableAscanIndexFloat, interpolationVariant));
        }
        if (TableAscanIndexAllocationCount > 2) {
            CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat1_2, deviceTextureAscanIndexFloatCuArray[2],
                                                texChannelDescTableAscanIndexFloat, interpolationVariant));
        }
        if (TableAscanIndexAllocationCount > 3) {
            CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat1_3, deviceTextureAscanIndexFloatCuArray[3],
                                                texChannelDescTableAscanIndexFloat, interpolationVariant));
        }
    } else if (ATTMode_3DVolume == true) {
        // 3D volume mode with ATT correction.
        CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat2_0, deviceTextureAscanIndexFloatCuArray[0],
                                            texChannelDescTableAscanIndexFloat, interpolationVariant));
        if (TableAscanIndexAllocationCount > 1) {
            CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat2_1, deviceTextureAscanIndexFloatCuArray[1],
                                                texChannelDescTableAscanIndexFloat, interpolationVariant));
        }
        if (TableAscanIndexAllocationCount > 2) {
            CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat2_2, deviceTextureAscanIndexFloatCuArray[2],
                                                texChannelDescTableAscanIndexFloat, interpolationVariant));
        }
        if (TableAscanIndexAllocationCount > 3) {
            CUDA_CHECK(bindClampedVolumeTexture(texTableAscanIndexFloat2_3, deviceTextureAscanIndexFloatCuArray[3],
                                                texChannelDescTableAscanIndexFloat, interpolationVariant));
        }
    }

    // Pre-compute the coordinates so the kernel can derive voxel positions faster.
    float VoxelIncrement = IMAGE_RESOLUTION / SOS_RESOLUTION;
    float3 SosVoxelStartPosition;
    // Start of the image inside the SOS grid, derived from the position data.
    SosVoxelStartPosition.x = (regionOfInterestOffset.x - sosOffset.x) / SOS_RESOLUTION;
    SosVoxelStartPosition.y = (regionOfInterestOffset.y - sosOffset.y) / SOS_RESOLUTION;
    SosVoxelStartPosition.z = (regionOfInterestOffset.z - sosOffset.z) / SOS_RESOLUTION;

    // Number of partial tables used by this call. The float cast is required
    // so that the division is not truncated before the ceiling is taken.
    // Deliberately named differently from TableAscanIndexAllocationCount so it
    // does not shadow the value used for the texture binds above.
    int usedAscanIndexTableCount = ceil((float)aScanWindowSize / (float)maxAscanIndexArraysInTexture);

    // Call one of the SAFT A-scan index variants.
    if ((SOSMode_3DVolume == false) && (ATTMode_3DVolume == false)) {
        // Block mode with one SOS value per A-scan.
        printf("\n\n --- SAFT (AscanIndex) without SOS currently not implemented --- \n");
    } else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == false)) {
        // 3D volume mode with SOS correction, no ATT correction.
        saftKernelAscanIndex_SOS<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
            aScanIndex, (float)aScanWindowSize, maxAscanIndexArraysInTexture,
            usedAscanIndexTableCount, // number of partial tables in use
            IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset,
            speedOfSoundZLayer, gridDimensions, blockDimensions, deviceOutput);
    } else if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)) {
        // 3D volume mode with SOS and ATT correction.
        saftKernelAscanIndex_SOS_ATT<<<reducedGridDimensions, reducedBlockDimensions>>>( // , 0, stream>>>(
            aScanIndex, (float)aScanWindowSize, maxAscanIndexArraysInTexture,
            usedAscanIndexTableCount, // number of partial tables in use
            IMAGE_SIZE_XYZ, SosVoxelStartPosition, IMAGE_RESOLUTION, VoxelIncrement, blockIndexOffset,
            speedOfSoundZLayer, gridDimensions, blockDimensions, debugMode, debugModeParameter, deviceSAFT_VARIANT,
            deviceOutput);
    }

    // Unbind the textures.
    CUDA_CHECK(cudaUnbindTexture(&texRefAscans));
    if (ATTMode_3DVolume == false) {
        // 3D volume mode without ATT correction.
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_0));
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_1));
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_2));
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat1_3));
    } else if (ATTMode_3DVolume == true) {
        // 3D volume mode with ATT correction.
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_0));
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_1));
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_2));
        CUDA_CHECK(cudaUnbindTexture(&texTableAscanIndexFloat2_3));
    }

    CUDA_CHECK(cudaGetLastError());
}