From f744f7bb4ea6576b7e1d67e90ec349e5d32f622c Mon Sep 17 00:00:00 2001 From: kradchen Date: Mon, 16 Oct 2023 13:14:43 +0800 Subject: [PATCH] Remove macros from SOS_ATT --- SAFT_TOFI/src/kernel/saftKernel.cuh | 860 +--------------------------- 1 file changed, 7 insertions(+), 853 deletions(-) diff --git a/SAFT_TOFI/src/kernel/saftKernel.cuh b/SAFT_TOFI/src/kernel/saftKernel.cuh index 4af1dd1..3c7a060 100644 --- a/SAFT_TOFI/src/kernel/saftKernel.cuh +++ b/SAFT_TOFI/src/kernel/saftKernel.cuh @@ -106,16 +106,6 @@ texture texTableAscanIndexF // If Voxel is outside the reconstructed Image leave Kernel if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) return; - - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - printf("\n==== saftKernelAscanIndex_SOS_ATT Kernel ============================ 1 =====\n"); - printf("\n=============================================================================\n"); - printf(" => regionOfInterestVoxel [%d %d %d] Start bei 0\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf("\n=============================================================================\n"); - } - #endif ///////////////////////////////////////////////////////////////////////////////////////// // 2. Determine ///////////////////////////////////////////////////////////////////////////////////////// @@ -124,13 +114,6 @@ texture texTableAscanIndexF // - Variable declarations ///////////////////////////////////////////////////////////////////////////////////////// - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == 1) && (regionOfInterestVoxel.y == 1)) - { - // printf(" Z2[%d] blockIndexOffset[%d]\n", regionOfInterestVoxel.z, blockIndexOffset); - } - #endif - // Memory-Index for this Thread for Output-Array of this Voxel unsigned long long int memoryIndex = (((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) * (unsigned long long int)IMAGE_SIZE_XYZ.x + (unsigned long long int)regionOfInterestVoxel.x); @@ -147,25 +130,8 @@ texture texTableAscanIndexF float const TexturIndexY = SosVoxelf.y + 0.5f; float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths - #ifdef debug_CudaSAFTAscanIndexKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - printf("\n==== SAFT (AscanIndex) Kernel Step 1 ==========================================\n"); - printf(" VoxelIncrement = %3.12f\n", VoxelIncrement ); - printf(" TableAscanIndexAllocationCount = %i\n", TableAscanIndexAllocationCount ); - printf("--------------------------------------------------------------------------------\n"); - printf(" => regionOfInterestVoxel [%d %d %d]\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" => TexturIndexXYZ [%3.3f %3.3f %3.3f]\n", TexturIndexX, TexturIndexY, SosVoxelTextureZ); - printf(" SosVoxelStartPos(SoS) [%3.12f %3.12f %3.12f]\n", SosVoxelStartPosition.x, SosVoxelStartPosition.y, SosVoxelStartPosition.z); - printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - printf("================================================================================\n"); - } - #endif float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - float voxelValue_old = 0.0; // only for debugging necessary - #endif ///////////////////////////////////////////////////////////////////////////////////////// // 3. SAFT-Algorithmus @@ -203,176 +169,7 @@ texture texTableAscanIndexF __syncthreads(); -//#define useOneLoop // Zugriff auf Texturen mit jeweiligen Abfragen // TITAN (Matlab/Kernel) 30.8 / 32.9 #define useSameLoop // Zugriff auf Texturen optimieren. Durch Abfragen von beiden Texturen in der Schleife Overhead verringern // TITAN (Matlab/Kernel) 53.8 / 61.3 - // benötigt das zuvorige Löschen der Ascanindex --> Da nicht klar wie viele Surfaces genutzt werden können sind hier mehrere bis max 4 Stück Implementiert - // Bei halber Nutzung der Textur TITAN (M/K) 27.3 / 37.9 -//#define useWhileLoop // Zugriff auf AscanIndex-Texturen innerhalb einer While-Schleife // TITAN (Matlab/Kernel) 22.6 / 28.7 - -// #ifdef debug_CudaSAFTAscanIndexKernel -// float2 currentSOSVoxel_AscanIndexAttValues; // For Debugging deklaration outside -// #endif - - #ifdef useOneLoop - //#pragma unroll 2 - //for(float ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < 4096.0f; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 40 GV/s //2: 30GV/s, 4: 34GV/s, 8: 34GV/s, 16: 32GV/s - //for(int ascanIndex_i = 0; ascanIndex_i < 1413; ascanIndex_i++) // Alle Ascans durchlaufen // 34 GV/s //2: 29GV/s, 4: 39GV/s, 8: 34GV/s, 9: 35GV/s - { - // if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ - // printf("## i = %f :", i); - // } - - // lade AscanIndex fuer Pfad aus Textur - // ==================================== - //#define variante0 // TITAN (M/K) 20.5 / 25.9 - #define variante0_1 // TITAN (M/K) 30.8 / 32.9 - #ifdef variante0 - // Determine number of current used texture memory for this Ascan -> currentRecTextureIndex = 0,1,2 - //currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); - int currentTextureIndex = (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture); - //TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - //TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ; - //TexturIndexZ_AscanIndex = 2 *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ; - - // if ( currentTextureIndex == 0){ - // VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - // else if ( currentTextureIndex == 1) { - // VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - // else if ( currentTextureIndex == 2){ - // VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - // else if ( currentTextureIndex == 3){ - // VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - switch ( (int)(currentTextureIndex) ) - { - case 0: VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; - case 1: VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; - case 2: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; - case 3: VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; - } - - #endif - #ifdef variante0_1 - // Determine number of current used texture memory for this receiver --> Wird benoetigt - //currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); - //int currentRecTextureIndex = (int)floor(i /maxSoSReceiverArrayForTexture); - TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - - if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 0){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 1) { - VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 2){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 3){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - #endif - #ifdef variante1 - - // Determine number of current used texture memory for this receiver --> Wird benoetigt - //currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); - - //if ( currentRecTextureIndex <= maxSoSReceiverArrayForTexture){ - //if ( currentRecTextureIndex <= maxSoSReceiverArrayForTexture){ - if ( i <= 709.0f){ - //TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - //TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - TexturIndexZ_AscanIndex = 2*i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - // if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ - // printf("#### i1 = %f : VoxelAscanIndex %f", i, VoxelAscanIndex); - // } - } - //else if ( currentRecTextureIndex <= 2*maxSoSReceiverArrayForTexture) { - //else if ( currentRecTextureIndex <= 1420) { - else if ( i <= 1412.0f) { - //TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - //TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - // if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ - // printf("#### i2 = %f : VoxelAscanIndex %f", i-maxSoSReceiverArrayForTexture, VoxelAscanIndex); - // } - } - //else if ( currentRecTextureIndex <= 3*maxSoSReceiverArrayForTexture) { - //else if ( currentRecTextureIndex <= 2130) { - // else if ( i <= 2130.0f) { - // //TexturIndexZ_AscanIndex = 2 * (i - 2*maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - // //TexturIndexZ_AscanIndex = 2 * (i - 1420) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - // TexturIndexZ_AscanIndex = 2 * (i - 1420) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - // VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - #endif - #ifdef variante2 - - // Determine number of current used texture memory for this receiver --> Wird benoetigt - //currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture); - //TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert - TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - #endif - - //voxelValue += attenuationFactor * tex2D( texRefAscans, sampleTime - 0.5f, (float)i + 0.5f); // i gibt Index fuer Ascan an - //voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, (float)i + 0.5f); // i gibt Index fuer Ascan an - voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - //voxelValue += tex2D( texRefAscans, VoxelAscanIndex, i + 0.5f); // i gibt Index fuer Ascan an // Wenn +0.5f schon im Preprocessing drin ist sogar langsamer?! - // if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ - // printf("= %f\n", voxelValue); - // } - - // #ifdef debug_CudaSAFTAscanIndexKernel - // if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - // { - // //if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)) - // //{ - // //printf("\n==== SAFT (AscanIndex) Kernel Step 4 - End ===========================================\n"); - // - // printf(" => regionOfInterestVoxel [%d %d %d] (Start bei 0)", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - // printf("--------------------------------------------------------------------------------\n"); - // printf(" ascanIndex_i %f\n", ascanIndex_i); - //// printf(" currentAScanIndex %i\n", currentAScanIndex); - //// printf(" currentEmitterIndex %i\n", currentEmitterIndex); - //// printf(" currentReceiverIndex %i\n", currentReceiverIndex); - // - // printf(" SosVoxelf [%3.12f %3.12f %3.12f] (Start bei 0)\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - // printf(" speedOfSoundZLayer [%3i]\n", speedOfSoundZLayer); - // - // printf(" TextureIndexXYZ [%3.12f %3.12f %3.12f]\n", TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - // printf(" SosVoxelTextureZ %f\n", SosVoxelTextureZ); - // - // //printf(" emitterPosition [%3.12f %3.12f %3.12f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - // //printf(" emitterDistance %f\n", emitterDistance); - // //printf(" receiverPosition [%3.12f %3.12f %3.12f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - // //printf(" receiverDistance %f\n", receiverDistance); - // //printf(" sampleRate %e\n", sampleRate); - // //printf(" totalDistance %f\n\n", totalDistance); - // - // // printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount); - // // printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount); - // // printf(" -> emitterReceiverTotalVoxelCount %f\n", emitterReceiverTotalVoxelCount); - // // printf(" emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum); - // // printf(" receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum); - // // printf(" -> averageSpeed %f\n", averageSpeed); - // //printf(" sampleTime %f\n", sampleTime); - // - // // printf(" emitterVoxelTotalAttenuationSum %f\n", emitterVoxelTotalAttenuationSum); - // // printf(" receiverVoxelTotalAttenuationSum %f\n", receiverVoxelTotalAttenuationSum); - // // printf(" emitterReceiverTotalVoxelCount %f\n", emitterReceiverTotalVoxelCount); - // // printf(" -> attenuationFactor %f\n\n", attenuationFactor); - // - // printf(" VoxelAscanIndex %f\n", VoxelAscanIndex); - // - // - // printf(" => voxelValue = %f\n", voxelValue); - // - // printf("======================================================\n"); - // //} - // } - // #endif - } - #endif - #ifdef useSameLoop #define tryOptimize_SOSATT //#define addTexturIndexZ_AscanIndexInLoop @@ -380,26 +177,16 @@ texture texTableAscanIndexF if ( TableAscanIndexAllocationCount == 1){ #ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data - //#ifndef debug_CudaSAFTAscanIndexKernel float2 currentSOSVoxel_AscanIndexAttValues; - //#endif float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - //#pragma unroll 2 for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - //for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.8GV/s { - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif + TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert + currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); @@ -408,81 +195,15 @@ texture texTableAscanIndexF #endif voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - - //#ifdef debug_CudaSAFTAscanIndexKernelDataAccess -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) -// { -// printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue); -// //voxelValue_old = voxelValue; -// } - //#endif - } - #else - float2 currentSOSVoxel_AscanIndexAttValues; - - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - // nutze immer nur 1tes Surface - TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - } #endif } else if (TableAscanIndexAllocationCount == 2){ - #ifndef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data - - float2 currentSOSVoxel_AscanIndexAttValues_0; - float2 currentSOSVoxel_AscanIndexAttValues_1; - - float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; - float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; - - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - - //#pragma unroll 2 - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s - { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - // load TOF-Index from Textur 0-3 - currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - #ifdef useSync - __syncthreads(); - #endif - - voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an - voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); - } - #else + #ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data float2 currentSOSVoxel_AscanIndexAttValues; //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s { @@ -492,27 +213,9 @@ texture texTableAscanIndexF voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - // 2ten Teil mit selben Index laden currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - } #endif } @@ -528,20 +231,11 @@ texture texTableAscanIndexF float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - //#pragma unroll 2 for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s { - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif + TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert // load TOF-Index from Textur 0-3 currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); @@ -555,57 +249,6 @@ texture texTableAscanIndexF voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); } - #else - - - float2 currentSOSVoxel_AscanIndexAttValues; - - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - - // 1ten Teil mit selben Index laden - TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - - - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f); - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 2 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - } #endif } @@ -624,19 +267,10 @@ texture texTableAscanIndexF float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #endif - //#pragma unroll 2 for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s { - - #ifdef addTexturIndexZ_AscanIndexInLoop - TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert - #else - TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - #endif + TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert //syncthreads(); // load TOF-Index from Textur 0-3 currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); @@ -653,497 +287,17 @@ texture texTableAscanIndexF voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i); } - #else - float2 currentSOSVoxel_AscanIndexAttValues; + - //#pragma unroll 3 - //for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s - for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s - { - - - // 1ten Teil mit selben Index laden - TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - - - // 2ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - - - // 3ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f); - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 2 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - - - // 4ten Teil mit selben Index laden - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 3*maxAscanIndexArraysInTexture +0.5f); - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - 3 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - // Für Version mit SOS und ATT - //currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - //voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - } - #endif + #endif } -// else -// { -// // Do nothing due to until now only 4 are defined -// } #endif - #ifdef useWhileLoop -//#define varianteWhile0 // Hart-Codierte Aufteilung //etwas schneller -#define varianteWhile1 // mit Variabler Aufteilung -//#define varianteWhile3 // Hart-Codierte Aufteilung mit ?-Abfrage // TITAN (M/K) 32.8 / 32.7 - - #ifdef varianteWhile0 - float ascanIndex_i = 0.0; - float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll - float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ; - do{ - - if ( ascanIndex_i >= compareIndexNextTexture){ - compareIndexNextTexture += maxAscanIndexArraysInTexture; - IndexTextureOffset += maxAscanIndexArraysInTexture; - } - - // Calculate the Z-Index for storing the AscanIndex value - //TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; - TexturIndexZ_AscanIndex = 2 *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ; - - if ( compareIndexNextTexture <= 2048){ - if (compareIndexNextTexture == 1024){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - else { - VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - } - else{ - if (compareIndexNextTexture == 3072){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - else { - VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - } - -// if ( compareIndexNextTexture == 1024){ -// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } -// else if ( compareIndexNextTexture == 2048) { -// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } -// else if ( compareIndexNextTexture == 3072){ -// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } -// else if ( compareIndexNextTexture == 4096){ -// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - -// switch ( (int)(compareIndexNextTexture) ) -// { -// case 1024: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; -// case 2048: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; -// case 3072: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; -// case 4096: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break; -// } - - - voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ -// -// if ((ascanIndex_i <= 10)) // Anfang -// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte -// //if ((ascanIndex_i >= 1400)) // Ende -// //if ((ascanIndex_i >= 2800)) // Ende -// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture)); -// -// } - - ascanIndex_i += 1.0f; - - - // if (ascanIndex_i == aScanWindowSize) break; //20,7 GV/s - //}while(1); - }while(ascanIndex_i < aScanWindowSize); //20,7 GV/s - //}while(ascanIndex_i < 3900.0f); //14,5 GV/s - #endif - - #ifdef varianteWhile1 - float ascanIndex_i = 0.0; - float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll - float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - // TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ; - - do{ - - if ( ascanIndex_i >= compareIndexNextTexture){ - compareIndexNextTexture += maxAscanIndexArraysInTexture; - IndexTextureOffset += maxAscanIndexArraysInTexture; - } - - // Calculate the Z-Index for storing the AscanIndex value - //TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z; - TexturIndexZ_AscanIndex = 2.0f *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ; - - if ( compareIndexNextTexture == maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 2*maxAscanIndexArraysInTexture) { - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - else if ( compareIndexNextTexture == 4*maxAscanIndexArraysInTexture){ - //VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); } - - //currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex; - //currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor; - #define Interpolation_Textur_Standard - //#define Interpolation_Cosinus - //#define Interpolation_Spline - - #ifdef Interpolation_Textur_Standard - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - #endif - - #ifdef Interpolation_Cosinus - float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndex_i + 0.5f); - float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndex_i + 0.5f); - float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x); - float mu2 = (1-cos(mu*3.14159265359f))/2; - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (y1*(1-mu2)+y2*mu2); - #endif - - #ifdef Interpolation_Spline - float y0 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); - float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndex_i + 0.5f); - float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndex_i + 0.5f); - float y3 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 2.5f, ascanIndex_i + 0.5f); - float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x); - float mu2 = mu*mu; - float a0 = y3 - y2 - y0 + y1; - float a1 = y0 - y1 - a0; - float a2 = y2 - y0; - float a3 = y1; - voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (a0*mu*mu2+a1*mu2+a2*mu+a3); - #endif - - #ifdef debug_CudaSAFTAscanIndexKernelDataAccess - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30)) - { - printf("ascanIndex_i(%.0f) - %.0f: VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, floor(compareIndexNextTexture/maxAscanIndexArraysInTexture)-1.0, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue); - voxelValue_old = voxelValue; - } - #endif - - - -// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ -// -// if ((ascanIndex_i <= 10)) // Anfang -// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte -// //if ((ascanIndex_i >= 1400)) // Ende -// //if ((ascanIndex_i >= 2800)) // Ende -// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture)); -// -// } - - ascanIndex_i += 1.0f; - }while(ascanIndex_i < aScanWindowSize); - #endif - - - #ifdef varianteWhile3 - float ascanIndex_i = 0.0; - float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll - float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex - TexturIndexZ_AscanIndex = (ascanIndex_i + ascanIndex_i) + SosVoxelTextureZ; - do{ - - if ( ascanIndex_i >= compareIndexNextTexture){ - compareIndexNextTexture += maxAscanIndexArraysInTexture; - IndexTextureOffset += maxAscanIndexArraysInTexture; - TexturIndexZ_AscanIndex -= (maxAscanIndexArraysInTexture+maxAscanIndexArraysInTexture); - } - - // Calculate the Z-Index for storing the AscanIndex value - //TexturIndexZ_AscanIndex = 2 * (ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ; - //TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ - (IndexTextureOffset + IndexTextureOffset) ; - TexturIndexZ_AscanIndex += 2.0f; - - if ( compareIndexNextTexture <= 2*maxAscanIndexArraysInTexture){ - if (compareIndexNextTexture == maxAscanIndexArraysInTexture){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - else { - VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - } - else{ - if (compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){ - VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - else { - VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); - } - } - - voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an - - ascanIndex_i += 1.0f; - - }while(ascanIndex_i < aScanWindowSize); //32,1 GV/s - #endif - - - #endif - -// #ifdef GTX_Kepler -// __syncthreads(); -// #endif - #ifdef debug_CudaSAFTAscanIndexKernel - //printf(" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> debugModeParameter %f\n", debugModeParameter); - #ifdef debug_CudaSAFTKernel - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){ - - printf("#### debugMode = %f \n", debugMode); - printf("#### sumDiffAverageSpeed = %f \n", sumDiffAverageSpeed); - printf("#### DiffAverageSpeed = %f \n", DiffAverageSpeed); - printf("#### analyticAverageSpeed = %f \n", analyticAverageSpeed); - printf("#### averageSpeed = %f \n", averageSpeed); - - printf("#### S_Wasser = %f \n", S_Wasser); - printf("#### S_Kugel = %f \n", S_Kugel); - printf("#### cWasser = %f \n", cWasser); - printf("#### cKugel = %f \n", cKugel); - } - #endif - //switch ( debugMode >= 60 ? (debugMode-60) : debugMode >= 30 ? (debugMode-30) : debugMode ) - switch ( (int)(debugMode) ) - { - case 0: output[memoryIndex] = (double)voxelValue; break; - //case 0: output[memoryIndex] = (double)1.5; break; - //case 1: output[memoryIndex] = (double)averageSpeed; break; - //case 2: output[memoryIndex] = (double)totalDistance; break; - //case 3: output[memoryIndex] = (double)sampleTime; break; - //case 4: output[memoryIndex] = (double)attenuationFactor; break; - - case 1: output[memoryIndex] = (double)blockIndex; break; - case 2: output[memoryIndex] = (double)memoryIndex; break; - - case 3: output[memoryIndex] = (double)regionOfInterestVoxel.x; break; // Coordinates in Voxel - case 4: output[memoryIndex] = (double)regionOfInterestVoxel.y; break; - case 5: output[memoryIndex] = (double)regionOfInterestVoxel.z; break; - - case 6: output[memoryIndex] = (double)debugMode; break; - - //case 7: output[memoryIndex] = (double)SosVoxelStartPosition.x; break; // VoxelAscanIndex - //case 7: output[memoryIndex] = (double)TexturIndexX; break; // VoxelAscanIndex - //case 7: output[memoryIndex] = (double)TexturIndexY; break; // VoxelAscanIndex - //case 7: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; // VoxelAscanIndex - - //case 7: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.x; break; // VoxelAscanIndex - //case 8: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.y; break; // VoxelAttenuation - - - - //case 9: output[memoryIndex] = (double)voxelPosition.x; break; // Coordinates in m - //case 10: output[memoryIndex] = (double)voxelPosition.y; break; - //case 11: output[memoryIndex] = (double)voxelPosition.z; break; - - //case 12: output[memoryIndex] = (double)emitterPosition.x; break; - //case 13: output[memoryIndex] = (double)emitterPosition.y; break; - //case 14: output[memoryIndex] = (double)emitterPosition.z; break; - //case 15: output[memoryIndex] = (double)receiverPosition.x; break; - //case 16: output[memoryIndex] = (double)receiverPosition.y; break; - //case 17: output[memoryIndex] = (double)receiverPosition.z; break; - //case 18: output[memoryIndex] = (double)emitterDistance; break; - //case 19: output[memoryIndex] = (double)receiverDistance; break; - - //case 20: output[memoryIndex] = (double)SosVoxel.x; break; // Coordinates in SOS-Voxel - //case 21: output[memoryIndex] = (double)SosVoxel.y; break; - //case 22: output[memoryIndex] = (double)SosVoxel.z; break; - - case 15: output[memoryIndex] = (double)SosVoxelf.x; break;// Coordinates in SOS-Voxel (float) - case 16: output[memoryIndex] = (double)SosVoxelf.y; break; - case 17: output[memoryIndex] = (double)SosVoxelf.z; break; - -// #ifdef noGeometryLoading -// case 26: output[memoryIndex] = (double)currentAScanIndex; break; -// //case 27: output[memoryIndex] = (double)currentEmitterIndex; break; -// case 28: output[memoryIndex] = (double)currentReceiverIndex; break; -// #endif - - - //case 29: output[memoryIndex] = (double)emitterVoxelTotalSpeedSum; break; - //case 30: output[memoryIndex] = (double)emitterVoxelVoxelCount; break; - //case 31: output[memoryIndex] = (double)receiverVoxelTotalSpeedSum; break; - //case 32: output[memoryIndex] = (double)receiverVoxelVoxelCount; break; - - //case 33: output[memoryIndex] = (double)emitterReceiverTotalVoxelCount; break; - //case 34: output[memoryIndex] = (double)emitterVoxelTotalAttenuationSum; break; // Attenuation on Emitter Path - //case 35: output[memoryIndex] = (double)receiverVoxelTotalAttenuationSum;break; // on Receiver Path - - - case 36: output[memoryIndex] = (double)SosVoxelTextureZ; break; - //case 37: output[memoryIndex] = (double)TexturIndexZEmitter; break; - case 38: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; - case 39: output[memoryIndex] = (double)speedOfSoundZLayer; break; - - //case 40: output[memoryIndex] = (double)sumAverageSpeed; break; - - case 41: output[memoryIndex] = (double)TexturIndexX; break; - case 42: output[memoryIndex] = (double)TexturIndexY; break; - case 43: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; - //case 44: output[memoryIndex] = (double)VoxelAscanIndex; break; - - - #ifdef debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation - case 25: output[memoryIndex] = (double)DiffAverageSpeed; break; // Fehler zwischen analytisch und bresenham (nur 1-Ascan) - case 26: output[memoryIndex] = (double)sumDiffAverageSpeed; break; // Summe Mittlerer Fehler über alle A-Scans - case 27: output[memoryIndex] = (double)stdDiffAverageSpeed; break; // Standartabweichung über alle A-Scans // Dafür brauche ich mittelwert --> Matlab - case 28: output[memoryIndex] = (double)sumDiffAnalyticWithoutSpeed; break; - case 29: output[memoryIndex] = (double)sumAnalyticAverageSpeed; break; - #endif - - default: output[memoryIndex] = (double)0; break; - } - #else __syncthreads(); output[memoryIndex] = (double)voxelValue; - #endif - - #ifdef debug_CudaSAFTKernel - if (debugMode != 0) - { - if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)) - { - //printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - //if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){ - printf("\n saftKernel: debugMode [%f]\n", debugMode); - //printf(" => voxelValue = %f\n", voxelValue); - //printf(" SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z); - printf(" (current)speedOfSoundZLayer %d\n", speedOfSoundZLayer); - printf(" maxFeasibleSosZLayerCount [%i]\n", maxFeasibleSosZLayerCount); - - printf(" SosVoxelf [%3.15f %3.15f %3.15f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z); - - // SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - printf(" SosVoxelTextureZ [%3.15f]\n", SosVoxelTextureZ); - - // TexturIndexZEmitter = maxFeasibleSosZLayerCount * (currentEmitterIndex-1) + SosVoxelTextureZ; // Index für Zugriff auf Textur - // TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur - printf(" TexturIndexXYZ Em/Rec [%3.15f %3.15f %3.15f/%3.15f]\n\n", TexturIndexX, TexturIndexY, TexturIndexZEmitter, TexturIndexZReceiver); - - - switch (deviceSAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]) - //switch ( debugMode >= 60 ? 60 : debugMode >= 30 ? 30 : 0 ) // drei Bereiche - { - case 0: // mit Textur - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur - printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver); - break; - case 1: // mit Textur interpoliert - //TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Index für Zugriff auf Textur - printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) + 0.5f = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver); - break; - } - - // // Speicher in Texturformat - // // Indexberechnung für Einsatz des Texturmemorys - // float xmax = SOSGrid_XYZ.x; - // float ymax = SOSGrid_XYZ.y; - // float zmax = (float)maxFeasibleSosZLayerCount; - // float i_x = SosVoxel.x; - // float i_y = SosVoxel.y; - // float i_z = (float)(int)(SosVoxelTextureZ); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer); - - //Index = xmax*(ymax*(zmax*geometryIndexCounter+i_z)+i_y)+i_x; - printf(" currentEmitterIndex %d\n", currentEmitterIndex); - printf(" currentReceiverIndex %d\n", currentReceiverIndex); - printf(" (currentReceiverIndex-1) mod maxSoSReceiverArrayForTexture) %d\n", ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture)); - - printf(" EmIndex = xmax(%i)*(ymax(%i)*( TexturIndexZEmitter(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZEmitter, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZEmitter )+(int)TexturIndexY)+(int)TexturIndexX); - printf(" RecIndex = xmax(%i)*(ymax(%i)*( TexturIndexZReceiver(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZReceiver, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZReceiver)+(int)TexturIndexY)+(int)TexturIndexX); - - - printf(" emitterPosition [%3.15f %3.15f %3.15f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount); - printf(" 1/emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum); - - printf(" receiverPosition [%3.15f %3.15f %3.15f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount); - printf(" 1/receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum); - - printf(" => averageSpeed %f\n\n", averageSpeed); - // printf(" emitterPosition [%f %f %f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z); - // printf(" emitterDistance %f\n", emitterDistance); - // printf(" receiverPosition [%f %f %f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z); - // printf(" receiverDistance %f\n", receiverDistance); - // printf(" sampleTime %f\n", sampleTime); - // printf(" totalDistance %f\n\n", totalDistance); - // - // printf(" analyticAverageSpeed %f\n", analyticAverageSpeed); - // printf(" analyticTotalTime %f\n", analyticTotalTime); - // printf(" analyticSampleTime %f\n", analyticSampleTime); - // printf(" S_Wasser(%f) S_Kugel(%f) \n", S_Wasser, S_Kugel); - // printf(" cWasser(%f) cKugel(%f) \n", cWasser, cKugel); - // printf(" sampleRate %.10f\n", sampleRate); - printf("\n"); - - // printf(" => voxelValue = %3.12f\n", voxelValue); - - - //} - } - } - #endif - }