Remove macros from SOS_ATT

This commit is contained in:
kradchen
2023-10-16 13:14:43 +08:00
parent d63f255db2
commit f744f7bb4e

View File

@@ -106,16 +106,6 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
// If Voxel is outside the reconstructed Image leave Kernel // If Voxel is outside the reconstructed Image leave Kernel
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z)) if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
return; return;
#ifdef debug_CudaSAFTKernel
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))
{
printf("\n==== saftKernelAscanIndex_SOS_ATT Kernel ============================ 1 =====\n");
printf("\n=============================================================================\n");
printf(" => regionOfInterestVoxel [%d %d %d] Start bei 0\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
printf("\n=============================================================================\n");
}
#endif
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
// 2. Determine // 2. Determine
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
@@ -124,13 +114,6 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
// - Variable declarations // - Variable declarations
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
#ifdef debug_CudaSAFTKernel
if ((regionOfInterestVoxel.x == 1) && (regionOfInterestVoxel.y == 1))
{
// printf(" Z2[%d] blockIndexOffset[%d]\n", regionOfInterestVoxel.z, blockIndexOffset);
}
#endif
// Memory-Index for this Thread for Output-Array of this Voxel // Memory-Index for this Thread for Output-Array of this Voxel
unsigned long long int memoryIndex = (((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) * (unsigned long long int)IMAGE_SIZE_XYZ.x + (unsigned long long int)regionOfInterestVoxel.x); unsigned long long int memoryIndex = (((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) * (unsigned long long int)IMAGE_SIZE_XYZ.x + (unsigned long long int)regionOfInterestVoxel.x);
@@ -147,25 +130,8 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
float const TexturIndexY = SosVoxelf.y + 0.5f; float const TexturIndexY = SosVoxelf.y + 0.5f;
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
#ifdef debug_CudaSAFTAscanIndexKernel
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))
{
printf("\n==== SAFT (AscanIndex) Kernel Step 1 ==========================================\n");
printf(" VoxelIncrement = %3.12f\n", VoxelIncrement );
printf(" TableAscanIndexAllocationCount = %i\n", TableAscanIndexAllocationCount );
printf("--------------------------------------------------------------------------------\n");
printf(" => regionOfInterestVoxel [%d %d %d]\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
printf(" => TexturIndexXYZ [%3.3f %3.3f %3.3f]\n", TexturIndexX, TexturIndexY, SosVoxelTextureZ);
printf(" SosVoxelStartPos(SoS) [%3.12f %3.12f %3.12f]\n", SosVoxelStartPosition.x, SosVoxelStartPosition.y, SosVoxelStartPosition.z);
printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z);
printf("================================================================================\n");
}
#endif
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
float voxelValue_old = 0.0; // only for debugging necessary
#endif
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
// 3. SAFT-Algorithmus // 3. SAFT-Algorithmus
@@ -203,176 +169,7 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
__syncthreads(); __syncthreads();
//#define useOneLoop // Zugriff auf Texturen mit jeweiligen Abfragen // TITAN (Matlab/Kernel) 30.8 / 32.9
#define useSameLoop // Zugriff auf Texturen optimieren. Durch Abfragen von beiden Texturen in der Schleife Overhead verringern // TITAN (Matlab/Kernel) 53.8 / 61.3 #define useSameLoop // Zugriff auf Texturen optimieren. Durch Abfragen von beiden Texturen in der Schleife Overhead verringern // TITAN (Matlab/Kernel) 53.8 / 61.3
// benötigt das zuvorige Löschen der Ascanindex --> Da nicht klar wie viele Surfaces genutzt werden können sind hier mehrere bis max 4 Stück Implementiert
// Bei halber Nutzung der Textur TITAN (M/K) 27.3 / 37.9
//#define useWhileLoop // Zugriff auf AscanIndex-Texturen innerhalb einer While-Schleife // TITAN (Matlab/Kernel) 22.6 / 28.7
// #ifdef debug_CudaSAFTAscanIndexKernel
// float2 currentSOSVoxel_AscanIndexAttValues; // For Debugging deklaration outside
// #endif
#ifdef useOneLoop
//#pragma unroll 2
//for(float ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s
for(float ascanIndex_i = 0.0f; ascanIndex_i < 4096.0f; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 40 GV/s //2: 30GV/s, 4: 34GV/s, 8: 34GV/s, 16: 32GV/s
//for(int ascanIndex_i = 0; ascanIndex_i < 1413; ascanIndex_i++) // Alle Ascans durchlaufen // 34 GV/s //2: 29GV/s, 4: 39GV/s, 8: 34GV/s, 9: 35GV/s
{
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
// printf("## i = %f :", i);
// }
// lade AscanIndex fuer Pfad aus Textur
// ====================================
//#define variante0 // TITAN (M/K) 20.5 / 25.9
#define variante0_1 // TITAN (M/K) 30.8 / 32.9
#ifdef variante0
// Determine number of current used texture memory for this Ascan -> currentRecTextureIndex = 0,1,2
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
int currentTextureIndex = (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture);
//TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
//TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ;
//TexturIndexZ_AscanIndex = 2 *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
// if ( currentTextureIndex == 0){
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// else if ( currentTextureIndex == 1) {
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// else if ( currentTextureIndex == 2){
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// else if ( currentTextureIndex == 3){
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
switch ( (int)(currentTextureIndex) )
{
case 0: VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
case 1: VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
case 2: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
case 3: VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
}
#endif
#ifdef variante0_1
// Determine number of current used texture memory for this receiver --> Wird benoetigt
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
//int currentRecTextureIndex = (int)floor(i /maxSoSReceiverArrayForTexture);
TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 0){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 1) {
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 2){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 3){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
#endif
#ifdef variante1
// Determine number of current used texture memory for this receiver --> Wird benoetigt
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
//if ( currentRecTextureIndex <= maxSoSReceiverArrayForTexture){
//if ( currentRecTextureIndex <= maxSoSReceiverArrayForTexture){
if ( i <= 709.0f){
//TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
//TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
TexturIndexZ_AscanIndex = 2*i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
// printf("#### i1 = %f : VoxelAscanIndex %f", i, VoxelAscanIndex);
// }
}
//else if ( currentRecTextureIndex <= 2*maxSoSReceiverArrayForTexture) {
//else if ( currentRecTextureIndex <= 1420) {
else if ( i <= 1412.0f) {
//TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
//TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
// printf("#### i2 = %f : VoxelAscanIndex %f", i-maxSoSReceiverArrayForTexture, VoxelAscanIndex);
// }
}
//else if ( currentRecTextureIndex <= 3*maxSoSReceiverArrayForTexture) {
//else if ( currentRecTextureIndex <= 2130) {
// else if ( i <= 2130.0f) {
// //TexturIndexZ_AscanIndex = 2 * (i - 2*maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
// //TexturIndexZ_AscanIndex = 2 * (i - 1420) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
// TexturIndexZ_AscanIndex = 2 * (i - 1420) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
#endif
#ifdef variante2
// Determine number of current used texture memory for this receiver --> Wird benoetigt
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
//TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
#endif
//voxelValue += attenuationFactor * tex2D( texRefAscans, sampleTime - 0.5f, (float)i + 0.5f); // i gibt Index fuer Ascan an
//voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, (float)i + 0.5f); // i gibt Index fuer Ascan an
voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
//voxelValue += tex2D( texRefAscans, VoxelAscanIndex, i + 0.5f); // i gibt Index fuer Ascan an // Wenn +0.5f schon im Preprocessing drin ist sogar langsamer?!
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
// printf("= %f\n", voxelValue);
// }
// #ifdef debug_CudaSAFTAscanIndexKernel
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
// {
// //if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ))
// //{
// //printf("\n==== SAFT (AscanIndex) Kernel Step 4 - End ===========================================\n");
//
// printf(" => regionOfInterestVoxel [%d %d %d] (Start bei 0)", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
// printf("--------------------------------------------------------------------------------\n");
// printf(" ascanIndex_i %f\n", ascanIndex_i);
//// printf(" currentAScanIndex %i\n", currentAScanIndex);
//// printf(" currentEmitterIndex %i\n", currentEmitterIndex);
//// printf(" currentReceiverIndex %i\n", currentReceiverIndex);
//
// printf(" SosVoxelf [%3.12f %3.12f %3.12f] (Start bei 0)\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z);
// printf(" speedOfSoundZLayer [%3i]\n", speedOfSoundZLayer);
//
// printf(" TextureIndexXYZ [%3.12f %3.12f %3.12f]\n", TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
// printf(" SosVoxelTextureZ %f\n", SosVoxelTextureZ);
//
// //printf(" emitterPosition [%3.12f %3.12f %3.12f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z);
// //printf(" emitterDistance %f\n", emitterDistance);
// //printf(" receiverPosition [%3.12f %3.12f %3.12f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z);
// //printf(" receiverDistance %f\n", receiverDistance);
// //printf(" sampleRate %e\n", sampleRate);
// //printf(" totalDistance %f\n\n", totalDistance);
//
// // printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount);
// // printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount);
// // printf(" -> emitterReceiverTotalVoxelCount %f\n", emitterReceiverTotalVoxelCount);
// // printf(" emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum);
// // printf(" receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum);
// // printf(" -> averageSpeed %f\n", averageSpeed);
// //printf(" sampleTime %f\n", sampleTime);
//
// // printf(" emitterVoxelTotalAttenuationSum %f\n", emitterVoxelTotalAttenuationSum);
// // printf(" receiverVoxelTotalAttenuationSum %f\n", receiverVoxelTotalAttenuationSum);
// // printf(" emitterReceiverTotalVoxelCount %f\n", emitterReceiverTotalVoxelCount);
// // printf(" -> attenuationFactor %f\n\n", attenuationFactor);
//
// printf(" VoxelAscanIndex %f\n", VoxelAscanIndex);
//
//
// printf(" => voxelValue = %f\n", voxelValue);
//
// printf("======================================================\n");
// //}
// }
// #endif
}
#endif
#ifdef useSameLoop #ifdef useSameLoop
#define tryOptimize_SOSATT #define tryOptimize_SOSATT
//#define addTexturIndexZ_AscanIndexInLoop //#define addTexturIndexZ_AscanIndexInLoop
@@ -380,26 +177,16 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
if ( TableAscanIndexAllocationCount == 1){ if ( TableAscanIndexAllocationCount == 1){
#ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data #ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data
//#ifndef debug_CudaSAFTAscanIndexKernel
float2 currentSOSVoxel_AscanIndexAttValues; float2 currentSOSVoxel_AscanIndexAttValues;
//#endif
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f; float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
//#pragma unroll 2 //#pragma unroll 2
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
//for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
{ {
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#else
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
@@ -408,81 +195,15 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
#endif #endif
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
//#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
// {
// printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue);
// //voxelValue_old = voxelValue;
// }
//#endif
}
#else
float2 currentSOSVoxel_AscanIndexAttValues;
//#pragma unroll 3
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
{
// nutze immer nur 1tes Surface
TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
} }
#endif #endif
} }
else if (TableAscanIndexAllocationCount == 2){ else if (TableAscanIndexAllocationCount == 2){
#ifndef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data #ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data
float2 currentSOSVoxel_AscanIndexAttValues_0;
float2 currentSOSVoxel_AscanIndexAttValues_1;
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
//#pragma unroll 2
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
{
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#else
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
// load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
#ifdef useSync
__syncthreads();
#endif
voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
}
#else
float2 currentSOSVoxel_AscanIndexAttValues; float2 currentSOSVoxel_AscanIndexAttValues;
//#pragma unroll 3 //#pragma unroll 3
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
{ {
@@ -492,27 +213,9 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// 2ten Teil mit selben Index laden // 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f); voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f);
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
} }
#endif #endif
} }
@@ -528,20 +231,11 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f; float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f;
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
//#pragma unroll 2 //#pragma unroll 2
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
{ {
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#else
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
// load TOF-Index from Textur 0-3 // load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
@@ -555,57 +249,6 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i); voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
} }
#else
float2 currentSOSVoxel_AscanIndexAttValues;
//#pragma unroll 3
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
{
// 1ten Teil mit selben Index laden
TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f);
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// 3ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f);
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 2 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
}
#endif #endif
} }
@@ -624,19 +267,10 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f; float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f;
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f; float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f;
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
//#pragma unroll 2 //#pragma unroll 2
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
{ {
#ifdef addTexturIndexZ_AscanIndexInLoop
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
#else
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
#endif
//syncthreads(); //syncthreads();
// load TOF-Index from Textur 0-3 // load TOF-Index from Textur 0-3
currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
@@ -653,497 +287,17 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i); voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i); voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i);
} }
#else
float2 currentSOSVoxel_AscanIndexAttValues;
//#pragma unroll 3
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
{
// 1ten Teil mit selben Index laden
TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// 2ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f);
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// 3ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f);
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 2 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// 4ten Teil mit selben Index laden
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 3*maxAscanIndexArraysInTexture +0.5f);
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - 3 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// Für Version mit SOS und ATT
//currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
//voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
}
#endif
}
// else
// {
// // Do nothing due to until now only 4 are defined
// }
#endif #endif
#ifdef useWhileLoop
//#define varianteWhile0 // Hart-Codierte Aufteilung //etwas schneller
#define varianteWhile1 // mit Variabler Aufteilung
//#define varianteWhile3 // Hart-Codierte Aufteilung mit ?-Abfrage // TITAN (M/K) 32.8 / 32.7
#ifdef varianteWhile0
float ascanIndex_i = 0.0;
float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll
float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex
TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ;
do{
if ( ascanIndex_i >= compareIndexNextTexture){
compareIndexNextTexture += maxAscanIndexArraysInTexture;
IndexTextureOffset += maxAscanIndexArraysInTexture;
} }
// Calculate the Z-Index for storing the AscanIndex value
//TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z;
TexturIndexZ_AscanIndex = 2 *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
if ( compareIndexNextTexture <= 2048){
if (compareIndexNextTexture == 1024){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
else {
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
}
else{
if (compareIndexNextTexture == 3072){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
else {
VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
}
// if ( compareIndexNextTexture == 1024){
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// else if ( compareIndexNextTexture == 2048) {
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// else if ( compareIndexNextTexture == 3072){
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// else if ( compareIndexNextTexture == 4096){
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
// switch ( (int)(compareIndexNextTexture) )
// {
// case 1024: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
// case 2048: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
// case 3072: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
// case 4096: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
// }
voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
//
// if ((ascanIndex_i <= 10)) // Anfang
// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte
// //if ((ascanIndex_i >= 1400)) // Ende
// //if ((ascanIndex_i >= 2800)) // Ende
// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture));
//
// }
ascanIndex_i += 1.0f;
// if (ascanIndex_i == aScanWindowSize) break; //20,7 GV/s
//}while(1);
}while(ascanIndex_i < aScanWindowSize); //20,7 GV/s
//}while(ascanIndex_i < 3900.0f); //14,5 GV/s
#endif
#ifdef varianteWhile1
float ascanIndex_i = 0.0;
float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll
float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex
// TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ;
do{
if ( ascanIndex_i >= compareIndexNextTexture){
compareIndexNextTexture += maxAscanIndexArraysInTexture;
IndexTextureOffset += maxAscanIndexArraysInTexture;
}
// Calculate the Z-Index for storing the AscanIndex value
//TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z;
TexturIndexZ_AscanIndex = 2.0f *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
if ( compareIndexNextTexture == maxAscanIndexArraysInTexture){
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
else if ( compareIndexNextTexture == 2*maxAscanIndexArraysInTexture) {
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
else if ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
else if ( compareIndexNextTexture == 4*maxAscanIndexArraysInTexture){
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
//currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex;
//currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor;
#define Interpolation_Textur_Standard
//#define Interpolation_Cosinus
//#define Interpolation_Spline
#ifdef Interpolation_Textur_Standard
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
#endif
#ifdef Interpolation_Cosinus
float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndex_i + 0.5f);
float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndex_i + 0.5f);
float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x);
float mu2 = (1-cos(mu*3.14159265359f))/2;
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (y1*(1-mu2)+y2*mu2);
#endif
#ifdef Interpolation_Spline
float y0 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f);
float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndex_i + 0.5f);
float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndex_i + 0.5f);
float y3 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 2.5f, ascanIndex_i + 0.5f);
float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x);
float mu2 = mu*mu;
float a0 = y3 - y2 - y0 + y1;
float a1 = y0 - y1 - a0;
float a2 = y2 - y0;
float a3 = y1;
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (a0*mu*mu2+a1*mu2+a2*mu+a3);
#endif
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
{
printf("ascanIndex_i(%.0f) - %.0f: VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, floor(compareIndexNextTexture/maxAscanIndexArraysInTexture)-1.0, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
voxelValue_old = voxelValue;
}
#endif
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
//
// if ((ascanIndex_i <= 10)) // Anfang
// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte
// //if ((ascanIndex_i >= 1400)) // Ende
// //if ((ascanIndex_i >= 2800)) // Ende
// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture));
//
// }
ascanIndex_i += 1.0f;
}while(ascanIndex_i < aScanWindowSize);
#endif
#ifdef varianteWhile3
float ascanIndex_i = 0.0;
float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll
float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex
TexturIndexZ_AscanIndex = (ascanIndex_i + ascanIndex_i) + SosVoxelTextureZ;
do{
if ( ascanIndex_i >= compareIndexNextTexture){
compareIndexNextTexture += maxAscanIndexArraysInTexture;
IndexTextureOffset += maxAscanIndexArraysInTexture;
TexturIndexZ_AscanIndex -= (maxAscanIndexArraysInTexture+maxAscanIndexArraysInTexture);
}
// Calculate the Z-Index for storing the AscanIndex value
//TexturIndexZ_AscanIndex = 2 * (ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
//TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ - (IndexTextureOffset + IndexTextureOffset) ;
TexturIndexZ_AscanIndex += 2.0f;
if ( compareIndexNextTexture <= 2*maxAscanIndexArraysInTexture){
if (compareIndexNextTexture == maxAscanIndexArraysInTexture){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
else {
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
}
else{
if (compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){
VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
else {
VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
}
}
voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
ascanIndex_i += 1.0f;
}while(ascanIndex_i < aScanWindowSize); //32,1 GV/s
#endif
#endif #endif
// #ifdef GTX_Kepler
// __syncthreads();
// #endif
#ifdef debug_CudaSAFTAscanIndexKernel
//printf(" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> debugModeParameter %f\n", debugModeParameter);
#ifdef debug_CudaSAFTKernel
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
printf("#### debugMode = %f \n", debugMode);
printf("#### sumDiffAverageSpeed = %f \n", sumDiffAverageSpeed);
printf("#### DiffAverageSpeed = %f \n", DiffAverageSpeed);
printf("#### analyticAverageSpeed = %f \n", analyticAverageSpeed);
printf("#### averageSpeed = %f \n", averageSpeed);
printf("#### S_Wasser = %f \n", S_Wasser);
printf("#### S_Kugel = %f \n", S_Kugel);
printf("#### cWasser = %f \n", cWasser);
printf("#### cKugel = %f \n", cKugel);
}
#endif
//switch ( debugMode >= 60 ? (debugMode-60) : debugMode >= 30 ? (debugMode-30) : debugMode )
switch ( (int)(debugMode) )
{
case 0: output[memoryIndex] = (double)voxelValue; break;
//case 0: output[memoryIndex] = (double)1.5; break;
//case 1: output[memoryIndex] = (double)averageSpeed; break;
//case 2: output[memoryIndex] = (double)totalDistance; break;
//case 3: output[memoryIndex] = (double)sampleTime; break;
//case 4: output[memoryIndex] = (double)attenuationFactor; break;
case 1: output[memoryIndex] = (double)blockIndex; break;
case 2: output[memoryIndex] = (double)memoryIndex; break;
case 3: output[memoryIndex] = (double)regionOfInterestVoxel.x; break; // Coordinates in Voxel
case 4: output[memoryIndex] = (double)regionOfInterestVoxel.y; break;
case 5: output[memoryIndex] = (double)regionOfInterestVoxel.z; break;
case 6: output[memoryIndex] = (double)debugMode; break;
//case 7: output[memoryIndex] = (double)SosVoxelStartPosition.x; break; // VoxelAscanIndex
//case 7: output[memoryIndex] = (double)TexturIndexX; break; // VoxelAscanIndex
//case 7: output[memoryIndex] = (double)TexturIndexY; break; // VoxelAscanIndex
//case 7: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; // VoxelAscanIndex
//case 7: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.x; break; // VoxelAscanIndex
//case 8: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.y; break; // VoxelAttenuation
//case 9: output[memoryIndex] = (double)voxelPosition.x; break; // Coordinates in m
//case 10: output[memoryIndex] = (double)voxelPosition.y; break;
//case 11: output[memoryIndex] = (double)voxelPosition.z; break;
//case 12: output[memoryIndex] = (double)emitterPosition.x; break;
//case 13: output[memoryIndex] = (double)emitterPosition.y; break;
//case 14: output[memoryIndex] = (double)emitterPosition.z; break;
//case 15: output[memoryIndex] = (double)receiverPosition.x; break;
//case 16: output[memoryIndex] = (double)receiverPosition.y; break;
//case 17: output[memoryIndex] = (double)receiverPosition.z; break;
//case 18: output[memoryIndex] = (double)emitterDistance; break;
//case 19: output[memoryIndex] = (double)receiverDistance; break;
//case 20: output[memoryIndex] = (double)SosVoxel.x; break; // Coordinates in SOS-Voxel
//case 21: output[memoryIndex] = (double)SosVoxel.y; break;
//case 22: output[memoryIndex] = (double)SosVoxel.z; break;
case 15: output[memoryIndex] = (double)SosVoxelf.x; break;// Coordinates in SOS-Voxel (float)
case 16: output[memoryIndex] = (double)SosVoxelf.y; break;
case 17: output[memoryIndex] = (double)SosVoxelf.z; break;
// #ifdef noGeometryLoading
// case 26: output[memoryIndex] = (double)currentAScanIndex; break;
// //case 27: output[memoryIndex] = (double)currentEmitterIndex; break;
// case 28: output[memoryIndex] = (double)currentReceiverIndex; break;
// #endif
//case 29: output[memoryIndex] = (double)emitterVoxelTotalSpeedSum; break;
//case 30: output[memoryIndex] = (double)emitterVoxelVoxelCount; break;
//case 31: output[memoryIndex] = (double)receiverVoxelTotalSpeedSum; break;
//case 32: output[memoryIndex] = (double)receiverVoxelVoxelCount; break;
//case 33: output[memoryIndex] = (double)emitterReceiverTotalVoxelCount; break;
//case 34: output[memoryIndex] = (double)emitterVoxelTotalAttenuationSum; break; // Attenuation on Emitter Path
//case 35: output[memoryIndex] = (double)receiverVoxelTotalAttenuationSum;break; // on Receiver Path
case 36: output[memoryIndex] = (double)SosVoxelTextureZ; break;
//case 37: output[memoryIndex] = (double)TexturIndexZEmitter; break;
case 38: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break;
case 39: output[memoryIndex] = (double)speedOfSoundZLayer; break;
//case 40: output[memoryIndex] = (double)sumAverageSpeed; break;
case 41: output[memoryIndex] = (double)TexturIndexX; break;
case 42: output[memoryIndex] = (double)TexturIndexY; break;
case 43: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break;
//case 44: output[memoryIndex] = (double)VoxelAscanIndex; break;
#ifdef debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation
case 25: output[memoryIndex] = (double)DiffAverageSpeed; break; // Fehler zwischen analytisch und bresenham (nur 1-Ascan)
case 26: output[memoryIndex] = (double)sumDiffAverageSpeed; break; // Summe Mittlerer Fehler über alle A-Scans
case 27: output[memoryIndex] = (double)stdDiffAverageSpeed; break; // Standartabweichung über alle A-Scans // Dafür brauche ich mittelwert --> Matlab
case 28: output[memoryIndex] = (double)sumDiffAnalyticWithoutSpeed; break;
case 29: output[memoryIndex] = (double)sumAnalyticAverageSpeed; break;
#endif
default: output[memoryIndex] = (double)0; break;
}
#else
__syncthreads(); __syncthreads();
output[memoryIndex] = (double)voxelValue; output[memoryIndex] = (double)voxelValue;
#endif
#ifdef debug_CudaSAFTKernel
if (debugMode != 0)
{
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))
{
//printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
//if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){
printf("\n saftKernel: debugMode [%f]\n", debugMode);
//printf(" => voxelValue = %f\n", voxelValue);
//printf(" SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
printf(" (current)speedOfSoundZLayer %d\n", speedOfSoundZLayer);
printf(" maxFeasibleSosZLayerCount [%i]\n", maxFeasibleSosZLayerCount);
printf(" SosVoxelf [%3.15f %3.15f %3.15f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z);
// SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer);
printf(" SosVoxelTextureZ [%3.15f]\n", SosVoxelTextureZ);
// TexturIndexZEmitter = maxFeasibleSosZLayerCount * (currentEmitterIndex-1) + SosVoxelTextureZ; // Index für Zugriff auf Textur
// TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur
printf(" TexturIndexXYZ Em/Rec [%3.15f %3.15f %3.15f/%3.15f]\n\n", TexturIndexX, TexturIndexY, TexturIndexZEmitter, TexturIndexZReceiver);
switch (deviceSAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
//switch ( debugMode >= 60 ? 60 : debugMode >= 30 ? 30 : 0 ) // drei Bereiche
{
case 0: // mit Textur
//TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur
printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver);
break;
case 1: // mit Textur interpoliert
//TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Index für Zugriff auf Textur
printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) + 0.5f = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver);
break;
}
// // Speicher in Texturformat
// // Indexberechnung für Einsatz des Texturmemorys
// float xmax = SOSGrid_XYZ.x;
// float ymax = SOSGrid_XYZ.y;
// float zmax = (float)maxFeasibleSosZLayerCount;
// float i_x = SosVoxel.x;
// float i_y = SosVoxel.y;
// float i_z = (float)(int)(SosVoxelTextureZ); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer);
//Index = xmax*(ymax*(zmax*geometryIndexCounter+i_z)+i_y)+i_x;
printf(" currentEmitterIndex %d\n", currentEmitterIndex);
printf(" currentReceiverIndex %d\n", currentReceiverIndex);
printf(" (currentReceiverIndex-1) mod maxSoSReceiverArrayForTexture) %d\n", ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture));
printf(" EmIndex = xmax(%i)*(ymax(%i)*( TexturIndexZEmitter(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZEmitter, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZEmitter )+(int)TexturIndexY)+(int)TexturIndexX);
printf(" RecIndex = xmax(%i)*(ymax(%i)*( TexturIndexZReceiver(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZReceiver, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZReceiver)+(int)TexturIndexY)+(int)TexturIndexX);
printf(" emitterPosition [%3.15f %3.15f %3.15f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z);
printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount);
printf(" 1/emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum);
printf(" receiverPosition [%3.15f %3.15f %3.15f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z);
printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount);
printf(" 1/receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum);
printf(" => averageSpeed %f\n\n", averageSpeed);
// printf(" emitterPosition [%f %f %f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z);
// printf(" emitterDistance %f\n", emitterDistance);
// printf(" receiverPosition [%f %f %f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z);
// printf(" receiverDistance %f\n", receiverDistance);
// printf(" sampleTime %f\n", sampleTime);
// printf(" totalDistance %f\n\n", totalDistance);
//
// printf(" analyticAverageSpeed %f\n", analyticAverageSpeed);
// printf(" analyticTotalTime %f\n", analyticTotalTime);
// printf(" analyticSampleTime %f\n", analyticSampleTime);
// printf(" S_Wasser(%f) S_Kugel(%f) \n", S_Wasser, S_Kugel);
// printf(" cWasser(%f) cKugel(%f) \n", cWasser, cKugel);
// printf(" sampleRate %.10f\n", sampleRate);
printf("\n");
// printf(" => voxelValue = %3.12f\n", voxelValue);
//}
}
}
#endif
} }