Remove macros from SOS_ATT
This commit is contained in:
@@ -106,16 +106,6 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
// If the voxel lies outside the reconstructed image, leave the kernel
|
||||
if ((regionOfInterestVoxel.x >= IMAGE_SIZE_XYZ.x) || (regionOfInterestVoxel.y >= IMAGE_SIZE_XYZ.y) || (regionOfInterestVoxel.z >= IMAGE_SIZE_XYZ.z))
|
||||
return;
|
||||
|
||||
#ifdef debug_CudaSAFTKernel
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))
|
||||
{
|
||||
printf("\n==== saftKernelAscanIndex_SOS_ATT Kernel ============================ 1 =====\n");
|
||||
printf("\n=============================================================================\n");
|
||||
printf(" => regionOfInterestVoxel [%d %d %d] Start bei 0\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
|
||||
printf("\n=============================================================================\n");
|
||||
}
|
||||
#endif
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 2. Determine
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -124,13 +114,6 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
// - Variable declarations
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef debug_CudaSAFTKernel
|
||||
if ((regionOfInterestVoxel.x == 1) && (regionOfInterestVoxel.y == 1))
|
||||
{
|
||||
// printf(" Z2[%d] blockIndexOffset[%d]\n", regionOfInterestVoxel.z, blockIndexOffset);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Memory index of this thread's voxel within the output array
|
||||
unsigned long long int memoryIndex = (((unsigned long long int)IMAGE_SIZE_XYZ.y * ((unsigned long long int)regionOfInterestVoxel.z - (unsigned long long int)blockIndexOffset) + (unsigned long long int)regionOfInterestVoxel.y) * (unsigned long long int)IMAGE_SIZE_XYZ.x + (unsigned long long int)regionOfInterestVoxel.x);
|
||||
@@ -147,25 +130,8 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
float const TexturIndexY = SosVoxelf.y + 0.5f;
|
||||
float const SosVoxelTextureZ = (SosVoxelf.z - (float)speedOfSoundZLayer) + 0.5f; // Z offset inside precalculated SOS paths
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernel
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))
|
||||
{
|
||||
printf("\n==== SAFT (AscanIndex) Kernel Step 1 ==========================================\n");
|
||||
printf(" VoxelIncrement = %3.12f\n", VoxelIncrement );
|
||||
printf(" TableAscanIndexAllocationCount = %i\n", TableAscanIndexAllocationCount );
|
||||
printf("--------------------------------------------------------------------------------\n");
|
||||
printf(" => regionOfInterestVoxel [%d %d %d]\n", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
|
||||
printf(" => TexturIndexXYZ [%3.3f %3.3f %3.3f]\n", TexturIndexX, TexturIndexY, SosVoxelTextureZ);
|
||||
printf(" SosVoxelStartPos(SoS) [%3.12f %3.12f %3.12f]\n", SosVoxelStartPosition.x, SosVoxelStartPosition.y, SosVoxelStartPosition.z);
|
||||
printf(" SosVoxelf [%3.12f %3.12f %3.12f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z);
|
||||
printf("================================================================================\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
float voxelValue = 0.0; // reflection value, which is summed up in Ascan-Loop = Outputvalue
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
float voxelValue_old = 0.0; // only for debugging necessary
|
||||
#endif
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 3. SAFT algorithm
|
||||
@@ -203,176 +169,7 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
|
||||
__syncthreads();
|
||||
|
||||
//#define useOneLoop // Zugriff auf Texturen mit jeweiligen Abfragen // TITAN (Matlab/Kernel) 30.8 / 32.9
|
||||
#define useSameLoop // Zugriff auf Texturen optimieren. Durch Abfragen von beiden Texturen in der Schleife Overhead verringern // TITAN (Matlab/Kernel) 53.8 / 61.3
|
||||
// requires the AscanIndex to be cleared beforehand --> since it is not clear how many surfaces can be used, several (up to a maximum of 4) are implemented here
|
||||
// When only half of the texture is used: TITAN (M/K) 27.3 / 37.9
|
||||
//#define useWhileLoop // Zugriff auf AscanIndex-Texturen innerhalb einer While-Schleife // TITAN (Matlab/Kernel) 22.6 / 28.7
|
||||
|
||||
// #ifdef debug_CudaSAFTAscanIndexKernel
|
||||
// float2 currentSOSVoxel_AscanIndexAttValues; // For Debugging deklaration outside
|
||||
// #endif
|
||||
|
||||
#ifdef useOneLoop
|
||||
//#pragma unroll 2
|
||||
//for(float ascanIndex_i = 0; ascanIndex_i < aScanWindowSize; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 30 GV/s //2,4: 25GV/s
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < 4096.0f; ascanIndex_i+=1.0f) // Alle Ascans durchlaufen // 40 GV/s //2: 30GV/s, 4: 34GV/s, 8: 34GV/s, 16: 32GV/s
|
||||
//for(int ascanIndex_i = 0; ascanIndex_i < 1413; ascanIndex_i++) // Alle Ascans durchlaufen // 34 GV/s //2: 29GV/s, 4: 39GV/s, 8: 34GV/s, 9: 35GV/s
|
||||
{
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
// printf("## i = %f :", i);
|
||||
// }
|
||||
|
||||
// load the AscanIndex for the path from the texture
|
||||
// ====================================
|
||||
//#define variante0 // TITAN (M/K) 20.5 / 25.9
|
||||
#define variante0_1 // TITAN (M/K) 30.8 / 32.9
|
||||
#ifdef variante0
|
||||
// Determine number of current used texture memory for this Ascan -> currentRecTextureIndex = 0,1,2
|
||||
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
|
||||
int currentTextureIndex = (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture);
|
||||
//TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
//TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ;
|
||||
//TexturIndexZ_AscanIndex = 2 *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
|
||||
|
||||
// if ( currentTextureIndex == 0){
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
// else if ( currentTextureIndex == 1) {
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
// else if ( currentTextureIndex == 2){
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
// else if ( currentTextureIndex == 3){
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
|
||||
switch ( (int)(currentTextureIndex) )
|
||||
{
|
||||
case 0: VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
case 1: VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
case 2: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
case 3: VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#ifdef variante0_1
|
||||
// Determine number of current used texture memory for this receiver --> Wird benoetigt
|
||||
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
|
||||
//int currentRecTextureIndex = (int)floor(i /maxSoSReceiverArrayForTexture);
|
||||
TexturIndexZ_AscanIndex = 2 * (float)((int)ascanIndex_i % maxAscanIndexArraysInTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
|
||||
if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 0){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 1) {
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 2){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
else if ( (int)floor(ascanIndex_i /maxAscanIndexArraysInTexture) == 3){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
#endif
|
||||
#ifdef variante1
|
||||
|
||||
// Determine number of current used texture memory for this receiver --> Wird benoetigt
|
||||
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
|
||||
|
||||
//if ( currentRecTextureIndex <= maxSoSReceiverArrayForTexture){
|
||||
//if ( currentRecTextureIndex <= maxSoSReceiverArrayForTexture){
|
||||
if ( i <= 709.0f){
|
||||
//TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
//TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
TexturIndexZ_AscanIndex = 2*i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
// printf("#### i1 = %f : VoxelAscanIndex %f", i, VoxelAscanIndex);
|
||||
// }
|
||||
}
|
||||
//else if ( currentRecTextureIndex <= 2*maxSoSReceiverArrayForTexture) {
|
||||
//else if ( currentRecTextureIndex <= 1420) {
|
||||
else if ( i <= 1412.0f) {
|
||||
//TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
//TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
TexturIndexZ_AscanIndex = 2 * (i - maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
// printf("#### i2 = %f : VoxelAscanIndex %f", i-maxSoSReceiverArrayForTexture, VoxelAscanIndex);
|
||||
// }
|
||||
}
|
||||
//else if ( currentRecTextureIndex <= 3*maxSoSReceiverArrayForTexture) {
|
||||
//else if ( currentRecTextureIndex <= 2130) {
|
||||
// else if ( i <= 2130.0f) {
|
||||
// //TexturIndexZ_AscanIndex = 2 * (i - 2*maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
// //TexturIndexZ_AscanIndex = 2 * (i - 1420) + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
// TexturIndexZ_AscanIndex = 2 * (i - 1420) + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
#endif
|
||||
#ifdef variante2
|
||||
|
||||
// Determine number of current used texture memory for this receiver --> Wird benoetigt
|
||||
//currentRecTextureIndex = (int)floor((float)lookUpReceiverIndex / (float)maxSoSReceiverArrayForTexture);
|
||||
//TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ + 0.5f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
TexturIndexZ_AscanIndex = 2 * i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
#endif
|
||||
|
||||
//voxelValue += attenuationFactor * tex2D( texRefAscans, sampleTime - 0.5f, (float)i + 0.5f); // i gibt Index fuer Ascan an
|
||||
//voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, (float)i + 0.5f); // i gibt Index fuer Ascan an
|
||||
voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
//voxelValue += tex2D( texRefAscans, VoxelAscanIndex, i + 0.5f); // i gibt Index fuer Ascan an // Wenn +0.5f schon im Preprocessing drin ist sogar langsamer?!
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
// printf("= %f\n", voxelValue);
|
||||
// }
|
||||
|
||||
// #ifdef debug_CudaSAFTAscanIndexKernel
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
// {
|
||||
// //if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ))
|
||||
// //{
|
||||
// //printf("\n==== SAFT (AscanIndex) Kernel Step 4 - End ===========================================\n");
|
||||
//
|
||||
// printf(" => regionOfInterestVoxel [%d %d %d] (Start bei 0)", regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
|
||||
// printf("--------------------------------------------------------------------------------\n");
|
||||
// printf(" ascanIndex_i %f\n", ascanIndex_i);
|
||||
//// printf(" currentAScanIndex %i\n", currentAScanIndex);
|
||||
//// printf(" currentEmitterIndex %i\n", currentEmitterIndex);
|
||||
//// printf(" currentReceiverIndex %i\n", currentReceiverIndex);
|
||||
//
|
||||
// printf(" SosVoxelf [%3.12f %3.12f %3.12f] (Start bei 0)\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z);
|
||||
// printf(" speedOfSoundZLayer [%3i]\n", speedOfSoundZLayer);
|
||||
//
|
||||
// printf(" TextureIndexXYZ [%3.12f %3.12f %3.12f]\n", TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
// printf(" SosVoxelTextureZ %f\n", SosVoxelTextureZ);
|
||||
//
|
||||
// //printf(" emitterPosition [%3.12f %3.12f %3.12f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z);
|
||||
// //printf(" emitterDistance %f\n", emitterDistance);
|
||||
// //printf(" receiverPosition [%3.12f %3.12f %3.12f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z);
|
||||
// //printf(" receiverDistance %f\n", receiverDistance);
|
||||
// //printf(" sampleRate %e\n", sampleRate);
|
||||
// //printf(" totalDistance %f\n\n", totalDistance);
|
||||
//
|
||||
// // printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount);
|
||||
// // printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount);
|
||||
// // printf(" -> emitterReceiverTotalVoxelCount %f\n", emitterReceiverTotalVoxelCount);
|
||||
// // printf(" emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum);
|
||||
// // printf(" receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum);
|
||||
// // printf(" -> averageSpeed %f\n", averageSpeed);
|
||||
// //printf(" sampleTime %f\n", sampleTime);
|
||||
//
|
||||
// // printf(" emitterVoxelTotalAttenuationSum %f\n", emitterVoxelTotalAttenuationSum);
|
||||
// // printf(" receiverVoxelTotalAttenuationSum %f\n", receiverVoxelTotalAttenuationSum);
|
||||
// // printf(" emitterReceiverTotalVoxelCount %f\n", emitterReceiverTotalVoxelCount);
|
||||
// // printf(" -> attenuationFactor %f\n\n", attenuationFactor);
|
||||
//
|
||||
// printf(" VoxelAscanIndex %f\n", VoxelAscanIndex);
|
||||
//
|
||||
//
|
||||
// printf(" => voxelValue = %f\n", voxelValue);
|
||||
//
|
||||
// printf("======================================================\n");
|
||||
// //}
|
||||
// }
|
||||
// #endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef useSameLoop
|
||||
#define tryOptimize_SOSATT
|
||||
//#define addTexturIndexZ_AscanIndexInLoop
|
||||
@@ -380,26 +177,16 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
|
||||
if ( TableAscanIndexAllocationCount == 1){
|
||||
#ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data
|
||||
//#ifndef debug_CudaSAFTAscanIndexKernel
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
//#endif
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
|
||||
//#pragma unroll 2
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
|
||||
//for(float ascanIndex_i = 0.0f; ascanIndex_i < 1; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#else
|
||||
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
|
||||
@@ -408,81 +195,15 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
#endif
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
|
||||
//#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
// {
|
||||
// printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue);
|
||||
// //voxelValue_old = voxelValue;
|
||||
// }
|
||||
//#endif
|
||||
}
|
||||
#else
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
|
||||
//#pragma unroll 3
|
||||
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
|
||||
// always use only the 1st surface
|
||||
TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
else if (TableAscanIndexAllocationCount == 2){
|
||||
#ifndef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data
|
||||
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_0;
|
||||
float2 currentSOSVoxel_AscanIndexAttValues_1;
|
||||
|
||||
float Offset_0 = (float)ascanIndexBatchOffset + 0.5f;
|
||||
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
|
||||
//#pragma unroll 2
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#else
|
||||
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
#ifdef useSync
|
||||
__syncthreads();
|
||||
#endif
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_0.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_0.x - 0.5f, Offset_0 + ascanIndex_i); // i gibt Index fuer Ascan an
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
|
||||
}
|
||||
#else
|
||||
#ifdef tryOptimize_SOSATT // in the Case of 2 Textures normal case has faster Access on Data
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
|
||||
//#pragma unroll 3
|
||||
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
|
||||
@@ -492,27 +213,9 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
// load the 2nd part using the same index
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f);
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -528,20 +231,11 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
float Offset_1 = (float)ascanIndexBatchOffset + maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
|
||||
//#pragma unroll 2
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#else
|
||||
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
currentSOSVoxel_AscanIndexAttValues_1 = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
@@ -555,57 +249,6 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_1.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_1.x - 0.5f, Offset_1 + ascanIndex_i);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
|
||||
}
|
||||
#else
|
||||
|
||||
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
|
||||
//#pragma unroll 3
|
||||
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
|
||||
|
||||
// load the 1st part using the same index
|
||||
TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
// load the 2nd part using the same index
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f);
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// load the 3rd part using the same index
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f);
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 2 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -624,19 +267,10 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
float Offset_2 = (float)ascanIndexBatchOffset + 2.0f*maxAscanIndexArraysInTexture + 0.5f;
|
||||
float Offset_3 = (float)ascanIndexBatchOffset + 3.0f*maxAscanIndexArraysInTexture + 0.5f;
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex = SosVoxelTextureZ - 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
|
||||
//#pragma unroll 2
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.8GV/s
|
||||
{
|
||||
|
||||
#ifdef addTexturIndexZ_AscanIndexInLoop
|
||||
TexturIndexZ_AscanIndex += 2.0f; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#else
|
||||
TexturIndexZ_AscanIndex = 2.0f*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
#endif
|
||||
//syncthreads();
|
||||
// load TOF-Index from Textur 0-3
|
||||
currentSOSVoxel_AscanIndexAttValues_0 = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
@@ -653,497 +287,17 @@ texture <float2, cudaTextureType3D, cudaReadModeElementType> texTableAscanIndexF
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_2.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_2.x - 0.5f, Offset_2 + ascanIndex_i);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues_3.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues_3.x - 0.5f, Offset_3 + ascanIndex_i);
|
||||
}
|
||||
#else
|
||||
float2 currentSOSVoxel_AscanIndexAttValues;
|
||||
|
||||
//#pragma unroll 3
|
||||
//for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // 1.Teil Ascans durchlaufen // GV/s //2: GV/s, 4: GV/s, 8: GV/s, 16: GV/s
|
||||
for(float ascanIndex_i = 0.0f; ascanIndex_i < maxAscanIndexArraysInTexture; ascanIndex_i+=1.0f) // bis zu 60.6GV/s
|
||||
{
|
||||
|
||||
|
||||
// load the 1st part using the same index
|
||||
TexturIndexZ_AscanIndex = 2*ascanIndex_i + SosVoxelTextureZ; // Z-Index fuer Zugriff auf Textur interpoliert
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 0 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// 2ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + maxAscanIndexArraysInTexture +0.5f);
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 1 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// 3ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 2*maxAscanIndexArraysInTexture +0.5f);
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 2 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// 4ten Teil mit selben Index laden
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, (float)(ascanIndexBatchOffset) + ascanIndex_i + 3*maxAscanIndexArraysInTexture +0.5f);
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - 3 : VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
|
||||
// Für Version mit SOS und ATT
|
||||
//currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
//voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// // Do nothing due to until now only 4 are defined
|
||||
// }
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef useWhileLoop
|
||||
//#define varianteWhile0 // Hart-Codierte Aufteilung //etwas schneller
|
||||
#define varianteWhile1 // mit Variabler Aufteilung
|
||||
//#define varianteWhile3 // Hart-Codierte Aufteilung mit ?-Abfrage // TITAN (M/K) 32.8 / 32.7
|
||||
|
||||
#ifdef varianteWhile0
|
||||
float ascanIndex_i = 0.0;
|
||||
float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll
|
||||
float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex
|
||||
TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ;
|
||||
do{
|
||||
|
||||
if ( ascanIndex_i >= compareIndexNextTexture){
|
||||
compareIndexNextTexture += maxAscanIndexArraysInTexture;
|
||||
IndexTextureOffset += maxAscanIndexArraysInTexture;
|
||||
}
|
||||
|
||||
// Calculate the Z-Index for storing the AscanIndex value
|
||||
//TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z;
|
||||
TexturIndexZ_AscanIndex = 2 *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
|
||||
|
||||
if ( compareIndexNextTexture <= 2048){
|
||||
if (compareIndexNextTexture == 1024){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
else {
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (compareIndexNextTexture == 3072){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
else {
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
}
|
||||
|
||||
// if ( compareIndexNextTexture == 1024){
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
// else if ( compareIndexNextTexture == 2048) {
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
// else if ( compareIndexNextTexture == 3072){
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
// else if ( compareIndexNextTexture == 4096){
|
||||
// VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
|
||||
// switch ( (int)(compareIndexNextTexture) )
|
||||
// {
|
||||
// case 1024: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
// case 2048: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
// case 3072: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
// case 4096: VoxelAscanIndex = tex3D( texTableAscanIndexFloat2_3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); break;
|
||||
// }
|
||||
|
||||
|
||||
voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
//
|
||||
// if ((ascanIndex_i <= 10)) // Anfang
|
||||
// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte
|
||||
// //if ((ascanIndex_i >= 1400)) // Ende
|
||||
// //if ((ascanIndex_i >= 2800)) // Ende
|
||||
// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture));
|
||||
//
|
||||
// }
|
||||
|
||||
ascanIndex_i += 1.0f;
|
||||
|
||||
|
||||
// if (ascanIndex_i == aScanWindowSize) break; //20,7 GV/s
|
||||
//}while(1);
|
||||
}while(ascanIndex_i < aScanWindowSize); //20,7 GV/s
|
||||
//}while(ascanIndex_i < 3900.0f); //14,5 GV/s
|
||||
#endif
|
||||
|
||||
#ifdef varianteWhile1
|
||||
float ascanIndex_i = 0.0;
|
||||
float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll
|
||||
float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex
|
||||
// TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ;
|
||||
|
||||
do{
|
||||
|
||||
if ( ascanIndex_i >= compareIndexNextTexture){
|
||||
compareIndexNextTexture += maxAscanIndexArraysInTexture;
|
||||
IndexTextureOffset += maxAscanIndexArraysInTexture;
|
||||
}
|
||||
|
||||
// Calculate the Z-Index for storing the AscanIndex value
|
||||
//TexturGeometryIndexZ = maxFeasibleSosZLayerCount * ((ascanIndex_i) % maxAscanIndexArraysInTexture) + i_z;
|
||||
TexturIndexZ_AscanIndex = 2.0f *(ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
|
||||
|
||||
if ( compareIndexNextTexture == maxAscanIndexArraysInTexture){
|
||||
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
else if ( compareIndexNextTexture == 2*maxAscanIndexArraysInTexture) {
|
||||
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
else if ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){
|
||||
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
else if ( compareIndexNextTexture == 4*maxAscanIndexArraysInTexture){
|
||||
//VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
currentSOSVoxel_AscanIndexAttValues = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex); }
|
||||
|
||||
//currentSOSVoxel_AscanIndexAttValues.x = currentAscanIndex;
|
||||
//currentSOSVoxel_AscanIndexAttValues.y = totalAttenuation_multFactor;
|
||||
#define Interpolation_Textur_Standard
|
||||
//#define Interpolation_Cosinus
|
||||
//#define Interpolation_Spline
|
||||
|
||||
#ifdef Interpolation_Textur_Standard
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
#endif
|
||||
|
||||
#ifdef Interpolation_Cosinus
|
||||
float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndex_i + 0.5f);
|
||||
float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndex_i + 0.5f);
|
||||
float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x);
|
||||
float mu2 = (1-cos(mu*3.14159265359f))/2;
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (y1*(1-mu2)+y2*mu2);
|
||||
#endif
|
||||
|
||||
#ifdef Interpolation_Spline
|
||||
float y0 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x - 0.5f, ascanIndex_i + 0.5f);
|
||||
float y1 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 0.5f, ascanIndex_i + 0.5f);
|
||||
float y2 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 1.5f, ascanIndex_i + 0.5f);
|
||||
float y3 = tex2D( texRefAscans, currentSOSVoxel_AscanIndexAttValues.x + 2.5f, ascanIndex_i + 0.5f);
|
||||
float mu = currentSOSVoxel_AscanIndexAttValues.x-floor(currentSOSVoxel_AscanIndexAttValues.x);
|
||||
float mu2 = mu*mu;
|
||||
float a0 = y3 - y2 - y0 + y1;
|
||||
float a1 = y0 - y1 - a0;
|
||||
float a2 = y2 - y0;
|
||||
float a3 = y1;
|
||||
voxelValue += currentSOSVoxel_AscanIndexAttValues.y * (a0*mu*mu2+a1*mu2+a2*mu+a3);
|
||||
#endif
|
||||
|
||||
#ifdef debug_CudaSAFTAscanIndexKernelDataAccess
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))//&& (i == 30))
|
||||
{
|
||||
printf("ascanIndex_i(%.0f) - %.0f: VoxelAscanIndex(idx %30.24f, att %19.12f) , voxelValue += %30.24f = %9.4f\n", ascanIndex_i, floor(compareIndexNextTexture/maxAscanIndexArraysInTexture)-1.0, currentSOSVoxel_AscanIndexAttValues.x,currentSOSVoxel_AscanIndexAttValues.y, voxelValue-voxelValue_old, voxelValue);
|
||||
voxelValue_old = voxelValue;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
//
|
||||
// if ((ascanIndex_i <= 10)) // Anfang
|
||||
// //if ((ascanIndex_i >= 1020) && (ascanIndex_i <= 1030)) // Mitte
|
||||
// //if ((ascanIndex_i >= 1400)) // Ende
|
||||
// //if ((ascanIndex_i >= 2800)) // Ende
|
||||
// printf("#### ascanIndex_i = %.0f , compIdxNextText = %.0f; IndexTextOffset = %.0f; TexturIndexZ_AscanIndex = %.0f, VoxelAscanIndex=%f = %i\n", ascanIndex_i, compareIndexNextTexture, IndexTextureOffset, TexturIndexZ_AscanIndex, VoxelAscanIndex, ( compareIndexNextTexture == 3*maxAscanIndexArraysInTexture));
|
||||
//
|
||||
// }
|
||||
|
||||
ascanIndex_i += 1.0f;
|
||||
}while(ascanIndex_i < aScanWindowSize);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef varianteWhile3
|
||||
float ascanIndex_i = 0.0;
|
||||
float compareIndexNextTexture = maxAscanIndexArraysInTexture; // Index ab dem die naechste Textur genutzt werden soll
|
||||
float IndexTextureOffset = 0.0f; // Offset des AscanIndex fuer TexturIndexZ_AscanIndex
|
||||
TexturIndexZ_AscanIndex = (ascanIndex_i + ascanIndex_i) + SosVoxelTextureZ;
|
||||
do{
|
||||
|
||||
if ( ascanIndex_i >= compareIndexNextTexture){
|
||||
compareIndexNextTexture += maxAscanIndexArraysInTexture;
|
||||
IndexTextureOffset += maxAscanIndexArraysInTexture;
|
||||
TexturIndexZ_AscanIndex -= (maxAscanIndexArraysInTexture+maxAscanIndexArraysInTexture);
|
||||
}
|
||||
|
||||
// Calculate the Z-Index for storing the AscanIndex value
|
||||
//TexturIndexZ_AscanIndex = 2 * (ascanIndex_i - IndexTextureOffset) + SosVoxelTextureZ;
|
||||
//TexturIndexZ_AscanIndex = ascanIndex_i + ascanIndex_i + SosVoxelTextureZ - (IndexTextureOffset + IndexTextureOffset) ;
|
||||
TexturIndexZ_AscanIndex += 2.0f;
|
||||
|
||||
if ( compareIndexNextTexture <= 2*maxAscanIndexArraysInTexture){
|
||||
if (compareIndexNextTexture == maxAscanIndexArraysInTexture){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat0, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
else {
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat1, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (compareIndexNextTexture == 3*maxAscanIndexArraysInTexture){
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat2, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
else {
|
||||
VoxelAscanIndex = tex3D( texTableAscanIndexFloat3, TexturIndexX, TexturIndexY, TexturIndexZ_AscanIndex);
|
||||
}
|
||||
}
|
||||
|
||||
voxelValue += tex2D( texRefAscans, VoxelAscanIndex - 0.5f, ascanIndex_i + 0.5f); // i gibt Index fuer Ascan an
|
||||
|
||||
ascanIndex_i += 1.0f;
|
||||
|
||||
}while(ascanIndex_i < aScanWindowSize); //32,1 GV/s
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
// #ifdef GTX_Kepler
|
||||
// __syncthreads();
|
||||
// #endif
|
||||
#ifdef debug_CudaSAFTAscanIndexKernel
|
||||
//printf(" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> debugModeParameter %f\n", debugModeParameter);
|
||||
#ifdef debug_CudaSAFTKernel
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ)){
|
||||
|
||||
printf("#### debugMode = %f \n", debugMode);
|
||||
printf("#### sumDiffAverageSpeed = %f \n", sumDiffAverageSpeed);
|
||||
printf("#### DiffAverageSpeed = %f \n", DiffAverageSpeed);
|
||||
printf("#### analyticAverageSpeed = %f \n", analyticAverageSpeed);
|
||||
printf("#### averageSpeed = %f \n", averageSpeed);
|
||||
|
||||
printf("#### S_Wasser = %f \n", S_Wasser);
|
||||
printf("#### S_Kugel = %f \n", S_Kugel);
|
||||
printf("#### cWasser = %f \n", cWasser);
|
||||
printf("#### cKugel = %f \n", cKugel);
|
||||
}
|
||||
#endif
|
||||
//switch ( debugMode >= 60 ? (debugMode-60) : debugMode >= 30 ? (debugMode-30) : debugMode )
|
||||
switch ( (int)(debugMode) )
|
||||
{
|
||||
case 0: output[memoryIndex] = (double)voxelValue; break;
|
||||
//case 0: output[memoryIndex] = (double)1.5; break;
|
||||
//case 1: output[memoryIndex] = (double)averageSpeed; break;
|
||||
//case 2: output[memoryIndex] = (double)totalDistance; break;
|
||||
//case 3: output[memoryIndex] = (double)sampleTime; break;
|
||||
//case 4: output[memoryIndex] = (double)attenuationFactor; break;
|
||||
|
||||
case 1: output[memoryIndex] = (double)blockIndex; break;
|
||||
case 2: output[memoryIndex] = (double)memoryIndex; break;
|
||||
|
||||
case 3: output[memoryIndex] = (double)regionOfInterestVoxel.x; break; // Coordinates in Voxel
|
||||
case 4: output[memoryIndex] = (double)regionOfInterestVoxel.y; break;
|
||||
case 5: output[memoryIndex] = (double)regionOfInterestVoxel.z; break;
|
||||
|
||||
case 6: output[memoryIndex] = (double)debugMode; break;
|
||||
|
||||
//case 7: output[memoryIndex] = (double)SosVoxelStartPosition.x; break; // VoxelAscanIndex
|
||||
//case 7: output[memoryIndex] = (double)TexturIndexX; break; // VoxelAscanIndex
|
||||
//case 7: output[memoryIndex] = (double)TexturIndexY; break; // VoxelAscanIndex
|
||||
//case 7: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break; // VoxelAscanIndex
|
||||
|
||||
//case 7: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.x; break; // VoxelAscanIndex
|
||||
//case 8: output[memoryIndex] = (double)currentSOSVoxel_AscanIndexAttValues.y; break; // VoxelAttenuation
|
||||
|
||||
|
||||
|
||||
//case 9: output[memoryIndex] = (double)voxelPosition.x; break; // Coordinates in m
|
||||
//case 10: output[memoryIndex] = (double)voxelPosition.y; break;
|
||||
//case 11: output[memoryIndex] = (double)voxelPosition.z; break;
|
||||
|
||||
//case 12: output[memoryIndex] = (double)emitterPosition.x; break;
|
||||
//case 13: output[memoryIndex] = (double)emitterPosition.y; break;
|
||||
//case 14: output[memoryIndex] = (double)emitterPosition.z; break;
|
||||
//case 15: output[memoryIndex] = (double)receiverPosition.x; break;
|
||||
//case 16: output[memoryIndex] = (double)receiverPosition.y; break;
|
||||
//case 17: output[memoryIndex] = (double)receiverPosition.z; break;
|
||||
//case 18: output[memoryIndex] = (double)emitterDistance; break;
|
||||
//case 19: output[memoryIndex] = (double)receiverDistance; break;
|
||||
|
||||
//case 20: output[memoryIndex] = (double)SosVoxel.x; break; // Coordinates in SOS-Voxel
|
||||
//case 21: output[memoryIndex] = (double)SosVoxel.y; break;
|
||||
//case 22: output[memoryIndex] = (double)SosVoxel.z; break;
|
||||
|
||||
case 15: output[memoryIndex] = (double)SosVoxelf.x; break;// Coordinates in SOS-Voxel (float)
|
||||
case 16: output[memoryIndex] = (double)SosVoxelf.y; break;
|
||||
case 17: output[memoryIndex] = (double)SosVoxelf.z; break;
|
||||
|
||||
// #ifdef noGeometryLoading
|
||||
// case 26: output[memoryIndex] = (double)currentAScanIndex; break;
|
||||
// //case 27: output[memoryIndex] = (double)currentEmitterIndex; break;
|
||||
// case 28: output[memoryIndex] = (double)currentReceiverIndex; break;
|
||||
// #endif
|
||||
|
||||
|
||||
//case 29: output[memoryIndex] = (double)emitterVoxelTotalSpeedSum; break;
|
||||
//case 30: output[memoryIndex] = (double)emitterVoxelVoxelCount; break;
|
||||
//case 31: output[memoryIndex] = (double)receiverVoxelTotalSpeedSum; break;
|
||||
//case 32: output[memoryIndex] = (double)receiverVoxelVoxelCount; break;
|
||||
|
||||
//case 33: output[memoryIndex] = (double)emitterReceiverTotalVoxelCount; break;
|
||||
//case 34: output[memoryIndex] = (double)emitterVoxelTotalAttenuationSum; break; // Attenuation on Emitter Path
|
||||
//case 35: output[memoryIndex] = (double)receiverVoxelTotalAttenuationSum;break; // on Receiver Path
|
||||
|
||||
|
||||
case 36: output[memoryIndex] = (double)SosVoxelTextureZ; break;
|
||||
//case 37: output[memoryIndex] = (double)TexturIndexZEmitter; break;
|
||||
case 38: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break;
|
||||
case 39: output[memoryIndex] = (double)speedOfSoundZLayer; break;
|
||||
|
||||
//case 40: output[memoryIndex] = (double)sumAverageSpeed; break;
|
||||
|
||||
case 41: output[memoryIndex] = (double)TexturIndexX; break;
|
||||
case 42: output[memoryIndex] = (double)TexturIndexY; break;
|
||||
case 43: output[memoryIndex] = (double)TexturIndexZ_AscanIndex; break;
|
||||
//case 44: output[memoryIndex] = (double)VoxelAscanIndex; break;
|
||||
|
||||
|
||||
#ifdef debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation
|
||||
case 25: output[memoryIndex] = (double)DiffAverageSpeed; break; // Fehler zwischen analytisch und bresenham (nur 1-Ascan)
|
||||
case 26: output[memoryIndex] = (double)sumDiffAverageSpeed; break; // Summe Mittlerer Fehler über alle A-Scans
|
||||
case 27: output[memoryIndex] = (double)stdDiffAverageSpeed; break; // Standartabweichung über alle A-Scans // Dafür brauche ich mittelwert --> Matlab
|
||||
case 28: output[memoryIndex] = (double)sumDiffAnalyticWithoutSpeed; break;
|
||||
case 29: output[memoryIndex] = (double)sumAnalyticAverageSpeed; break;
|
||||
#endif
|
||||
|
||||
default: output[memoryIndex] = (double)0; break;
|
||||
}
|
||||
#else
|
||||
__syncthreads();
|
||||
output[memoryIndex] = (double)voxelValue;
|
||||
#endif
|
||||
|
||||
#ifdef debug_CudaSAFTKernel
|
||||
if (debugMode != 0)
|
||||
{
|
||||
if ((regionOfInterestVoxel.x == OutputVolumeX) && (regionOfInterestVoxel.y == OutputVolumeY) && (regionOfInterestVoxel.z == OutputVolumeZ))
|
||||
{
|
||||
//printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
|
||||
//if ((SosVoxel.x == DebugSoSVoxelX) && (SosVoxel.y == DebugSoSVoxelY) && (SosVoxel.z == DebugSoSVoxelZ)){
|
||||
printf("\n saftKernel: debugMode [%f]\n", debugMode);
|
||||
//printf(" => voxelValue = %f\n", voxelValue);
|
||||
//printf(" SoSVoxel[%+3.2f %+3.2f %+3.2f] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
|
||||
printf(" SoSVoxel[%3i %3i %3i] speedOfSoundZLayer(%3i) - regionOfInterestVoxel [%3i %3i %3i]\n", SosVoxel.x, SosVoxel.y, SosVoxel.z, speedOfSoundZLayer, regionOfInterestVoxel.x, regionOfInterestVoxel.y, regionOfInterestVoxel.z);
|
||||
printf(" (current)speedOfSoundZLayer %d\n", speedOfSoundZLayer);
|
||||
printf(" maxFeasibleSosZLayerCount [%i]\n", maxFeasibleSosZLayerCount);
|
||||
|
||||
printf(" SosVoxelf [%3.15f %3.15f %3.15f]\n", SosVoxelf.x, SosVoxelf.y, SosVoxelf.z);
|
||||
|
||||
// SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer);
|
||||
printf(" SosVoxelTextureZ [%3.15f]\n", SosVoxelTextureZ);
|
||||
|
||||
// TexturIndexZEmitter = maxFeasibleSosZLayerCount * (currentEmitterIndex-1) + SosVoxelTextureZ; // Index für Zugriff auf Textur
|
||||
// TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur
|
||||
printf(" TexturIndexXYZ Em/Rec [%3.15f %3.15f %3.15f/%3.15f]\n\n", TexturIndexX, TexturIndexY, TexturIndexZEmitter, TexturIndexZReceiver);
|
||||
|
||||
|
||||
switch (deviceSAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
|
||||
//switch ( debugMode >= 60 ? 60 : debugMode >= 30 ? 30 : 0 ) // drei Bereiche
|
||||
{
|
||||
case 0: // mit Textur
|
||||
//TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ; // Index für Zugriff auf Textur
|
||||
printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver);
|
||||
break;
|
||||
case 1: // mit Textur interpoliert
|
||||
//TexturIndexZReceiver = maxFeasibleSosZLayerCount * ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture) + SosVoxelTextureZ + 0.5f; // Index für Zugriff auf Textur
|
||||
printf(" TexturIndexZReceiver = maxFeasibleSosZLayerCount(%i) * ((currentReceiverIndex-1 (%i)) %% maxSoSReceiverArrayForTexture(%i)) + SosVoxelTextureZ(%3.15f) + 0.5f = (%3.15f)\n\n", maxFeasibleSosZLayerCount, currentReceiverIndex-1, maxSoSReceiverArrayForTexture, SosVoxelTextureZ, TexturIndexZReceiver);
|
||||
break;
|
||||
}
|
||||
|
||||
// // Speicher in Texturformat
|
||||
// // Indexberechnung für Einsatz des Texturmemorys
|
||||
// float xmax = SOSGrid_XYZ.x;
|
||||
// float ymax = SOSGrid_XYZ.y;
|
||||
// float zmax = (float)maxFeasibleSosZLayerCount;
|
||||
// float i_x = SosVoxel.x;
|
||||
// float i_y = SosVoxel.y;
|
||||
// float i_z = (float)(int)(SosVoxelTextureZ); // float SosVoxelTextureZ = (SosVoxelf.z - speedOfSoundZLayer);
|
||||
|
||||
//Index = xmax*(ymax*(zmax*geometryIndexCounter+i_z)+i_y)+i_x;
|
||||
printf(" currentEmitterIndex %d\n", currentEmitterIndex);
|
||||
printf(" currentReceiverIndex %d\n", currentReceiverIndex);
|
||||
printf(" (currentReceiverIndex-1) mod maxSoSReceiverArrayForTexture) %d\n", ((currentReceiverIndex-1) % maxSoSReceiverArrayForTexture));
|
||||
|
||||
printf(" EmIndex = xmax(%i)*(ymax(%i)*( TexturIndexZEmitter(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZEmitter, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZEmitter )+(int)TexturIndexY)+(int)TexturIndexX);
|
||||
printf(" RecIndex = xmax(%i)*(ymax(%i)*( TexturIndexZReceiver(%i) )+i_y(%i))+i_x(%i) = [%i]\n", (int)SOSGrid_XYZ.x, (int)SOSGrid_XYZ.y, (int)TexturIndexZReceiver, (int)TexturIndexY, (int)TexturIndexX, (int)SOSGrid_XYZ.x*((int)SOSGrid_XYZ.y*((int)TexturIndexZReceiver)+(int)TexturIndexY)+(int)TexturIndexX);
|
||||
|
||||
|
||||
printf(" emitterPosition [%3.15f %3.15f %3.15f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z);
|
||||
printf(" emitterVoxelVoxelCount %f\n", emitterVoxelVoxelCount);
|
||||
printf(" 1/emitterVoxelTotalSpeedSum %f\n", emitterVoxelTotalSpeedSum);
|
||||
|
||||
printf(" receiverPosition [%3.15f %3.15f %3.15f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z);
|
||||
printf(" receiverVoxelVoxelCount %f\n", receiverVoxelVoxelCount);
|
||||
printf(" 1/receiverVoxelTotalSpeedSum %f\n", receiverVoxelTotalSpeedSum);
|
||||
|
||||
printf(" => averageSpeed %f\n\n", averageSpeed);
|
||||
// printf(" emitterPosition [%f %f %f]\n", emitterPosition.x, emitterPosition.y, emitterPosition.z);
|
||||
// printf(" emitterDistance %f\n", emitterDistance);
|
||||
// printf(" receiverPosition [%f %f %f]\n", receiverPosition.x, receiverPosition.y, receiverPosition.z);
|
||||
// printf(" receiverDistance %f\n", receiverDistance);
|
||||
// printf(" sampleTime %f\n", sampleTime);
|
||||
// printf(" totalDistance %f\n\n", totalDistance);
|
||||
//
|
||||
// printf(" analyticAverageSpeed %f\n", analyticAverageSpeed);
|
||||
// printf(" analyticTotalTime %f\n", analyticTotalTime);
|
||||
// printf(" analyticSampleTime %f\n", analyticSampleTime);
|
||||
// printf(" S_Wasser(%f) S_Kugel(%f) \n", S_Wasser, S_Kugel);
|
||||
// printf(" cWasser(%f) cKugel(%f) \n", cWasser, cKugel);
|
||||
// printf(" sampleRate %.10f\n", sampleRate);
|
||||
printf("\n");
|
||||
|
||||
// printf(" => voxelValue = %3.12f\n", voxelValue);
|
||||
|
||||
|
||||
//}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user