feat: refactor & clean cpp code in SAFT_TOFI

2024-11-21 09:49:34 +08:00
parent 0e29f139af
commit 82a2a9e132
7 changed files with 2210 additions and 4593 deletions
--- a/SAFT_TOFI/CMakeLists.txt
+++ b/SAFT_TOFI/CMakeLists.txt
@@ -4,7 +4,8 @@ set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
 enable_language(CUDA)
 find_package (OpenMP REQUIRED)
 file(GLOB_RECURSE  cu_files ./src/*.cu)
-file(GLOB_RECURSE  cuh_files ./src/*.cuh)
+file(GLOB_RECURSE  cuh_files ./src/*.cuh) 
+
 add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/processAScans.cpp ./src/saft.cpp ${cu_files} ${cuh_files})
 target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include  ) 
 set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
@@ -13,7 +14,7 @@ target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
                       --use_fast_math
                       --ptxas-options=-v
                       -arch compute_30 -code compute_30,sm_30 
-                       >)
+                       >) 

 target_link_libraries(SaftTofi PRIVATE ${CUDA_RUNTIME_LIBRARY} )
 target_link_libraries(SaftTofi PRIVATE OpenMP::OpenMP_CXX )
--- a/SAFT_TOFI/src/SAFT_TOFI.cpp
+++ b/SAFT_TOFI/src/SAFT_TOFI.cpp
--- a/SAFT_TOFI/src/kernel/rayTracing.cuh
+++ b/SAFT_TOFI/src/kernel/rayTracing.cuh
@@ -137,7 +137,7 @@ __device__ __forceinline__ void performRayTracedSpeedAdditionTexture(float &voxe
        pathPoint[slowDim1] = voxel2f[slowDim1];
        pathPoint[slowDim2] = voxel2f[slowDim2];
    }
-    else  // voxel2f < voxel1f   Endpukt < Startpkt -> Steigung negativ
+    else  // voxel2f < voxel1f   End point < Start point -> Slope negative
    {
        fastDirectionSteps = floor(voxel2f[greatestDistanceDim] + 0.5f) - floor(voxel1f[greatestDistanceDim] + 0.5f) + 1;
        pathPoint[greatestDistanceDim] = voxel1f[greatestDistanceDim];
--- a/SAFT_TOFI/src/processAScans.cpp
+++ b/SAFT_TOFI/src/processAScans.cpp
--- a/SAFT_TOFI/src/saft.cpp
+++ b/SAFT_TOFI/src/saft.cpp
@@ -1,126 +1,91 @@
+#include <string.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <ctime>
 #include <iostream>
 #include <vector>

-#include <cstdlib>
-#include <ctime>
-#include <cmath>
-#include <string.h>
-
-//#include <ail/file.hpp>
-//#include <ail/string.hpp>
-//#include <ail/time.hpp>
+// #include <ail/file.hpp>
+// #include <ail/string.hpp>
+// #include <ail/time.hpp>

 #include "saft.hpp"

-
-
-
 /**
   Clumsy constructor of the core reconstruction class.
   - Unbeholfener Konstruktor der Kern Rekonstuktionsklasse
 */
-SAFTHandler::SAFTHandler(
-    int deviceId, 			///< CUDA ID of the device to be used.
-    int deviceIndex,		///< Index given by MATLAB (An welcher Position steht die GPU in der Liste?)
-    float *aScan_ptr,    	///< Zeiger zu den AScandaten
-    double *output_ptr,   	///< Zeiger zu den Volumen-Daten
-    double *Duration_ptr, 	///< Zeiger auf Rueckgabewert fuer Matlab fuer Laufzeit des Kernels
-	unsigned short *receiver_index_ptr, ///<
-	unsigned short *emitter_index_ptr,  ///<
-	float *receiver_list_ptr, 			///<
-	int receiver_list_Size,
-	float *emitter_list_ptr,			///<
-	int emitter_list_Size,
-    float *speed_vec_ptr,				///< Zeiger auf die SoS-Daten in Block-/Gridmode
-    int3 SOSGrid_XYZ,
-	float3 sosOffset, 					///< Startpoint of SoSGrid
-	float SOS_RESOLUTION,				///< Aufloesung des SoSGrid
-	float *att_vec_ptr,					///< Zeiger auf die Att-Daten inm Gridmode
+SAFTHandler::SAFTHandler(int deviceId,                        ///< CUDA ID of the device to be used.
+                         int deviceIndex,                     ///< Index given by MATLAB (An welcher Position steht die GPU in der Liste?)
+                         float *aScan_ptr,                    ///< Zeiger zu den AScandaten
+                         double *output_ptr,                  ///< Zeiger zu den Volumen-Daten
+                         double *Duration_ptr,                ///< Zeiger auf Rueckgabewert fuer Matlab fuer Laufzeit des Kernels
+                         unsigned short *receiver_index_ptr,  ///<
+                         unsigned short *emitter_index_ptr,   ///<
+                         float *receiver_list_ptr,            ///<
+                         int receiver_list_Size,
+                         float *emitter_list_ptr,  ///<
+                         int emitter_list_Size,
+                         float *speed_vec_ptr,  ///< Zeiger auf die SoS-Daten in Block-/Gridmode
+                         int3 SOSGrid_XYZ,
+                         float3 sosOffset,      ///< Startpoint of SoSGrid
+                         float SOS_RESOLUTION,  ///< Aufloesung des SoSGrid
+                         float *att_vec_ptr,    ///< Zeiger auf die Att-Daten inm Gridmode

-    int aScanCount,
-    int aScanLength,
-    int3 IMAGE_SIZE_XYZ,
-    float sampleRate,
-    float3 regionOfInterestOffset,
-    float IMAGE_RESOLUTION,
-    dim3 const & fixedBlockDimensions, ///< If fixed block dimensions are enabled, they will be used over the ones determined by auto-tuning.
-    float debugMode,
-    float debugModeParameter,
-    bool SOSMode_3DVolume,
-    bool ATTMode_3DVolume,
+                         int aScanCount, int aScanLength, int3 IMAGE_SIZE_XYZ, float sampleRate, float3 regionOfInterestOffset, float IMAGE_RESOLUTION,
+                         dim3 const &fixedBlockDimensions,  ///< If fixed block dimensions are enabled, they will be used over the ones determined by auto-tuning.
+                         float debugMode, float debugModeParameter, bool SOSMode_3DVolume, bool ATTMode_3DVolume,

-    int SAFT_MODE,
-	int *SAFT_VARIANT,
-	int SAFT_VARIANT_Size,
+                         int SAFT_MODE, int *SAFT_VARIANT, int SAFT_VARIANT_Size,

-	int *Abort_ptr							///<  If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
-    ):
-    // Initialisation der Klassenvariablen mit den uebergebenen Werten (aehnlich Konstruktor)
-    // Initializer list of class variables
-    deviceId(deviceId),
-    deviceIndex(deviceIndex),
+                         int *Abort_ptr  ///<  If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch;
+                         )
+    : deviceId(deviceId),
+      deviceIndex(deviceIndex),

-	aScan_ptr(aScan_ptr),			//aScanSamplesPath(aScanSamplesPath),
+      aScan_ptr(aScan_ptr),  // aScanSamplesPath(aScanSamplesPath),

-	output_ptr(output_ptr),			//Path(Path),
-	Duration_ptr(Duration_ptr),
+      output_ptr(output_ptr),  // Path(Path),
+      Duration_ptr(Duration_ptr),

-	receiver_index_ptr(receiver_index_ptr), 	//
-	emitter_index_ptr(emitter_index_ptr),  		//
-	receiver_list_ptr(receiver_list_ptr), 		//
-	receiver_list_Size(receiver_list_Size),
-	emitter_list_ptr(emitter_list_ptr),		//
-	emitter_list_Size(emitter_list_Size),
-	speed_vec_ptr(speed_vec_ptr), 			///< SoS-Daten im Blockmode oder SoSGrid
-	SOSGrid_XYZ(SOSGrid_XYZ),	  			// Groesse des SoSGrids
-	sosOffset(sosOffset), 					///< Startpoint of SoSGrid
-	SOS_RESOLUTION(SOS_RESOLUTION),			///< Aufloesung des SoSGrid
+      receiver_index_ptr(receiver_index_ptr),  //
+      emitter_index_ptr(emitter_index_ptr),    //
+      receiver_list_ptr(receiver_list_ptr),    //
+      receiver_list_Size(receiver_list_Size),
+      emitter_list_ptr(emitter_list_ptr),  //
+      emitter_list_Size(emitter_list_Size),
+      speed_vec_ptr(speed_vec_ptr),    ///< SoS-Daten im Blockmode oder SoSGrid
+      SOSGrid_XYZ(SOSGrid_XYZ),        // Groesse des SoSGrids
+      sosOffset(sosOffset),            ///< Startpoint of SoSGrid
+      SOS_RESOLUTION(SOS_RESOLUTION),  ///< Aufloesung des SoSGrid

-	att_vec_ptr(att_vec_ptr),				///< Att-Daten als ATTGrid
+      att_vec_ptr(att_vec_ptr),  ///< Att-Daten als ATTGrid

-	aScanCount(aScanCount),
-	aScanLength(aScanLength),
-	IMAGE_SIZE_XYZ(IMAGE_SIZE_XYZ),
-	sampleRate(sampleRate),
-	regionOfInterestOffset(regionOfInterestOffset),
-	IMAGE_RESOLUTION(IMAGE_RESOLUTION),
+      aScanCount(aScanCount),
+      aScanLength(aScanLength),
+      IMAGE_SIZE_XYZ(IMAGE_SIZE_XYZ),
+      sampleRate(sampleRate),
+      regionOfInterestOffset(regionOfInterestOffset),
+      IMAGE_RESOLUTION(IMAGE_RESOLUTION),

-    fixedBlockDimensions(fixedBlockDimensions),
-	debugMode(debugMode),
-	debugModeParameter(debugModeParameter),
-	SOSMode_3DVolume(SOSMode_3DVolume),
-	ATTMode_3DVolume(ATTMode_3DVolume),
+      fixedBlockDimensions(fixedBlockDimensions),
+      debugMode(debugMode),
+      debugModeParameter(debugModeParameter),
+      SOSMode_3DVolume(SOSMode_3DVolume),
+      ATTMode_3DVolume(ATTMode_3DVolume),

-	SAFT_MODE(SAFT_MODE),
-	SAFT_VARIANT(SAFT_VARIANT),
-	SAFT_VARIANT_Size(SAFT_VARIANT_Size),
+      SAFT_MODE(SAFT_MODE),
+      SAFT_VARIANT(SAFT_VARIANT),
+      SAFT_VARIANT_Size(SAFT_VARIANT_Size),

-	Abort_ptr(Abort_ptr)
+      Abort_ptr(Abort_ptr)
 {
-	#ifdef debug_OutputFunctions
-		printf( "==> SAFTHandler::SAFTHandler - Start\n");
-	#endif
-
-	#ifdef debug_OutputInfo
-		printf( "SAFTHandler Constructor\n");
-	#endif
-
-    aScanAllocationCount = USED_ASCANSMEMORYREGIONS;	// Anzahl der A-Scan-Speicherbereiche die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer A-ScanCopy genutzt werden sollen.
-	maxSupportedTexturesForAscanIndex = MAX_SUPPORTEDTEXTURES_FORASCANINDEX;		// Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
-
-	IMAGE_RESOLUTION_FACTOR = 1 / IMAGE_RESOLUTION;   // Auflösung im OutputVolumen
-	SOS_RESOLUTION_FACTOR   = 1 / SOS_RESOLUTION;	  // Auflösung im SoS-Volumen
-
-	#ifdef debug_OutputVariables
-		printf( "IMAGE_RESOLUTION_FACTOR = %e\n", IMAGE_RESOLUTION_FACTOR);
-		printf( "SOS_RESOLUTION_FACTOR 	= %e\n", SOS_RESOLUTION_FACTOR);
-		printf( "Samplerate = %e\n", sampleRate);
-	#endif
-
-	#ifdef debug_OutputFunctions
-		printf( "<== SAFTHandler::SAFTHandler - End\n");
-	#endif
+    aScanAllocationCount = USED_ASCANSMEMORYREGIONS;  // Anzahl der A-Scan-Speicherbereiche die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer A-ScanCopy genutzt werden sollen.
+    maxSupportedTexturesForAscanIndex = MAX_SUPPORTEDTEXTURES_FORASCANINDEX;  // Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;

+    IMAGE_RESOLUTION_FACTOR = 1 / IMAGE_RESOLUTION;  // Auflösung im OutputVolumen
+    SOS_RESOLUTION_FACTOR = 1 / SOS_RESOLUTION;      // Auflösung im SoS-Volumen
 }

 /**
@@ -129,271 +94,100 @@ SAFTHandler::SAFTHandler(
 */
 void SAFTHandler::performReconstruction()
 {
-	#ifdef debug_OutputFunctions
-		printf( "==> SAFTHandler::performReconstruction - Start\n");
-	#endif
+    aScanSamples = (float *)aScan_ptr;                      // Ascan-Data
+    emitter_index = (unsigned short *)emitter_index_ptr;    // Index for associating emitter to corresponding coordinates
+    receiver_index = (unsigned short *)receiver_index_ptr;  // Index for associating receiver to corresponding coordinates
+    emitter_list = (float3 *)emitter_list_ptr;              // Lookuptable for emitter coordinates
+    receiver_list = (float3 *)receiver_list_ptr;            // Lookuptable for receiver coordinates
+    output = (double *)output_ptr;                          // Output-Data

+    speedOfSoundField = (float *)speed_vec_ptr;  // For SOS Correction
+    // SoSData 		  	= (float*) speed_vec_ptr;	// Fuer Blockmode
+    attenuationField = (float *)att_vec_ptr;  // For Attenuation Correction

-	//Pointeruebergabe der AScan-Daten, Geometrie-Daten und Output-Daten von Matlab
-	#ifdef debug_OutputInfo
-		printf( "Give Pointer Names for AScan, Geometry, Output and SoS-Data from Matlab\n");
-	#endif
-	aScanSamples   		= (float*) aScan_ptr;								// Ascan-Data
-	emitter_index  		= (unsigned short*) emitter_index_ptr;			// Index for associating emitter to corresponding coordinates
-	receiver_index 		= (unsigned short*) receiver_index_ptr;			// Index for associating receiver to corresponding coordinates
-	emitter_list   		= (float3*) emitter_list_ptr;					// Lookuptable for emitter coordinates
-	receiver_list  		= (float3*) receiver_list_ptr;					// Lookuptable for receiver coordinates
-	output 		   		= (double*) output_ptr;							// Output-Data
+    // Read out GPU-Device Properties
+    // ----------------------------------------------------------
+    // List of all deviceProperties: http://developer.download.nvidia.com/compute/cuda/4_1/rel/toolkit/docs/online/group__CUDART__DEVICE_g5aa4f47938af8276f08074d09b7d520c.html

-	speedOfSoundField 	= (float*) speed_vec_ptr;	// For SOS Correction
-	//SoSData 		  	= (float*) speed_vec_ptr;	// Fuer Blockmode
-	attenuationField  	= (float*) att_vec_ptr;    	// For Attenuation Correction
+    // Determine the number of GPU-Devices in System
+    int deviceCount;
+    CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
+    deviceProperties.reserve(static_cast<std::size_t>(deviceCount));  // Request Vector for all GPU Devices with the size deviceCount

+    // Determine the number of GPU-Devices in System
+    cudaDeviceProp &device = deviceProperties[deviceId];
+    CUDA_CHECK(cudaGetDeviceProperties(&device, deviceId));  // Read out Properties of current used GPU-Device in this thread

-	#ifdef debug_OutputInfo													// Name des Device mit ID ausgeben
-		printf( "Device ID: %i\n", deviceId);
-	#endif
+    CUDA_CHECK(cudaSetDevice(deviceId));

-	#ifdef debug_OutputFunctions
-	printf( "==> loadDevices - Start\n");
-	#endif
+    deviceProperties.push_back(device);  // Add element at the end of the vector outputProb
+    // Determine minimum supported Surface size. Dependent on device.maxTexture3D[2] and device.maxSurface3D[2]
+    maxSurfaceTexture3DDimension = (device.maxTexture3D[2] < device.maxSurface3D[2]) ? device.maxTexture3D[2] : device.maxSurface3D[2];
+    // printf("DEVICE  =>  maxSurfaceTexture3DDimension = %d  (device.maxTexture3D[2] = %d  -  device.maxSurface3D[2] = %d)\n",  maxSurfaceTexture3DDimension, device.maxTexture3D[2],
+    // device.maxSurface3D[2]);	// Set maximum Size of Texture

-	// Read out GPU-Device Properties
-	// ----------------------------------------------------------
-	// List of all deviceProperties: http://developer.download.nvidia.com/compute/cuda/4_1/rel/toolkit/docs/online/group__CUDART__DEVICE_g5aa4f47938af8276f08074d09b7d520c.html
-
-	// Determine the number of GPU-Devices in System
-	int deviceCount;
-	CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
-	deviceProperties.reserve(static_cast<std::size_t>(deviceCount));	// Request Vector for all GPU Devices with the size deviceCount
-
-	// Determine the number of GPU-Devices in System
-	cudaDeviceProp & device = deviceProperties[deviceId];
-	CUDA_CHECK(cudaGetDeviceProperties(&device, deviceId));		// Read out Properties of current used GPU-Device in this thread
-	//printf("%i. %s\n", deviceId, device.name);
-	//#ifdef debug_OutputInfo
-		//printf( "Device used: %18s  (HW-ID %i) (Idx %i)\n", device.name , deviceId, deviceIndex);	// Name des Device mit ID ausgeben
-	//#endif
-	CUDA_CHECK(cudaSetDevice(deviceId));
-
-	#ifdef debug_OutputInfo
-		printf("Reset Device\n");	// Reset Device
-	#endif
-	
-    // CUDA_CHECK(cudaDeviceReset()); 2019: commented to remove re-initialization when called, avoids blocked threads later on
-
-		//printf("%i. %s\n", deviceId, deviceProperties[deviceId].name);
-		//printf("DEVICE  =>  Maximum 3D texture dimensions:  [%d %d %d]\n",  device.maxTexture3D[0], device.maxTexture3D[1], device.maxTexture3D[2]);
-		//printf("DEVICE  =>  Maximum width, height, and depth for a 3D surface reference bound to a CUDA array:  [%d %d %d]\n",  device.maxSurface3D[0], device.maxSurface3D[1], device.maxSurface3D[2]);
-
-	#ifdef debug_OutputInfo
-			printf("%i. %s\n", deviceId, device.name);
-			printf("   Byte Total Global Mem:  %lld \n",  device.totalGlobalMem);
-			printf("   Compute Capability:     %i.%i\n",  device.major,device.minor);
-
-			printf("   Name:                            %s\n",  device.name);
-			printf("   Major revision number:           %d\n",  device.major);
-			printf("   Minor revision number:           %d\n",  device.minor);
-			printf("   Total global memory:             %lld\n",  device.totalGlobalMem);
-			printf("   Total shared memory per block:   %u\n",  device.sharedMemPerBlock);
-			printf("   Total registers per block:       %d\n",  device.regsPerBlock);
-			printf("   Warp size:                       %d\n",  device.warpSize);
-			printf("   Maximum memory pitch:            %lld\n",  device.memPitch);
-			printf("   Maximum threads per block:       %d\n",  device.maxThreadsPerBlock);
-			printf("   Maximum 3D texture dimensions:  [%d %d %d]\n",  device.maxTexture3D[0], device.maxTexture3D[1], device.maxTexture3D[2]);
-			for (int i = 0; i < 3; ++i)
-				printf("        Maximum dimension %d of block:  %lld\n", i, device.maxThreadsDim[i]);
-			for (int i = 0; i < 3; ++i)
-				printf("        Maximum dimension %d of grid:   %lld\n", i, device.maxGridSize[i]);
-			printf("   Clock rate:                      %d\n",  device.clockRate);
-			printf("   Total constant memory:           %u\n",  device.totalConstMem);
-			printf("   Texture alignment:               %u\n",  device.textureAlignment);
-			printf("   Concurrent copy and execution:   %s\n",  (device.deviceOverlap ? "Yes" : "No"));
-			printf("   Number of multiprocessors:       %d\n",  device.multiProcessorCount);
-			printf("   Kernel execution timeout:        %s\n\n",  (device.kernelExecTimeoutEnabled ? "Yes" : "No"));
-			printf("   Maximum 3D texture dimensions:  [%d %d %d]\n",  device.maxTexture3D[0], device.maxTexture3D[1], device.maxTexture3D[2]);
-			printf("   Maximum width, height, and depth for a 3D surface reference bound to a CUDA array:  [%d %d %d]\n",  device.maxSurface3D[0], device.maxSurface3D[1], device.maxSurface3D[2]);
-	#endif
-
-	deviceProperties.push_back(device);		// Add element at the end of the vector outputProb
-
-//	printf("   Maximum memory pitch:            %lld\n",  device.memPitch);
-//	printf("   Texture alignment:               %u\n",  device.textureAlignment);
-//	printf("   Texture Pitch alignment:         %u\n",  device.texturePitchAlignment);
-
-	// Determine minimum supported Surface size. Dependent on device.maxTexture3D[2] and device.maxSurface3D[2]
-	maxSurfaceTexture3DDimension = (device.maxTexture3D[2]<device.maxSurface3D[2]) ? device.maxTexture3D[2]:device.maxSurface3D[2];
-		//printf("DEVICE  =>  maxSurfaceTexture3DDimension = %d  (device.maxTexture3D[2] = %d  -  device.maxSurface3D[2] = %d)\n",  maxSurfaceTexture3DDimension, device.maxTexture3D[2], device.maxSurface3D[2]);	// Set maximum Size of Texture
-
-	//Set the maximal used number of SOS-ZLayers, dependend on SAFT_VARIANT-Parameter 3DVolumeInterpolationAtReconstruction (=3)
+    // Set the maximal used number of SOS-ZLayers, dependend on SAFT_VARIANT-Parameter 3DVolumeInterpolationAtReconstruction (=3)
+    
+	//按照目前配置必定 maxFeasibleSosZLayerCount = 2;！！！
 	switch (SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction])
-		 {
-			case  0:	// Mit Textur -> 1ne SOS-ZLayer
-				maxFeasibleSosZLayerCount = 1;
-				break;
-			case  1:	// Mit Textur & Interpolation -> 2 SOS-ZLayer
-				maxFeasibleSosZLayerCount = 2;
-				break;
-		 }
-		#ifdef debug_OutputVariables
-			printf( "Set maxFeasibleSosZLayerCount = %u\n", maxFeasibleSosZLayerCount);
-		#endif
+    {
+    case 0:  // Mit Textur -> 1ne SOS-ZLayer
+        maxFeasibleSosZLayerCount = 1;
+        break;
+    case 1:  // Mit Textur & Interpolation -> 2 SOS-ZLayer
+        maxFeasibleSosZLayerCount = 2;
+        break;
+    }

-			//printf( "AScan Blockgroesse (aScanCount)= %i\n", aScanCount);
-			//printf( "maxSurfaceTexture3DDimension= %i\n", maxSurfaceTexture3DDimension);
+    maxAscanIndexArraysInTexture = maxSurfaceTexture3DDimension / maxFeasibleSosZLayerCount;  // Max Anzahl der Ascans in einer Teiltabelle (1024)

-	// Fuer Ascan-Index-Varainte von SAFT werden mehrere Texturen benoetigt, da die Anzahl der Z_layer limitiert ist.
-	// Um 3D-Interpolation zu ermoeglichen muessen jeweils 2 Z-Layer pro A-Scan vorhanden sein.
-	// --> 2*nAscans < maxSurfaceTexture3DDimension(Fermi & Kepler: 2048) ==> maximal 1024 Em/Rec - Kombinationen koennen in einem Surface/Textur gespeichert werden
-	// maxSurfaceTexture3DDimension					=  maximale Groesse die erlaubt ist (2048)
-	// TableAscanIndexAllocationCount  				=  Anzahl der TeilSurfaces ==> auch Anzahl der benoetigten Durchlaeufe (aktuell 4 Texturen)
-	// maxFeasibleSosZLayerCount				   	=  Anzahl der SoS-Zlayer die gleichzeitig im Speicher pro EM/REC-Kombi vorgehalten werden (1 oder 2 bei Interpolierten Variante)
-	// maxAscanIndexArraysInTexture			   		=  Anzahl der Ascans in einer Teiltabelle (1024)
-	// maxSupportedTexturesForAscanIndex			=  MAX_SUPPORTEDTEXTURES_FORASCANINDEX (=4) // Definiert die aktuell maximal unterstuetzen Texturen im Code fuer AscanIndex
-	// neededAscanBatchCount						=  Anzahl an benoetigten Durchlaeufe des SAFTs um alle Ascans abarbeiten zu koennen
+    // memoryCheck();		// Freier Speicher am Anfang ausgeben

-	maxAscanIndexArraysInTexture = maxSurfaceTexture3DDimension/maxFeasibleSosZLayerCount;	// Max Anzahl der Ascans in einer Teiltabelle (1024)
+    // if ((strcmp(device.name, "GeForce GTX 690") == 0)||(strcmp(device.name, "GeForce GTX 590") == 0)){
+    if (memoryGPUfree() <= 2500000000)
+    {  // IF GPUMemory < 2.5GB only 1 Surface can be used
+        maxSupportedTexturesForAscanIndex = 1;
+    }

+    neededAscanBatchCount = ceil((float)aScanCount / (maxSurfaceTexture3DDimension / maxFeasibleSosZLayerCount) / maxSupportedTexturesForAscanIndex);
+    // Determine amount of PartSurfaces
+    if (neededAscanBatchCount > 1)
+    {
+        TableAscanIndexAllocationCount = maxSupportedTexturesForAscanIndex;  // Wenn mehr als ein Durlauf nötig --> so viele wie möglich nutzen
+    }
+    else
+    {
+        TableAscanIndexAllocationCount = (int)ceil((float)aScanCount / (maxAscanIndexArraysInTexture));  // Wenn nur ein Durlauf nötig --> so wenige wie nötig nutzen
+    }

-	#ifdef debug_OutputAScanIndexMemoryDivision
-		printf("%s :\n", device.name);
-		printf("    Total memory %lld Bytes\n", memoryGPUtotal() );
-		printf("    Free  memory %lld Bytes\n", memoryGPUfree() );
-		printf(" => Used memory %lld Bytes\n", (memoryGPUtotal()-memoryGPUfree()));
-	#endif
-	//memoryCheck();		// Freier Speicher am Anfang ausgeben
+    // Set Block and Grid-Dimensions for GPU Threads
+    genericSAFTBlockDimensions = fixedBlockDimensions;  // fixedBlockDimensions = Parameter BlockDim_XYZ
+    genericSAFTGridDimensions =
+        dim3((IMAGE_SIZE_XYZ.x + genericSAFTBlockDimensions.x - 1) / genericSAFTBlockDimensions.x,  // hier wird aufgerundet! Wenn ungerade Aufloesung nicht genau
+             (IMAGE_SIZE_XYZ.y + genericSAFTBlockDimensions.y - 1) / genericSAFTBlockDimensions.y,  // in Blockgroesse geteilt werden kann, muss ein weiterer
+             (IMAGE_SIZE_XYZ.z + genericSAFTBlockDimensions.z - 1) / genericSAFTBlockDimensions.z   // Block berechnet werden. Wenn insgesamt zu viele werden sie im Kernel aussortiert.
+        );

-	// if ((strcmp(device.name, "GeForce GTX 690") == 0)||(strcmp(device.name, "GeForce GTX 590") == 0)){
-	if (memoryGPUfree() <= 2500000000){	// IF GPUMemory < 2.5GB only 1 Surface can be used
-		maxSupportedTexturesForAscanIndex = 1;
-		#ifdef debug_OutputAScanIndexMemoryDivision
-			printf("Free GPU Memory: %lld < 2.5GB\n --> reduce maxSupportedTexturesForAscanIndex 4 -> %i \n", memoryGPUfree(), maxSupportedTexturesForAscanIndex );
-			//printf("GeForce GTX 690/590 \n --> reduce maxSupportedTexturesForAscanIndex 4 -> %i \n", maxSupportedTexturesForAscanIndex );
-		#endif
-	}
-
-	#ifdef debug_OutputAScanIndexMemoryDivision
-		printf( "--> maxSupportedTexturesForAscanIndex %i \n", maxSupportedTexturesForAscanIndex);
-		//printf( "--> TableAscanIndexAllocationCount %i    \n", TableAscanIndexAllocationCount);
-	#endif
-
-	neededAscanBatchCount = ceil((float)aScanCount/(maxSurfaceTexture3DDimension/maxFeasibleSosZLayerCount)/maxSupportedTexturesForAscanIndex);
-	#ifdef debug_OutputAScanIndexMemoryDivision
-		printf("aScanCount %i -> neededAscanBatchCount = %i!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", aScanCount, neededAscanBatchCount);
-
-		//printf("totalGlobalMem = %lld Byte\n", device.totalGlobalMem);
-		//printf("multiProcessorCount = %i MultiProcessors\n", device.multiProcessorCount);
-	#endif
-
-	// Determine amount of PartSurfaces
-	if (neededAscanBatchCount > 1){
-		TableAscanIndexAllocationCount = maxSupportedTexturesForAscanIndex;								// Wenn mehr als ein Durlauf nötig --> so viele wie möglich nutzen
-	} else {
-		TableAscanIndexAllocationCount = (int)ceil((float)aScanCount/(maxAscanIndexArraysInTexture));   // Wenn nur ein Durlauf nötig --> so wenige wie nötig nutzen
-	}
-
-
-
-	// Set Block and Grid-Dimensions for GPU Threads
-	genericSAFTBlockDimensions = fixedBlockDimensions;	// fixedBlockDimensions = Parameter BlockDim_XYZ
-	genericSAFTGridDimensions = dim3(
-		(IMAGE_SIZE_XYZ.x + genericSAFTBlockDimensions.x-1)/ genericSAFTBlockDimensions.x,	// hier wird aufgerundet! Wenn ungerade Aufloesung nicht genau
-		(IMAGE_SIZE_XYZ.y + genericSAFTBlockDimensions.y-1)/ genericSAFTBlockDimensions.y,	// in Blockgroesse geteilt werden kann, muss ein weiterer
-		(IMAGE_SIZE_XYZ.z + genericSAFTBlockDimensions.z-1)/ genericSAFTBlockDimensions.z	// Block berechnet werden. Wenn insgesamt zu viele werden sie im Kernel aussortiert.
-		);
-	#ifdef debug_OutputVariables
-		printf( "genericSAFTBlockDimensions X,Y,Z = (%i %i %i)\n",genericSAFTBlockDimensions.x, genericSAFTBlockDimensions.y, genericSAFTBlockDimensions.z);
-		printf( "genericSAFTGridDimensions  X,Y,Z = (%i %i %i)\n",genericSAFTGridDimensions.x, genericSAFTGridDimensions.y, genericSAFTGridDimensions.z);
-	#endif
-
-
-
-	// Outputsize of SOS Volume
+    // Outputsize of SOS Volume
    SOSVolume_VoxelCount = SOSGrid_XYZ.x * SOSGrid_XYZ.y * SOSGrid_XYZ.z;
    SOSVolume_Bytes = SOSVolume_VoxelCount * sizeof(float);
-	#ifdef debug_OutputVariables
-		printf("  SOSVolume_VoxelCount [%ix%ix%i] = %i\n", SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z, SOSVolume_VoxelCount);
-		printf("  SOSVolume_Bytes = SOSVolumeVoxelCount(%i) x sizeof(float = 4) = %i\n", SOSVolume_VoxelCount, SOSVolume_Bytes);
-	#endif
-
-

    // Warn if Outputsize of Volume is too big for 32Bit Sytems
-	outputVolume_VoxelCount = (uint64_t)IMAGE_SIZE_XYZ.x * (uint64_t)IMAGE_SIZE_XYZ.y * (uint64_t)IMAGE_SIZE_XYZ.z; // Anzahl der Voxel im Volumen
-	outputVolume_Bytes = outputVolume_VoxelCount * sizeof(double);	// Speicherbedarf fuer alle Voxel im Volumen
+    outputVolume_VoxelCount = (uint64_t)IMAGE_SIZE_XYZ.x * (uint64_t)IMAGE_SIZE_XYZ.y * (uint64_t)IMAGE_SIZE_XYZ.z;  // Anzahl der Voxel im Volumen
+    outputVolume_Bytes = outputVolume_VoxelCount * sizeof(double);                                                   // Speicherbedarf fuer alle Voxel im Volumen

-	#ifdef debug_OutputVariables
-		printf("  outputVolume_VoxelCount [%ix%ix%i]= %lld\n",IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z, outputVolume_VoxelCount);
-		printf("  outputVolume_Bytes [%lld x sizeof(double = 8)] = %lld\n", outputVolume_VoxelCount, outputVolume_Bytes);
-	#endif
+    aScan_Bytes = aScanLength * sizeof(float);
+    aScanBatch_Bytes = aScanCount * aScan_Bytes;

-	//Hier auf maximale Outputgroesse von 32-BitSystem ueberpruefen --> falls Probleme mit 32-Bitsystemen hier noch Abfrage und Abbruch implementieren
-	//if (outputVolume_VoxelCount > 536870912) // 536870912 = 2^32 / sizeof(double)
-	//	std::cout << "outputVolume_Bytes > 2^32 the upper limit of unsigned integer!!!\n => Reconstruction only in 64-Bit Systems";
-
-	//Groesse der Datenbloecke fuer die Blockverarbeitung wird mit aScanCount angegeben
-	#ifdef debug_OutputVariables
-		printf( "AScan Blockgroesse (aScanCount)= %i\n", aScanCount);
-	#endif
-	aScan_Bytes = aScanLength * sizeof(float);
-	aScanBatch_Bytes = aScanCount * aScan_Bytes;
-	#ifdef debug_OutputVariables
-		printf( "aScan_Bytes = aScanLength(%i) * sizeof(float=4) = %i\n", aScanLength, aScan_Bytes);
-		printf( "aScanCount = %i\n", aScanCount);
-		printf( "aScanBatch_Bytes = aScanCount * aScan_Bytes ( = %i * sizeof(float)) = %i\n", aScanLength, aScanBatch_Bytes);
-	#endif
-
-	#ifdef debug_OutputInfo
-    	printf("\nParameter for Image Reconstruction\n");
-    	printf(  "========================================================================\n");
-    	printf(  "IMAGE_SIZE_XYZ:               [%i x %i x %i]\n", IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z);
-    	printf(  "outputVolume_VoxelCount:       %lld\n", outputVolume_VoxelCount);
-    	printf(  "Increment vector/Resolution:   %f\n", IMAGE_RESOLUTION);
-    	printf(  "IMAGE_STARTPOINT in m:        [%f x %f x %f]\n", regionOfInterestOffset.x, regionOfInterestOffset.y, regionOfInterestOffset.z);
-       	outputVolume_size_m.x = IMAGE_SIZE_XYZ.x * IMAGE_RESOLUTION;
-        outputVolume_size_m.y = IMAGE_SIZE_XYZ.y * IMAGE_RESOLUTION;
-        outputVolume_size_m.z = IMAGE_SIZE_XYZ.z * IMAGE_RESOLUTION;
-    	printf(  "Volume Size in m:             [%f x %f x %f]\n", outputVolume_size_m.x, outputVolume_size_m.y, outputVolume_size_m.z);
-		printf(  "aScanCount:                    %i\n", aScanCount);
-		printf(  "========================================================================\n\n");
-	#endif
-
-
-	#ifdef debug_OutputPerformance
-		struct timeval startProcessAscans, stopProcessAscans;
-		gettimeofday(&startProcessAscans, NULL);
-	#endif
-
-		//perform processing with AScan-Data
-		//===========================================================================================================
-		//===========================================================================================================
-		ullong duration;
-		processAScans(duration);
-		//===========================================================================================================
-		//===========================================================================================================
-
-	#ifdef debug_OutputPerformance
-		diff_time = (double)((stopProcessAscans.tv_sec * 1000000.0 + stopProcessAscans.tv_usec) - (startProcessAscans.tv_sec * 1000000.0 + startProcessAscans.tv_usec));
-		printf ("########################################################################\n");
-		printf ("### GPU (%18s: HW-ID %i, Idx %i) ### Free Memory =                    	%4.0f µs\n", deviceProperties[deviceId].name, deviceId, deviceIndex, diff_time);
-		printf ("########################################################################\n");
-	#endif
-
-	Duration_ptr[(deviceIndex+1)] = (double)duration;	// Für jede GPU einen Laufzeitwert in µs übermitteln, Angabe von Reihenfolge der angegebenen GPU-IDs abhaengig
-
-	#ifdef debug_OutputVariables
-		printf( "  GPU (%s:ID %i,Index %i): => Duration_ptr[%i] = duration(%i µs) = %.2f s\n", device.name, deviceId, deviceIndex, (deviceIndex+1), duration, Duration_ptr[(deviceIndex+1)]/1000/1000);
-	#endif
-
-	#ifdef debug_OutputInfo
-		printf("Reset Device\n"); // Reset Device
-	#endif
-	// CUDA_CHECK(cudaDeviceReset()); // news 2019 commented, see above reason.
-
-	#ifdef debug_OutputFunctions
-		printf( "<== SAFTHandler::performReconstruction - End\n");
-	#endif
+    // perform processing with AScan-Data
+    //===========================================================================================================
+    //===========================================================================================================
+    ullong duration;
+    processAScans(duration);
+    //===========================================================================================================
+    //===========================================================================================================

+    Duration_ptr[(deviceIndex + 1)] = (double)duration;  // Für jede GPU einen Laufzeitwert in µs übermitteln, Angabe von Reihenfolge der angegebenen GPU-IDs abhaengig
 }

 /**
@@ -402,96 +196,42 @@ void SAFTHandler::performReconstruction()
   - Der SAFT Kernel erwartet Argumente in den die Grid Dimension auf drei Dimensionen reduziert wurde und die Block-Dimensionen auf nur eine Dimension reduziert ist.
   - Das haengt auch von den Eigenschaften der verfuegbaren HW ab (shader model)
 */
-void SAFTHandler::reduceKernelDimensions(
-    dim3 const & gridDimensions, ///< Input grid dimensions.
-    dim3 const & blockDimensions, ///< Input block dimensions.
-    dim3 & reducedGridDimensions, ///< Reduced output grid dimensions.
-    dim3 & reducedBlockDimensions ///< Reduced output block dimensions.
-    )
+void SAFTHandler::reduceKernelDimensions(dim3 const &gridDimensions,   ///< Input grid dimensions.
+                                         dim3 const &blockDimensions,  ///< Input block dimensions.
+                                         dim3 &reducedGridDimensions,  ///< Reduced output grid dimensions.
+                                         dim3 &reducedBlockDimensions  ///< Reduced output block dimensions.
+)
 {
-
-	#ifdef debug_OutputFunctions
-		printf( "==> SAFTHandler::reduceKernelDimensions - Start\n");
-	#endif
-
-
-	if(deviceProperties[deviceId].maxGridSize[2] > 1)
+    if (deviceProperties[deviceId].maxGridSize[2] > 1)
    {
        reducedGridDimensions = gridDimensions;
-		#ifdef debug_OutputParameter
-        	printf( "reducedGridDimensions X,Y,Z = (%i %i %i)\n",reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z);
-		#endif
    }
    else
    {
-        reducedGridDimensions = dim3(
-            gridDimensions.x * gridDimensions.y,
-            gridDimensions.z,
-            1
-            );
-		#ifdef debug_OutputParameter
-        	printf( "reducedGridDimensions X,Y,Z = (%i %i %i)\n",reducedGridDimensions.x, reducedGridDimensions.y, reducedGridDimensions.z);
-		#endif
+        reducedGridDimensions = dim3(gridDimensions.x * gridDimensions.y, gridDimensions.z, 1);
    }

    reducedBlockDimensions = dim3(blockDimensions.x * blockDimensions.y * blockDimensions.z);
-	#ifdef debug_OutputParameter
-    	printf( "reducedBlockDimensions X,Y,Z = (%i %i %i)\n", reducedBlockDimensions.x, reducedBlockDimensions.y, reducedBlockDimensions.z);
-	#endif
-
-	#ifdef debug_OutputFunctions
-		printf( "<== SAFTHandler::reduceKernelDimensions - End\n");
-	#endif
-
 }

-
 /**
   Determine free memory available on the current device.
 */
 std::size_t memoryGPUfree()
 {
-	#ifdef debug_OutputFunctions
-		printf( "==> memoryGPUfree - Start\n");
-	#endif
-
-	std::size_t
-        totalMemory,
-        freeMemory;
+    std::size_t totalMemory, freeMemory;
    CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
-
-//		printf("   Total memory %lld Bytes\n", totalMemory);
-//		printf("   Free  memory %lld Bytes\n", freeMemory);
-//		printf(" => Used memory %lld Bytes\n", (totalMemory-freeMemory));
-
-    #ifdef debug_OutputFunctions
-    	printf( "<== memoryGPUfree - End\n");
-	#endif
    return freeMemory;
 }

-
 /**
   Determine free memory available on the current device.
 */
 std::size_t memoryGPUtotal()
 {
-	#ifdef debug_OutputFunctions
-		printf( "==> current - Start\n");
-	#endif
-
-	std::size_t
-        totalMemory,
-        freeMemory;
+    std::size_t totalMemory, freeMemory;
    CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));

-//		printf("   Total memory %lld Bytes\n", totalMemory);
-//		printf("   Free  memory %lld Bytes\n", freeMemory);
-//		printf(" => Used memory %lld Bytes\n", (totalMemory-freeMemory));
-
-    #ifdef debug_OutputFunctions
-    	printf( "<== current - End\n");
-	#endif
    return totalMemory;
 }

@@ -501,111 +241,125 @@ std::size_t memoryGPUtotal()
 */
 void memoryCheck()
 {
-	#ifdef debug_OutputFunctions
-		printf( "==> memoryCheck - Start\n");
-	#endif
+#ifdef debug_OutputFunctions
+    printf("==> memoryCheck - Start\n");
+#endif

-	std::size_t
-        totalMemory,
-        freeMemory;
-	float check;
+    std::size_t totalMemory, freeMemory;
+    float check;
    CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
+    // totalMemory
+    check = 1024.0f * 1024.0f * 1024.0f * 1024.0f;
+    if (totalMemory >= check)
+    {
+        printf("   Total memory %.3f TB\n", totalMemory / check);
+    }
+    else
+    {
+        check /= 1024.0f;
+        if (totalMemory >= check)
+        {
+            printf("   Total memory %.3f GB\n", totalMemory / check);
+        }
+        else
+        {
+            check /= 1024.0f;
+            if (totalMemory >= check)
+            {
+                printf("   Total memory %.3f MB\n", totalMemory / check);
+            }
+            else
+            {
+                check /= 1024.0f;
+                if (totalMemory >= check)
+                {
+                    printf("   Total memory %.3f kB\n", totalMemory / check);
+                }
+                else
+                {
+                    check /= 1024.0f;
+                    if (totalMemory >= check)
+                        printf("   Total memory %.3f Bytes\n", totalMemory / check);
+                }
+            }
+        }
+    }

-    //#if defined(debug_OutputInfo) || defined(debug_OutputMaxMemory)
-//		printf("   Total memory %lld Bytes\n", totalMemory);
-//		printf("   Free  memory %lld Bytes\n", freeMemory);
-//		printf(" => Used memory %lld Bytes\n", (totalMemory-freeMemory));
-
-
-    	// totalMemory
-		check = 1024.0f*1024.0f*1024.0f*1024.0f;
-	    if (totalMemory >= check){
-	    	printf("   Total memory %.3f TB\n", totalMemory/check);
-	    } else {
-			check /= 1024.0f;
-			if (totalMemory >= check){
-				printf("   Total memory %.3f GB\n", totalMemory/check);
-			} else {
-				check /= 1024.0f;
-				if (totalMemory >= check){
-					printf("   Total memory %.3f MB\n", totalMemory/check);
-				} else {
-					check /= 1024.0f;
-					if (totalMemory >= check){
-						printf("   Total memory %.3f kB\n", totalMemory/check);
-					} else {
-						check /= 1024.0f;
-						if (totalMemory >= check)
-							printf("   Total memory %.3f Bytes\n", totalMemory/check);
-					}
-				}
-			}
-	    }
-
-		// freeMemory
-		check = 1024.0f*1024.0f*1024.0f*1024.0f;
-	    if (freeMemory >= check){
-	    	printf("   Free memory %.3f TB\n", freeMemory/check);
-	    } else {
-			check /= 1024.0f;
-			if (freeMemory >= check){
-				printf("   Free memory %.3f GB\n", freeMemory/check);
-			} else {
-				check /= 1024.0f;
-				if (freeMemory >= check){
-					printf("   Free memory %.3f MB\n", freeMemory/check);
-				} else {
-					check /= 1024.0f;
-					if (freeMemory >= check){
-						printf("   Free memory %.3f kB\n", freeMemory/check);
-					} else {
-						check /= 1024.0f;
-						if (freeMemory >= check)
-							printf("   Free memory %.3f Bytes\n", freeMemory/check);
-					}
-				}
-			}
-	    }
-
-		// Used Memory
-		check = 1024.0f*1024.0f*1024.0f*1024.0f;
-	    if ((totalMemory-freeMemory) >= check){
-	    	printf("   Used memory %.3f TB\n", (totalMemory-freeMemory)/check);
-	    } else {
-			check /= 1024.0f;
-			if ((totalMemory-freeMemory) >= check){
-				printf("   Used memory %.3f GB\n", (totalMemory-freeMemory)/check);
-			} else {
-				check /= 1024.0f;
-				if ((totalMemory-freeMemory) >= check){
-					printf("   Used memory %.3f MB\n", (totalMemory-freeMemory)/check);
-				} else {
-					check /= 1024.0f;
-					if ((totalMemory-freeMemory) >= check){
-						printf("   Used memory %.3f kB\n", (totalMemory-freeMemory)/check);
-					} else {
-						check /= 1024.0f;
-						if ((totalMemory-freeMemory) >= check)
-							printf("   Used memory %.3f Bytes\n", (totalMemory-freeMemory)/check);
-					}
-				}
-			}
-	    }
-
-
-	//#endif
-
-    #ifdef debug_OutputFunctions
-    	printf( "<== memoryCheck - End\n");
-	#endif
+    // freeMemory
+    check = 1024.0f * 1024.0f * 1024.0f * 1024.0f;
+    if (freeMemory >= check)
+    {
+        printf("   Free memory %.3f TB\n", freeMemory / check);
+    }
+    else
+    {
+        check /= 1024.0f;
+        if (freeMemory >= check)
+        {
+            printf("   Free memory %.3f GB\n", freeMemory / check);
+        }
+        else
+        {
+            check /= 1024.0f;
+            if (freeMemory >= check)
+            {
+                printf("   Free memory %.3f MB\n", freeMemory / check);
+            }
+            else
+            {
+                check /= 1024.0f;
+                if (freeMemory >= check)
+                {
+                    printf("   Free memory %.3f kB\n", freeMemory / check);
+                }
+                else
+                {
+                    check /= 1024.0f;
+                    if (freeMemory >= check)
+                        printf("   Free memory %.3f Bytes\n", freeMemory / check);
+                }
+            }
+        }
+    }

+    // Used Memory
+    check = 1024.0f * 1024.0f * 1024.0f * 1024.0f;
+    if ((totalMemory - freeMemory) >= check)
+    {
+        printf("   Used memory %.3f TB\n", (totalMemory - freeMemory) / check);
+    }
+    else
+    {
+        check /= 1024.0f;
+        if ((totalMemory - freeMemory) >= check)
+        {
+            printf("   Used memory %.3f GB\n", (totalMemory - freeMemory) / check);
+        }
+        else
+        {
+            check /= 1024.0f;
+            if ((totalMemory - freeMemory) >= check)
+            {
+                printf("   Used memory %.3f MB\n", (totalMemory - freeMemory) / check);
+            }
+            else
+            {
+                check /= 1024.0f;
+                if ((totalMemory - freeMemory) >= check)
+                {
+                    printf("   Used memory %.3f kB\n", (totalMemory - freeMemory) / check);
+                }
+                else
+                {
+                    check /= 1024.0f;
+                    if ((totalMemory - freeMemory) >= check)
+                        printf("   Used memory %.3f Bytes\n", (totalMemory - freeMemory) / check);
+                }
+            }
+        }
+    }
 }

-
-
-
-
-
 /**
   Generic CUDA call wrapper.
   Check the result of a CUDA operation and throw an exception if an error occurred.
@@ -614,21 +368,19 @@ void memoryCheck()
   - <20>berpr<70>ft die Ergebnisse einer CUDA Operation und wirft eine Exception wenn ein Fehler auftritt
   - Das wird wird mit einer Kombination mit einem Makro in saft.hpp genutzt.
 */
-//inline	// Da performCUDAResultCheck in allen Files genutzt werden soll funktioniert inline und etern nicht zusammen
-void performCUDAResultCheck(
-    cudaError_t result, ///< Result of the CUDA operation.
-    std::string const & file, ///< Path to the source code file.
-    int line ///< Line within the source code
-    )
+// inline	// Da performCUDAResultCheck in allen Files genutzt werden soll funktioniert inline und etern nicht zusammen
+void performCUDAResultCheck(cudaError_t result,       ///< Result of the CUDA operation.
+                            std::string const &file,  ///< Path to the source code file.
+                            int line                  ///< Line within the source code
+)
 {
-    if(result != cudaSuccess)
+    if (result != cudaSuccess)
    {
-    	//printf("A CUDA operation failed in file \"%s\" (line %i):  %s \n", file, line, cudaGetErrorString(result).c_str() );
-    	printf("%s\n", cudaGetErrorString( cudaGetLastError() ) );
+        // printf("A CUDA operation failed in file \"%s\" (line %i):  %s \n", file, line, cudaGetErrorString(result).c_str() );
+        printf("%s\n", cudaGetErrorString(cudaGetLastError()));

-    	//std::string errorMessage = "A CUDA operation failed in file \"" + file + "\" (line " + ail::number_to_string(line) + "): " + std::string(cudaGetErrorString(result));
-        //std::cout << errorMessage << std::endl;
+        // std::string errorMessage = "A CUDA operation failed in file \"" + file + "\" (line " + ail::number_to_string(line) + "): " + std::string(cudaGetErrorString(result));
+        // std::cout << errorMessage << std::endl;
        printf("-> Error occurred");
    }
 }
-
--- a/SAFT_TOFI/src/saft.cu
+++ b/SAFT_TOFI/src/saft.cu
@@ -1,15 +0,0 @@
-#include <iostream>
-
-#include "saft.hpp"
-
-/*!
-	This is the central CUDA file which really just includes the other modules.
-	This is done because CUDA does not support external references for referencing data from other compilation units.
-	- Dies ist das zentrale CUDA-File welches nur die anderen Module einbindet
-	- Das wird gemacht, weil CUDA keine externen Referenzen unterst<73>tzt, um Daten von anderen Compilierungs Einheiten zu referenzieren.
-*/
-
-// #include "kernel/rayTracing.cuh"		// GPU-Code für Bresenham
-// #include "kernel/precalculateSpeedOfSoundKernel.cuh"	// GPU-Code Partitionierung für Bresenham. Ruft den Bresenham auf.
-// #include "kernel/saftKernel.cuh"		// GPU-Kernel für SAFT
-
--- a/SAFT_TOFI/src/saft.hpp
+++ b/SAFT_TOFI/src/saft.hpp
@@ -1,154 +1,88 @@
 #pragma once

-#include <string>
-
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_runtime_api.h>
-
 #include <stdint.h>
-#include <stdio.h>      // standard input/output
-#include <vector>       // stl vector header
+#include <stdio.h>  // standard input/output

+#include <string>
+#include <vector>  // stl vector header

-typedef unsigned char  uchar;
+typedef unsigned char uchar;
 typedef unsigned short ushort;
-typedef unsigned long  ulong;
-typedef unsigned long  long ullong;
-
-
-//Define Outputs for Debugmode
-//============================
-	//#define debug_OutputFormat_German					// German Format , instead . for numbers
-	//#define debug_OutputFunctions						// Funktionenaufrufe ausgeben
-	//#define debug_OutputVariables						// Werte der Variablen ausgeben
-	//#define debug_OutputParameter						// Uebersicht der Eingabedaten anzeigen sowie Infobloeke in den einzelnen Schritten
-	//#define debug_OutputMemory						// Speicherverwaltung, Malloc, Free, Groessen
-	//#define debug_OutputMaxMemory						// Gibt aktuellen Speicherverbrauch an, wenn memoryCheck aufgerufen wird
-	//#define debug_OutputInfo							// Gibt Infos zu Schritten, Variablen,... aus
-	//#define debug_OutputPerformance					// Gibt die Laufzeiten und die eizelnen Multi-GPU Performanzwerte von ProcessAscans aus
-	//#define debug_OutputStepsPerformance				// Gibt die Laufzeiten und für die einzelnen Schritte in performCoreReconstruction aus (Copy Ascans, Precalc, PerfCoreReconstruction, copy back)
-	//#define debug_OutputStepsPrecalculation			// Gibt Infos ueber die einzelnen Schritte der Precalculation Steps an
-	//#define debug_OutputHostStepsPerformance			// Gibt die Laufzeiten für die eizelnen Schritte auf dem HOST aus (Preintegrated Ascans)
-	//#define debug_OutputSAFTHandlerThreadPerformance 	// Gibt die Gesamt-Laufzeiten der einzelnen Multi-GPU Threads aus
-	//#define debug_OutputMultiGpu						// Einteilung des Volumens auf mehrerer GPUs ausgeben
-	//#define debug_OutputStreams						// Gibt die Schritte der Berechnung der Streams aus
-	//#define debug_OutputSOSPaths						// Gibt die Schritte und Werte der SOSPfadberechnung aus
-	//#define debug_OutputSOSStepsParameter				// Einteilung der ZLayer in SOSZlayer
-	//#define debug_OutputLookUpGeometryMemoryList		// Debugausgabe fuer die LookUpGeometryMemoryList (Constant Memory)
-	//#define debug_OutputAScanIndexMemoryDivision		// Debugausgabe für die Einteilung in Surfaces da mehrere Surfaces benoetigt werden
-
-	//#define OutputVolume						// Ausgabe des  Volumens
-
-// Debugging CUDA Kernels
-//================================================
-	//#define debug_CudaSAFTKernelModes			// Use variable debugMode for different calulations methods and output
-		//#define debug_CudaSAFTKernel_EnableAnalyticAverageSpeedCalculation	// Fuer Fehlerberchnungen
-	//#define debug_CudaSAFTKernel
-	//#define debug_CudaPrecalculateKernel
-	//#define debug_CudaPrecalculateAscanIndexKernel		// Kernel function for PrecalculateAscanIndex
-	//#define debug_CudaPrecalculateAscanIndexKernelProxy	// Proxy function for PrecalculateAscanIndex
-	//#define debug_CudaFillCuArrayKernelProxy
-	//#define debug_CudaSAFTAscanIndexKernel				    // Kernel function for SAFTAscanIndex
-	//#define debug_CudaSAFTAscanIndexKernelDataAccess      // Access and sum up values from Ascans
-	//#define debug_CudaRayTraceKernel
-	//#define debug_CudaRayTraceKernelLive
-
-
-	//#define DebugSetMemoryToZero  // Set SOSPathMemory to Zero as Initialisation
-
-
-// Define specific Hardware-Versions
-	#define GTX_590
-	//#define GTX_690
-	//#define GTX_TITAN
-
-	#if defined(GTX_590)
-		#define GTX_Fermi
-	#endif
-	#if defined(GTX_690) || defined(GTX_TITAN)
-		#define GTX_Kepler
-	#endif
+typedef unsigned long ulong;
+typedef unsigned long long ullong;

 // Memory management of GPU and Errordetection
 //================================================
-	//#define SaftNoTexture					// Instead of TextureMemory use Memory access on GPU Device Memory // out-of-date
-	//#define SaftCorrectSumAllAscan	    // Recalculation if too big values occur
-
-
-// SAFT-Implementierung 2-stufig mit AscanIndexInterpolation
-//============================================================
-	#define SaftUseAscanIndexInterpolation
-		#define noGeometryLoading
-		//#define SaftUseAscanIndexInterpolation_PartWise	// Kernel mit nur einem Durchlauf durchfuehren, sonst ueber Ascans im Kernel laufen
-		#define AscanTextureUse1Float	// Textur mit nur Float1 für SOS berechnen
+// #define SaftNoTexture					// Instead of TextureMemory use Memory access on GPU Device Memory // out-of-date
+// #define SaftCorrectSumAllAscan	    // Recalculation if too big values occur

+// #define SaftUseAscanIndexInterpolation_PartWise	// Kernel mit nur einem Durchlauf durchfuehren, sonst ueber Ascans im Kernel laufen
+#define AscanTextureUse1Float  // Textur mit nur Float1 für SOS berechnen

 // Integration der A-scans im Vornherein durchfuehren um Samplebreite an zu rekonstruierende Aufloesung anzupassen
 //=======================================================================================================================
-	#define preAscanIntegrationToMatchSamplerateToResolution  // Integration der Ascans ueber Fensterbreite durchfuehren
-		//#define debug_preAscanIntegration
-			#define DebugSammleMin  2990		// Grenzen feeru Degbugausgabe der Werte
-			#define DebugSammleMax  3000
-		//#define preAscanIntegrationVersion1Michael	// direkt übernommene Version von Michael
-		#define preAscanIntegrationVersion2Ernst	// korrigierte Variante mit genauerer Fensterbreite
-
+#define preAscanIntegrationToMatchSamplerateToResolution  // Integration der Ascans ueber Fensterbreite durchfuehren
+                                                          // #define debug_preAscanIntegration
+#define DebugSammleMin 2990                               // Grenzen feeru Degbugausgabe der Werte
+#define DebugSammleMax 3000
+// #define preAscanIntegrationVersion1Michael	// direkt übernommene Version von Michael
+#define preAscanIntegrationVersion2Ernst  // korrigierte Variante mit genauerer Fensterbreite

 // Parameter fuer SAFT-Kernel
 //=======================================================================================================================
-	#define USED_ASCANSMEMORYREGIONS  		     1	// Anzahl der A-Scan-Speicherbereiche die alloziert werden
-													// Hier reicht einer statt zwei! 2 nur nötig wenn A-scans Spückweise mit Streams fuer kopiert werden sollen.
-	#define MAX_SUPPORTEDTEXTURES_FORASCANINDEX  4	// Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
-	#define MAX_SUPPORTEDRECEIVER_FORSOSPATHTEXTURE  710 // Definiert maximale #Receiver in einem SOSPATH-Textur
-	#define MATLABSAVETY_MB  25 						  // in MB. Matlab belegt zusaetzlich GPU-Speicher, der bei Grenzfällen zum absturz fuehren kann, daher zur Sicherheit Speicher freihalten
+#define USED_ASCANSMEMORYREGIONS \
+    1                                                // Anzahl der A-Scan-Speicherbereiche die alloziert werden
+                                                     // Hier reicht einer statt zwei! 2 nur nötig wenn A-scans Spückweise mit Streams fuer kopiert werden sollen.
+#define MAX_SUPPORTEDTEXTURES_FORASCANINDEX 4        // Definiert die im Code maximal unterstuetzen Texturen fuer AscanIndex;
+#define MAX_SUPPORTEDRECEIVER_FORSOSPATHTEXTURE 710  // Definiert maximale #Receiver in einem SOSPATH-Textur
+#define MATLABSAVETY_MB 25                           // in MB. Matlab belegt zusaetzlich GPU-Speicher, der bei Grenzfällen zum absturz fuehren kann, daher zur Sicherheit Speicher freihalten

+#define SaftLinearInterpolation  // Lineare Interpolation beim Zugriff auf A-scans durchführen

-	#define SaftLinearInterpolation  // Lineare Interpolation beim Zugriff auf A-scans durchführen
+#define SaftUseConstantMemforGeometry  // Geometriedaten im Constantmemory nutzen

-	#define SaftUseConstantMemforGeometry				// Geometriedaten im Constantmemory nutzen
+// #define SaftTextureForEmRecSosPathsTablesFloat1	// Use Float1-Textur for loading SOS-Paths -> Sum, Count separated
+// #define SaftTextureForEmRecSosPathsTablesFloat2		// Use Float2-Textur for loading SOS-Paths -> Sum + Count for SOS for one position
+#define SaftTextureForEmRecSosPathsTablesFloat4  // Use Float4-Textur for loading SOS-Paths -> Sum as well Count for SOS and ATT for one position

-	//#define SaftTextureForEmRecSosPathsTablesFloat1	// Use Float1-Textur for loading SOS-Paths -> Sum, Count separated
-	//#define SaftTextureForEmRecSosPathsTablesFloat2		// Use Float2-Textur for loading SOS-Paths -> Sum + Count for SOS for one position
-	#define SaftTextureForEmRecSosPathsTablesFloat4		// Use Float4-Textur for loading SOS-Paths -> Sum as well Count for SOS and ATT for one position
-
-	#if defined(SaftTextureForEmRecSosPathsTablesFloat1) || defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
-		#define SaftTextureForEmRecSosPathsTables		// Use Textur for loading SOS-Paths, -> Interpolation between SoSVoxelnPaths is possible
-	#endif
-
-	// Several SAFT_VARIANTs
-	#define SAFT_VARIANT_AscanPreintegration					0
-	#define SAFT_VARIANT_AscanInterpolation						1
-	#define SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing	2	// Use interpolation while Preprocessing
-	#define SAFT_VARIANT_3DVolumeInterpolationAtReconstruction	3	// Use interpolation while Reconstruction
-	#define SAFT_VARIANT_CalcStandardDeviation					4	// Not yet implemented
-	#define SAFT_VARIANT_SumUpOverBoarderIndices				5	// Not yet implemented
+#if defined(SaftTextureForEmRecSosPathsTablesFloat1) || defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
+#define SaftTextureForEmRecSosPathsTables  // Use Textur for loading SOS-Paths, -> Interpolation between SoSVoxelnPaths is possible
+#endif

+// Several SAFT_VARIANTs
+#define SAFT_VARIANT_AscanPreintegration 0
+#define SAFT_VARIANT_AscanInterpolation 1
+#define SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing 2   // Use interpolation while Preprocessing
+#define SAFT_VARIANT_3DVolumeInterpolationAtReconstruction 3  // Use interpolation while Reconstruction
+#define SAFT_VARIANT_CalcStandardDeviation 4                  // Not yet implemented
+#define SAFT_VARIANT_SumUpOverBoarderIndices 5                // Not yet implemented

 // Cache <-> shared Memory
-    //#define SaftPreferSharedMem 	// cudaFuncCachePreferShared: shared memory is 48 KB
-    #define SaftPreferL1SharedMem 	// cudaFuncCachePreferL1: shared memory is 16
-	//#define SaftPreferNone			// cudaFuncCachePreferNone: no preference
+// #define SaftPreferSharedMem 	// cudaFuncCachePreferShared: shared memory is 48 KB
+#define SaftPreferL1SharedMem  // cudaFuncCachePreferL1: shared memory is 16
+// #define SaftPreferNone			// cudaFuncCachePreferNone: no preference

 // Receiver Cache mit shared Memory (nur bei kleinen Blockgroeßen)
-    //#define SaftReceiverSharedMemCacheReceiverDistance
-	//#define SaftCacheReceiverSOS
-		//#define SaftReceiverSharedMemCacheReceiverSOS	// Use Shared Memory for Caching
-		//#define SaftRegisterCacheReceiverSOS			// Use Register for Caching
-
+// #define SaftReceiverSharedMemCacheReceiverDistance
+// #define SaftCacheReceiverSOS
+// #define SaftReceiverSharedMemCacheReceiverSOS	// Use Shared Memory for Caching
+// #define SaftRegisterCacheReceiverSOS			// Use Register for Caching

 // Berechnung der mittleren Schallgeschwindigkeit
 //================================================
-	#define SaftUseHarmonicMean       	// harmonic Mean
+#define SaftUseHarmonicMean  // harmonic Mean

-	#define SaftTextureForBresenhamSosPaths	 // Texturmemory für SOS-Volumen nutzen (Version without not full implemented)
-			//#define SaftTextureForBresenhamInterpolated	//iSOS-Version --> wird nun ueber Parameter uebergeben
-			//#define SaftUseFastMath				//FastMath fuer schnellere Berechnung aber Fehler am Rand. Dafuer ist Korrektur noetig.
+#define SaftTextureForBresenhamSosPaths  // Texturmemory für SOS-Volumen nutzen (Version without not full implemented)
+                                         // #define SaftTextureForBresenhamInterpolated	//iSOS-Version --> wird nun ueber Parameter uebergeben
+// #define SaftUseFastMath				//FastMath fuer schnellere Berechnung aber Fehler am Rand. Dafuer ist Korrektur noetig.

-	//#define SaftUseSosAttFloat1		// Nutze getrennte Texturen fuer beide Volumen (Sos+Att) // Aktuell nicht implementiert
-	#define SaftUseSosAttFloat2		// Nutze nur eine Textur fuer beide Volumen (Sos+Att)
-
-	#define SOS_Version2	// korrekte Version mit Definitionen im Mittelpunkt
+// #define SaftUseSosAttFloat1		// Nutze getrennte Texturen fuer beide Volumen (Sos+Att) // Aktuell nicht implementiert
+#define SaftUseSosAttFloat2  // Nutze nur eine Textur fuer beide Volumen (Sos+Att)

+#define SOS_Version2  // korrekte Version mit Definitionen im Mittelpunkt

 // MultiGPU
 //================================================
@@ -163,18 +97,16 @@ typedef unsigned long  long ullong;
 // constant such that 64kB of constant is fully blocked by emitter/receiver combinations
 const int MAX_EMITTER_RECEIVE_IN_CONSTANT_MEMORY = 2340;

-//Macro used to perform CUDA calls. Throws an exception in case of a CUDA error. Also shows on which line it occurred.
+// Macro used to perform CUDA calls. Throws an exception in case of a CUDA error. Also shows on which line it occurred.
 #define CUDA_CHECK(operation) performCUDAResultCheck(operation, __FILE__, __LINE__);

-//Macro used to see when a particular line of code is executed on the host.
+// Macro used to see when a particular line of code is executed on the host.
 #define DEBUG_MARK std::cout << "[DEBUG] file " << __FILE__ << ", line " << __LINE__ << std::endl

-
-//Convenient typedefs for containers
+// Convenient typedefs for containers
 typedef std::vector<cudaDeviceProp> DeviceProperties;
 typedef std::vector<dim3> Dimensions;

-
 /**
   Most important class in the application.
   - Haupt-Klasse der Applikation
@@ -183,399 +115,288 @@ typedef std::vector<dim3> Dimensions;
 */
 class SAFTHandler
 {
-public:
-    SAFTHandler(int deviceId,
-    			int deviceIndex,
-    			float *aScan_ptr,    				///< Zeiger zu den AScandaten //std::string const & aScanSamplesPath,
-    			double *output_ptr,  				///< Zeiger zu den Outputdaten //std::string const & outputPath,
-    			double *Duration_ptr,  				///< Zeiger auf Ausgabewert f<>r benoetigte Laufzeit des SAFT-Kernels
-    			unsigned short *receiver_index_ptr, ///<
-    			unsigned short *emitter_index_ptr,  ///<
-    			float *receiver_list_ptr, 			///<
-    			int receiver_list_Size, 			///<
-    			float *emitter_list_ptr,			///<
-    			int emitter_list_Size, 				///<
-    			float *speed_vec_ptr,
-    			int3 SOSGrid_XYZ,
-    			float3 sosOffset, 					///< Startpoint of SoSGrid
-    			float SOS_RESOLUTION,				///< Aufloesung des SoSGrid
-    			float *att_vec_ptr,    				//att_vec_ptr
+   public:
+    SAFTHandler(int deviceId, int deviceIndex,
+                float *aScan_ptr,                    ///< Zeiger zu den AScandaten //std::string const & aScanSamplesPath,
+                double *output_ptr,                  ///< Zeiger zu den Outputdaten //std::string const & outputPath,
+                double *Duration_ptr,                ///< Zeiger auf Ausgabewert f<>r benoetigte Laufzeit des SAFT-Kernels
+                unsigned short *receiver_index_ptr,  ///<
+                unsigned short *emitter_index_ptr,   ///<
+                float *receiver_list_ptr,            ///<
+                int receiver_list_Size,              ///<
+                float *emitter_list_ptr,             ///<
+                int emitter_list_Size,               ///<
+                float *speed_vec_ptr, int3 SOSGrid_XYZ,
+                float3 sosOffset,      ///< Startpoint of SoSGrid
+                float SOS_RESOLUTION,  ///< Aufloesung des SoSGrid
+                float *att_vec_ptr,    // att_vec_ptr

-    			int aScanCount,
-    			int aScanLength,
-    			int3 IMAGE_SIZE_XYZ,
-    		    float sampleRate,
-    			float3 regionOfInterestOffset,
-    			float IMAGE_RESOLUTION,
-    			dim3 const & fixedBlockDimensions,
-    			float debugMode,
-    			float debugModeParameter,
-    			//bool useFixedPartialOutputWindow,
+                int aScanCount, int aScanLength, int3 IMAGE_SIZE_XYZ, float sampleRate, float3 regionOfInterestOffset, float IMAGE_RESOLUTION, dim3 const &fixedBlockDimensions, float debugMode,
+                float debugModeParameter,
+                // bool useFixedPartialOutputWindow,

-    			bool SOSMode_3DVolume,
-    			bool ATTMode_3DVolume,
+                bool SOSMode_3DVolume, bool ATTMode_3DVolume,

-    			int SAFT_MODE,
-    			int *SAFT_VARIANT,
-    			int SAFT_VARIANT_Size,
+                int SAFT_MODE, int *SAFT_VARIANT, int SAFT_VARIANT_Size,

-    			int *Abort_ptr
-    			);
+                int *Abort_ptr);

    void performReconstruction();

-private:
+   private:
+    int *Abort_ptr;  // Ist ein Fehler aufgetreten der zum Abburch geführt hat
+    // int Abort;

-    int *Abort_ptr;	// Ist ein Fehler aufgetreten der zum Abburch geführt hat
-    //int Abort;
+    bool SOSMode_3DVolume, ATTMode_3DVolume;

-    bool SOSMode_3DVolume,
-    	 ATTMode_3DVolume;
-
-	int SAFT_MODE;
-	int *SAFT_VARIANT;
-	int *deviceSAFT_VARIANT;
-	int SAFT_VARIANT_Size;
+    int SAFT_MODE;
+    int *SAFT_VARIANT;
+    int *deviceSAFT_VARIANT;
+    int SAFT_VARIANT_Size;

    int deviceId;
    int deviceIndex;
-    float  debugMode;
-    float  debugModeParameter;
+    float debugMode;
+    float debugModeParameter;

    DeviceProperties deviceProperties;

-    float
-    	*aScan_ptr;
+    float *aScan_ptr;

-//    float
-//    	*rec_vec_ptr,
-//    	*send_vec_ptr;
+    //    float
+    //    	*rec_vec_ptr,
+    //    	*send_vec_ptr;

    // Zuordnungslisten in der geschaut wird welcher Emitter/Receiver genutzt wird (65535 = nicht genutzt, alles andere ist dann der Index)
-	unsigned short* hostLookUpGeometryMemoryListEmitterPtr;  // Memory of hostLookUpGeometryMemoryListEmitter
-	unsigned short* hostLookUpGeometryMemoryListReceiverPtr; // Memory of hostLookUpGeometryMemoryListReceiver
-	int lookUpGeometryMemoryListEmitterSize; // Size of hostLookUpGeometryMemoryListEmitterPtr
-	int lookUpGeometryMemoryListReceiverSize;// Size of hostLookUpGeometryMemoryListReceiverPtr
+    unsigned short *hostLookUpGeometryMemoryListEmitterPtr;   // Memory of hostLookUpGeometryMemoryListEmitter
+    unsigned short *hostLookUpGeometryMemoryListReceiverPtr;  // Memory of hostLookUpGeometryMemoryListReceiver
+    int lookUpGeometryMemoryListEmitterSize;                  // Size of hostLookUpGeometryMemoryListEmitterPtr
+    int lookUpGeometryMemoryListReceiverSize;                 // Size of hostLookUpGeometryMemoryListReceiverPtr

-    unsigned short
-    	*emitter_index_ptr,
-    	*receiver_index_ptr;
+    unsigned short *emitter_index_ptr, *receiver_index_ptr;

-    float
-    	*emitter_list_ptr,
-    	*receiver_list_ptr;
+    float *emitter_list_ptr, *receiver_list_ptr;

-    int
-    	receiver_list_Size,
-    	emitter_list_Size;
+    int receiver_list_Size, emitter_list_Size;

-    double
-       	*output_ptr;
+    double *output_ptr;

-    double
-    	*Duration_ptr;
+    double *Duration_ptr;

-    float
-    	Sos,
-    	*speed_vec_ptr,
-    	*att_vec_ptr;
+    float Sos, *speed_vec_ptr, *att_vec_ptr;

-    int3
-    	SOSGrid_XYZ;
+    int3 SOSGrid_XYZ;

-	float3
-		sosOffset; 					///< Startpoint of SoSGrid
+    float3 sosOffset;  ///< Startpoint of SoSGrid

-    int
-    	aScanCount,
-    	aScanLength;
+    int aScanCount, aScanLength;

-    int3
-    	IMAGE_SIZE_XYZ;
+    int3 IMAGE_SIZE_XYZ;

-    float3
-    	regionOfInterestOffset; //imageStartpoint; TODO: umbenennen!
+    float3 regionOfInterestOffset;  // imageStartpoint; TODO: umbenennen!

-    float
-    	IMAGE_RESOLUTION,		  ///< Aufl<66>sung im OutputVolumen
-	    IMAGE_RESOLUTION_FACTOR,  ///< 1/Aufl<EFBFBD>sung im OutputVolumen
-	    SOS_RESOLUTION,			  ///< Aufloesung des SoSGrid
-	    SOS_RESOLUTION_FACTOR;	  ///< 1/Aufl<66>sung im SoS-Grid
+    float IMAGE_RESOLUTION,       ///< Aufl<66>sung im OutputVolumen
+        IMAGE_RESOLUTION_FACTOR,  ///< 1/Aufl<EFBFBD>sung im OutputVolumen
+        SOS_RESOLUTION,           ///< Aufloesung des SoSGrid
+        SOS_RESOLUTION_FACTOR;    ///< 1/Aufl<EFBFBD>sung im SoS-Grid

-    std::string
-        emitterGeometryPath,
-        receiverGeometryPath,
-        aScanSamplesPath,
-        outputPath;
+    std::string emitterGeometryPath, receiverGeometryPath, aScanSamplesPath, outputPath;

    float *aScanSamples;
    double *output;
-    //int aScanCount;
+    // int aScanCount;
    int
-    	//aScanSize,
-    	aScan_Bytes,
-    	//batchSize,	// --> aScanCount
-    	//aScanBatchSize;
-    	aScanBatch_Bytes;
+        // aScanSize,
+        aScan_Bytes,
+        // batchSize,	// --> aScanCount
+        // aScanBatchSize;
+        aScanBatch_Bytes;

    float voxelSize;

    float sampleRate;

-    //size_t
+    // size_t
    uint64_t
-    	//regionOfInterestVoxelCount,
-    	outputVolume_VoxelCount,
-        //outputSize;
-   		outputVolume_Bytes;
+        // regionOfInterestVoxelCount,
+        outputVolume_VoxelCount,
+        // outputSize;
+        outputVolume_Bytes;

-    float3 outputVolume_size_m; // ROI-Groesse in meter
+    float3 outputVolume_size_m;  // ROI-Groesse in meter

-    uint64_t
-    	partialOutputZLayerOffset;
+    uint64_t partialOutputZLayerOffset;

-    int
-    	partialOutputZLayerOffsetCount,
-    	partialOutputSoSZLayerCount,
-    	currentZLayerCount,
-    	partialSoSZLayerCount;
+    int partialOutputZLayerOffsetCount, partialOutputSoSZLayerCount, currentZLayerCount, partialSoSZLayerCount;

    // Fuer AscanIndexInterpolation
    // ------------------------------------------------------
-    int 		currentEmIndexUsedForAscanIndexCalculation;
-    float 		*deviceTextureAscanIndexFloat;					// Texture adresses for precalculated AscanIndex data
-    //std::size_t deviceTextureAscanIndexFloatSize;
+    int currentEmIndexUsedForAscanIndexCalculation;
+    float *deviceTextureAscanIndexFloat;  // Texture adresses for precalculated AscanIndex data
+    // std::size_t deviceTextureAscanIndexFloatSize;

-    cudaArray **deviceTextureAscanIndexFloatCuArray;  			// CudaArray for AscanIndex data
-    int 		maxSurfaceTexture3DDimension;		// max Dimension in 3D --> Max size for Texture
-    int 		maxAscanIndexArraysInTexture;		// = maxSurfaceTexture3DDimension/2;
-    int 		TableAscanIndexAllocationCount;		// Anzahl der benoetigten AscanBlocks der Groesse 2048/4096
-    int 		maxSupportedTexturesForAscanIndex;		// Definiert die maximal unterstuetzen Texturen fuer AscanIndex
-    int 		neededAscanBatchCount; 				// Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
+    cudaArray **deviceTextureAscanIndexFloatCuArray;  // CudaArray for AscanIndex data
+    int maxSurfaceTexture3DDimension;                 // max Dimension in 3D --> Max size for Texture
+    int maxAscanIndexArraysInTexture;                 // = maxSurfaceTexture3DDimension/2;
+    int TableAscanIndexAllocationCount;               // Anzahl der benoetigten AscanBlocks der Groesse 2048/4096
+    int maxSupportedTexturesForAscanIndex;            // Definiert die maximal unterstuetzen Texturen fuer AscanIndex
+    int neededAscanBatchCount;                        // Anzahl an benoetigten Durchlaeufen des SAFTs um alle Ascans abarbeiten zu koennen
    // ------------------------------------------------------

    double *currentHostOutputAdress;

    // Pointer of Inputdata in memory of Ascanblock
-    float3
-        *receiver_list,			// LookUpTable receiverNr -> coordinates
-        *emitter_list;				// LookUpTable emitterNr -> coordinates
+    float3 *receiver_list,  // LookUpTable receiverNr -> coordinates
+        *emitter_list;      // LookUpTable emitterNr -> coordinates

-    unsigned short
-    	*receiver_index,			// Input Ascanblockdata: corresponding receiverNr
-    	*emitter_index;			// Input Ascanblockdata: corresponding emitterNr
+    unsigned short *receiver_index,  // Input Ascanblockdata: corresponding receiverNr
+        *emitter_index;              // Input Ascanblockdata: corresponding emitterNr

-    //float
+    // float
    //	*SoSData;					// Input Ascanblockdata: Corresponding SOS value

-    float *speedOfSoundField;		// Input Ascanblockdata: Corresponding SOS value as volume TODO: ==> in speedOfSoundGrid umbenennen
-    float *attenuationField;		// Input Ascanblockdata: Corresponding ATT value as volume TODO: ==> in attenuationGrid umbenennen
+    float *speedOfSoundField;  // Input Ascanblockdata: Corresponding SOS value as volume TODO: ==> in speedOfSoundGrid umbenennen
+    float *attenuationField;   // Input Ascanblockdata: Corresponding ATT value as volume TODO: ==> in attenuationGrid umbenennen

-	#ifdef SaftUseSosAttFloat2
-    	float2 *hostSosAttField;
-	#endif
+    float2 *hostSosAttField;

    // Memorysizes
    int
-    	//speedOfSoundFieldVoxelCount,				//
-    	SOSVolume_VoxelCount,				// Amount of Voxels of SOSVolume
-    	//speedOfSoundFieldBytes,						//
-    	SOSVolume_Bytes,					// Size of SOSVolume in Byte
-    	speedOfSoundEmitterVoxelPathCountByteSize,	// Speichergroesse fuer die Anzahl der Voxel, die auf einem Pfad liegen
-    	speedOfSoundEmitterVoxelPathSumByteSize;	// Speichergroesse fuer die Summe der Schallgeschwindigkeiten auf dem Pfad zu einem Voxel
+        // speedOfSoundFieldVoxelCount,				//
+        SOSVolume_VoxelCount,  // Amount of Voxels of SOSVolume
+        // speedOfSoundFieldBytes,						//
+        SOSVolume_Bytes,                            // Size of SOSVolume in Byte
+        speedOfSoundEmitterVoxelPathCountByteSize,  // Speichergroesse fuer die Anzahl der Voxel, die auf einem Pfad liegen
+        speedOfSoundEmitterVoxelPathSumByteSize;    // Speichergroesse fuer die Summe der Schallgeschwindigkeiten auf dem Pfad zu einem Voxel

-    dim3
-        fixedBlockDimensions,			// kann ws durch genericSAFTBlockDimensions ersetzt
-        genericSAFTBlockDimensions,
-        genericSAFTGridDimensions,
-        windowGridDimensions;
+    dim3 fixedBlockDimensions,  // kann ws durch genericSAFTBlockDimensions ersetzt
+        genericSAFTBlockDimensions, genericSAFTGridDimensions, windowGridDimensions;

-	cudaArray **deviceAScansCuArray;
+    cudaArray **deviceAScansCuArray;

-#ifdef SaftTextureForBresenhamSosPaths
+    cudaArray *deviceSosAttFieldCuArray;

-	#ifdef SaftUseSosAttFloat1		// Nutze getrennte Texturen fuer beide Volumen (Sos+Att)
-		cudaArray *deviceSpeedOfSoundFieldCuArray;			// SOS volume
-		cudaArray *deviceAttenuationFieldCuArray;			// ATT volume
-	#endif
+    int maxSoSReceiverArrayForTexture;
+    int TableVoxelToReceiverPathSosAllocationCount;
+    std::size_t receiver_list_Size_deviceMemory;

-	#ifdef SaftUseSosAttFloat2      // Nutze nur eine Textur fuer beide Volumen (Sos+Att)
-		cudaArray *deviceSosAttFieldCuArray;
-	#endif
-#endif
+    // Für Emitter ----- normal definieren
+    cudaArray *deviceTableVoxelToEmitterPathSosSumCuArray;  // SoSSum
+    cudaArray *deviceTableVoxelToEmitterPathCountCuArray;   // Count

+    // Für Receiver ----- als Arrays definieren da zwei benoetigt
+    cudaArray **deviceTableVoxelToReceiverPathSosSumCuArray;  // SoSSum
+    cudaArray **deviceTableVoxelToReceiverPathCountCuArray;   // Count

+    cudaArray *deviceTableVoxelToEmPathSosBothCuArray;    // Emitter SoSSum + Count
+    cudaArray **deviceTableVoxelToRecPathSosBothCuArray;  // Receiver SoSSum + Count

-	int maxSoSReceiverArrayForTexture;
-	int TableVoxelToReceiverPathSosAllocationCount;
-	std::size_t receiver_list_Size_deviceMemory;
-
-#ifdef SaftTextureForEmRecSosPathsTables
-	// Für Emitter ----- normal definieren
-	cudaArray *deviceTableVoxelToEmitterPathSosSumCuArray;			//SoSSum
-	cudaArray *deviceTableVoxelToEmitterPathCountCuArray;			//Count
-
-	// Für Receiver ----- als Arrays definieren da zwei benoetigt
-	cudaArray **deviceTableVoxelToReceiverPathSosSumCuArray;		//SoSSum
-	cudaArray **deviceTableVoxelToReceiverPathCountCuArray;		//Count
-#endif
-
-#if defined(SaftTextureForEmRecSosPathsTablesFloat2) || defined(SaftTextureForEmRecSosPathsTablesFloat4)
-	cudaArray *deviceTableVoxelToEmPathSosBothCuArray;  			//Emitter SoSSum + Count
-	cudaArray **deviceTableVoxelToRecPathSosBothCuArray;  			//Receiver SoSSum + Count
-#endif
-
-	// Schallgeschwindigkeitskorrektur-Mode
-    float *deviceSpeedOfSoundField;			// Adressen fuer Speicherfuer Schallgeschwindigkeitsgrid auf der GPU
+    // Schallgeschwindigkeitskorrektur-Mode
+    float *deviceSpeedOfSoundField;  // Adressen fuer Speicherfuer Schallgeschwindigkeitsgrid auf der GPU

    // Block-Mode
-    unsigned short *deviceEmitterIndex_block;		// Adressen fuer Speicher fuer Index der Geometriedaten auf der GPU
+    unsigned short *deviceEmitterIndex_block;  // Adressen fuer Speicher fuer Index der Geometriedaten auf der GPU
    unsigned short *deviceReceiverIndex_block;
-    float3 *deviceListEmitterGeometry;			// Adressen fuer Speicher fuer Zuordnung Index <-> Geometriedaten auf der GPU
+    float3 *deviceListEmitterGeometry;  // Adressen fuer Speicher fuer Zuordnung Index <-> Geometriedaten auf der GPU
    float3 *deviceListReceiverGeometry;

-    float  *deviceSoSData_block;				// Adressen fuer Speicher fuer Schallgeschwindigkeitsdaten auf der GPU
-
-//    VoxelCountType								// Adressen fuer Speicher der SoS-Pfade auf der GPU
-//        * deviceTableVoxelToEmitterPathCount,
-//        * deviceTableVoxelToReceiverPathCount;
-    float
-    	*deviceTableVoxelToEmitterPathCountFloat,	// Texture adresses for precalculated SOS data
-    	*deviceTableVoxelToReceiverPathCountFloat,
-        *deviceTableVoxelToEmitterPathSosSum,
-        *deviceTableVoxelToReceiverPathSosSum;
+    float *deviceSoSData_block;  // Adressen fuer Speicher fuer Schallgeschwindigkeitsdaten auf der GPU

+    float *deviceTableVoxelToEmitterPathCountFloat,  // Texture adresses for precalculated SOS data
+        *deviceTableVoxelToReceiverPathCountFloat, *deviceTableVoxelToEmitterPathSosSum, *deviceTableVoxelToReceiverPathSosSum;

    bool *deviceValidEmitterReceiverCombinations;

    int *deviceTransducerVectorAnalysisDistributionCounters;

-//    float3
-//        * deviceEmitterGeometry,
-//        * deviceReceiverGeometry;
-
-    int   usedAmountOfEmitter, 	// amount of used emitter
-    	  usedAmountOfReceiver; // amount of used receiver
+    int usedAmountOfEmitter,   // amount of used emitter
+        usedAmountOfReceiver;  // amount of used receiver

    // Output volume
-	double *deviceOutput;
+    double *deviceOutput;

-    //Streams used for synchronisation
-    cudaStream_t
-        copyStream,
-        calculationStream;
+    // Streams used for synchronisation
+    cudaStream_t copyStream, calculationStream;

-    //This variable describes the number of allocations used by the current SAFT mode
-    std::size_t aScanAllocationCount;	// Anzahl der Speicher die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer Copy genutzt werden sollen.
+    // This variable describes the number of allocations used by the current SAFT mode
+    std::size_t aScanAllocationCount;  // Anzahl der Speicher die alloziert werden, es reicht einer statt 2! 2 nur wenn Streams fuer Copy genutzt werden sollen.

-    int
-        invalidEmitterReceiverCombinationsCount,
-        validEmitterReceiverCombinationsCount;
+    int invalidEmitterReceiverCombinationsCount, validEmitterReceiverCombinationsCount;

    Dimensions validBlockDimensions;
    bool useAutoTuning;
-//    AutoTuningConfiguration autoTuningConfiguration;
+    //    AutoTuningConfiguration autoTuningConfiguration;

-    size_t
-    	partialOutputSize,
-    	partialVolumeSize,				// Speicher(OutputVolumen), der fuer die entsprechende Anzahl an Z-Layern benoetigt wuerde
-    	partialSosPathSize,				// Speicher(SOSATTPaths)  , der fuer die entsprechende Anzahl an SoS-Z-Layer benoetigt wuerde
-    	partialAscanIndexSize,			// Speicher(AscanIndex)   , der fuer die entsprechende Anzahl an SoS-Z-Layer & Ascans benoetigt wuerde
-    	maxFeasibleZLayerCount,			// Maximal moegliche Anzahl an Z-Layern wird zu Beginn auf # die in eine SOS Z-layer passt gesetzt.
-    	maxFeasibleSosZLayerCount;		// Maximal moegliche Anzahl an Sos-Z-Layern wird zu Beginn auf Anzahl der noetigen SoS-Z-Layern für die OutputDaten gesetzt.
+    size_t partialOutputSize,
+        partialVolumeSize,          // Speicher(OutputVolumen), der fuer die entsprechende Anzahl an Z-Layern benoetigt wuerde
+        partialSosPathSize,         // Speicher(SOSATTPaths)  , der fuer die entsprechende Anzahl an SoS-Z-Layer benoetigt wuerde
+        partialAscanIndexSize,      // Speicher(AscanIndex)   , der fuer die entsprechende Anzahl an SoS-Z-Layer & Ascans benoetigt wuerde
+        maxFeasibleZLayerCount,     // Maximal moegliche Anzahl an Z-Layern wird zu Beginn auf # die in eine SOS Z-layer passt gesetzt.
+        maxFeasibleSosZLayerCount;  // Maximal moegliche Anzahl an Sos-Z-Layern wird zu Beginn auf Anzahl der noetigen SoS-Z-Layern für die OutputDaten gesetzt.

-    int
-        minimumAutoTuningThreadCount,
-        maximumAutoTuningThreadCount;
+    int minimumAutoTuningThreadCount, maximumAutoTuningThreadCount;

-
-    //New partial reconstruction data
+    // New partial reconstruction data

    std::size_t partialSpeedOfSoundVoxelCount;
    std::size_t partialOutputZLayerCount;
    std::size_t zLayerVoxelCount;
-    std::size_t sosZLayerVoxelCount;           // Anzahl der X-Y-SOSVoxel in einer SoS-Layer. //saft.hpp
+    std::size_t sosZLayerVoxelCount;  // Anzahl der X-Y-SOSVoxel in einer SoS-Layer. //saft.hpp
    std::size_t partialOutputVoxelCount;

+    double diff_time;           // For Time Measurement
+    float transferRate;         // For DataTransferrate Measurement
+    float performRate;          // For PerformSAFTrate Measurement
+    cudaDeviceProp deviceProp;  // Ausgabe der Frequenz

-	double diff_time;	// For Time Measurement
-	float transferRate; // For DataTransferrate Measurement
-	float performRate;  // For PerformSAFTrate Measurement
-	cudaDeviceProp deviceProp;		// Ausgabe der Frequenz
+    // Core reconstruction

-
-    //Core reconstruction
-
-    void processAScans(ullong & duration);
+    void processAScans(ullong &duration);
    void performCoreReconstruction();

-    //Pre-calculation
+    // Pre-calculation

-    //void precalculateAverageSpeedOfSound(int zLayer, int zLayerCount);  // TODO: Funktion die nicht mehr benutzt wird?
-//    void analysisOfTransducerVectors();
-
-//    void normalisePerformanceStatisticsOutput();
-//    void printTransducerVectorStatistics();
-
-    //Auto-tuning
-    bool determineGridDimensions(dim3 const & blockDimensions, dim3 & gridDimensions);
+    // Auto-tuning
+    bool determineGridDimensions(dim3 const &blockDimensions, dim3 &gridDimensions);
    void determineValidBlockDimensions();

+    void reduceKernelDimensions(dim3 const &gridDimensions, dim3 const &blockDimensions, dim3 &reducedGridDimensions, dim3 &reducedBlockDimensions);

-    void reduceKernelDimensions(dim3 const & gridDimensions, dim3 const & blockDimensions, dim3 & reducedGridDimensions, dim3 & reducedBlockDimensions);
-
-    //Pre-calculation kernels
+    // Pre-calculation kernels
    //------------------------------------------------------------------------
-	#ifdef SaftUseConstantMemforGeometry
-		//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
-		void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, float * deviceVoxelCountOutputFloat, float * deviceSpeedOfSoundSumOutput);
-	#else
-		//void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, VoxelCountType * deviceVoxelCountOutput, float * deviceSpeedOfSoundSumOutput);
-		void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, float3 const * deviceGeometry, int geometryElementCount, float * deviceSpeedOfSoundSumOutput);
-	#endif
+    void precalculateAverageSpeedOfSound(int firstZLayer, int sosZLayerCount, int deviceGeometry, int geometryElementCount, float *deviceVoxelCountOutputFloat, float *deviceSpeedOfSoundSumOutput);

-	#ifdef SaftUseAscanIndexInterpolation
-		void precalculateAscanIndex
-		(
-				int currentSpeedOfSoundZLayer, 						///< First z-layer in the speed of sound grid the pre-calculation is performed for.
-				int maxFeasibleSosZLayerCount 					 	///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
-				//int currentEmIndexUsedForAscanIndexCalculation,		///< current Index of Em for which the AscanIndex is calculated
-				//int emitter_list_Size, 		///< Number of emitter_array got from Matlab
-				//int receiver_list_Size, 	///< Number of receiver_array got from Matlab
-				//float * deviceTextureAscanIndexFloatCuArray 	    ///< Out: AscanIndex for the path from Emitter to voxel to Receiver.
-		);
+    void precalculateAscanIndex(int currentSpeedOfSoundZLayer,  ///< First z-layer in the speed of sound grid the pre-calculation is performed for.
+                                int maxFeasibleSosZLayerCount   ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
+                                                                // int currentEmIndexUsedForAscanIndexCalculation,		///< current Index of Em for which the AscanIndex is calculated
+                                                                // int emitter_list_Size, 		///< Number of emitter_array got from Matlab
+                                                                // int receiver_list_Size, 	///< Number of receiver_array got from Matlab
+                                                                // float * deviceTextureAscanIndexFloatCuArray 	    ///< Out: AscanIndex for the path from Emitter to voxel to Receiver.
+    );

-		void precalculateAscanIndex_usePaths
-		(
-				int ascanIndex_i,									///< Offset of AscanIndex batch.
-				int aScanWindowSize,								///< Amount of Ascans in AscanIndex batch to process.
-				int currentSpeedOfSoundZLayer, 						///< First z-layer in the speed of sound grid the pre-calculation is performed for.
-				int maxFeasibleSosZLayerCount 					 	///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
-		//		int currentEmIndexUsedForAscanIndexCalculation,		///< current Index of Em for which the AscanIndex is calculated -> No more necessary due to all Combinations-should be Calculated
-		//		float * deviceTextureAscanIndexFloatCuArray			///< Out: AscanIndex for the path from Emitter to voxel to Receiver.
-		);
+    void precalculateAscanIndex_usePaths(
+        int ascanIndex_i,               ///< Offset of AscanIndex batch.
+        int aScanWindowSize,            ///< Amount of Ascans in AscanIndex batch to process.
+        int currentSpeedOfSoundZLayer,  ///< First z-layer in the speed of sound grid the pre-calculation is performed for.
+        int maxFeasibleSosZLayerCount   ///< Number of z-layers in the speed of sound grid the pre-calculation is performed for.
+                                        //		int currentEmIndexUsedForAscanIndexCalculation,		///< current Index of Em for which the AscanIndex is calculated -> No more necessary due to all
+                                        // Combinations-should be Calculated 		float * deviceTextureAscanIndexFloatCuArray			///< Out: AscanIndex for the path from Emitter to voxel to
+                                        // Receiver.
+    );

-	#endif
-
-	// Initialize AScanIndexSurface
-	void fillCuArray
-		(
-				float useValue,
-				cudaArray **deviceTextureAscanIndexFloatCuArray, ///< CuArray to fill
-				int TableAscanIndexAllocationCount
-		);
-
-    //SAFT Kernel
-	void performSAFT(int aScanIndex, size_t aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer, int speedOfSoundVoxelsWithinZLayers, int maxFeasibleSosZLayerCount, int currentEmIndexUsedForAscanIndexCalculation, dim3 const & windowGridDimensions, dim3 const & gridDimensions, dim3 const & blockDimensions, float * deviceSpeedOfSoundField, cudaArray * deviceAScansCuArray);   //Ascans in CuArray f<>r Texturmemory
+    // Initialize AScanIndexSurface
+    void fillCuArray(float useValue,
+                     cudaArray **deviceTextureAscanIndexFloatCuArray,  ///< CuArray to fill
+                     int TableAscanIndexAllocationCount);

+    // SAFT Kernel
+    void performSAFT(int aScanIndex, size_t aScanWindowSize, int3 IMAGE_SIZE_XYZ, int3 SOSGrid_XYZ, int blockIndexOffset, int outputWindowVoxelCount, int speedOfSoundZLayer,
+                     int speedOfSoundVoxelsWithinZLayers, int maxFeasibleSosZLayerCount, int currentEmIndexUsedForAscanIndexCalculation, dim3 const &windowGridDimensions, dim3 const &gridDimensions,
+                     dim3 const &blockDimensions, float *deviceSpeedOfSoundField, cudaArray *deviceAScansCuArray);  // Ascans in CuArray f<>r Texturmemory
 };

-//std::string vectorToString(float3 const & vector);
-//std::string voxelToString(dim3 const & voxel);
 extern void memoryCheck();
 extern std::size_t memoryGPUfree();
 extern std::size_t memoryGPUtotal();

-extern void performCUDAResultCheck(cudaError_t result, std::string const & file, int line);
-
-
+extern void performCUDAResultCheck(cudaError_t result, std::string const &file, int line);