#include "SAFT_TOFI.h" #include "saft.hpp" #include #include #include // For German printf output format: Float with , instead of . #include #include #include #include #include #include #include // TODO: Blockgroesse (z > 1) fuehrt zu Kernelabbruechen //pthread handle typedef struct thread_handle_t { //pthread_t pthread; int deviceId; int deviceIndex; float *aScan_ptr; double *output_ptr; double *Duration_ptr; unsigned short *receiver_index_ptr; unsigned short *emitter_index_ptr; float *receiver_list_ptr; int receiver_list_Size; float *emitter_list_ptr; int emitter_list_Size; float *speed_vec_ptr; int3 SOSGrid_XYZ; float3 sosOffset; float SOS_RESOLUTION; float *att_vec_ptr; int aScanCount; int aScanLength; float inc; int3 res; float sampleRate; float3 volposition; int num_threads; dim3 fixedBlockDimensions; float debugMode; float debugModeParameter; bool SOSMode_3DVolume; bool ATTMode_3DVolume; int SAFT_MODE; int *SAFT_VARIANT; int SAFT_VARIANT_Size; int *Abort_ptr; } thread_handle; //Convenient typedefs for GPU-DeviceProperties container typedef std::vector DeviceProperties; /** Load CUDA devices and write them to a container. */ void loadDevices( DeviceProperties & output ///< This argument is written to. Container in which the device data are stored. ) { #ifdef debug_OutputFunctions printf( "==> loadDevices - Start\n"); #endif int deviceCount; CUDA_CHECK(cudaGetDeviceCount(&deviceCount)); #ifdef debug_OutputInfo printf( "There are %i devices present:\n", deviceCount); #endif output.reserve(static_cast(deviceCount)); for(int i = 0; i < deviceCount; i++) { cudaDeviceProp & device = output[i]; CUDA_CHECK(cudaGetDeviceProperties(&device, i)); #ifdef debug_OutputInfo std::cout << " " << (i + 1) << ". " << device.name << std::endl; std::cout << " " << "Byte Total Global Mem: " << device.totalGlobalMem <deviceId, // deviceId pthread_handle->deviceIndex, // deviceIndex pthread_handle->aScan_ptr, // aScan_ptr, pthread_handle->output_ptr, // output_ptr, pthread_handle->Duration_ptr, // Duration_ptr, pthread_handle->receiver_index_ptr, // receiver_index_ptr ///< pthread_handle->emitter_index_ptr, // emitter_index_ptr ///< pthread_handle->receiver_list_ptr, // receiver_list_ptr ///< pthread_handle->receiver_list_Size, // receiver_list_Size ///< pthread_handle->emitter_list_ptr, // emitter_list_ptr ///< pthread_handle->emitter_list_Size, // emitter_list_Size ///< pthread_handle->speed_vec_ptr, // speed_vec_ptr, pthread_handle->SOSGrid_XYZ, // SOSGrid_XYZ, pthread_handle->sosOffset, // sosOffset, ///< Startpoint of SoSGrid pthread_handle->SOS_RESOLUTION, // SOS_RESOLUTION, ///< Resolution of SoSGrid pthread_handle->att_vec_ptr, // att_vec_ptr pthread_handle->aScanCount, // aScanCount, pthread_handle->aScanLength, // aScanLength pthread_handle->res, // resolution => IMAGE_SIZE_XYZ, pthread_handle->sampleRate, // sampleRate pthread_handle->volposition, // => regionOfInterestOffset, pthread_handle->inc, // IMAGE_RESOLUTION, pthread_handle->fixedBlockDimensions, // fixedBlockDimensions pthread_handle->debugMode, // debugMode pthread_handle->debugModeParameter, // Parameter for DebugMode pthread_handle->SOSMode_3DVolume, pthread_handle->ATTMode_3DVolume, pthread_handle->SAFT_MODE, pthread_handle->SAFT_VARIANT, pthread_handle->SAFT_VARIANT_Size, pthread_handle->Abort_ptr // If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch; ); saft.performReconstruction(); #ifdef debug_OutputSAFTHandlerThreadPerformance CUDA_CHECK(cudaDeviceSynchronize()); gettimeofday(&stopSAFTHandler, NULL); double diff_time = (double)((stopSAFTHandler.tv_sec * 1000000.0 + stopSAFTHandler.tv_usec) - (startSAFTHandler.tv_sec * 1000000.0 + startSAFTHandler.tv_usec)); printf ("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n"); printf ("{ ~~~> } Device (%i) - SAFTHandler-Thread : %8.0f µs\n", pthread_handle->deviceId, diff_time); double performance_Thread = (((double)pthread_handle->aScanCount * (double)pthread_handle->res.x * (double)pthread_handle->res.y * (double)pthread_handle->res.z) / diff_time )/ 1000.0; printf ("{ ~~~> } Device (%i) - SAFTHandler-Thread Performance: %.6lf A-Scan * GVoxel/s\n", pthread_handle->deviceId, performance_Thread); printf ("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n"); #endif //pthread_exit(NULL); } /** Check amount of GPUs and divide Volume ins parts with same size Abfrage der Anzahl an GPU-Devices und Einteilung des Volumens in möglichst gleichgroße Volumen in Z-Richtung (3D-Volumen) oder Y-Richtung (2D-Volumen) auf. */ void multithreaded_processing( float *aScan_ptr, ///< AScan-Daten double *output_ptr, ///< OutputDaten der Voxel unsigned short *receiver_index_ptr, ///< Index Receiver per Ascan unsigned short *emitter_index_ptr, ///< Index Emitter per Ascan float *receiver_list_ptr, ///< Positionskoordinaten Receiver int receiver_list_Size, ///< Menge an Receiver float *emitter_list_ptr, ///< Positionskoordinaten Emitter int emitter_list_Size, ///< Menge an Emitter float *speed_vec_ptr, ///< SoS Daten im Blockmode oder als SoSGrid int3 SOSGrid_XYZ, ///< Size of SoSGrid float3 sosOffset, ///< Startpoint of SoSGrid float SOS_RESOLUTION, ///< Aufloesung des SoSGrid float *att_vec_ptr, ///< Attenuation Daten als ATTGrid int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen int aScanLength, ///< Laenge der AscanDaten (normal 3000) float3 regionOfInterestOffset, int3 IMAGE_SIZE_XYZ, ///< Groesse des Bildbereichs in Voxel float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs float sampleRate, ///< Samplerate für AScans int3 BlockDim_XYZ, ///< BlockDimension für GPU double *Duration_ptr, ///< Rückgabepointer an Matlab für Laufzeit des SAFT-Kernels int selectedNumberGPUs, ///< Anzahl der ausgewählten GPUs bzw. auf maximale Anzhal vorhandener begrenzt int *enableGPUs_ptr, ///< Gibt an welche GPUs genutzt werden und welche nicht float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden float debugModeParameter, ///< Parameter der mit fuer Debugmode uebermittelt werden kann bool SOSMode_3DVolume, ///< Wird 3D Volumen für SOS-Korrektur genutzt? bool ATTMode_3DVolume, ///< Wird 3D Volumen für ATT-Korrektur genutzt? int SAFT_MODE, ///< Modus für SAFT-Rekonstruktion int *SAFT_VARIANT, ///< Verschiedene Parameter der Rekonstruktion int SAFT_VARIANT_Size, ///< Menge der verschiedenen Parameter für Rekonstruktion int *Abort_ptr ///< FehlerArray ) { #ifdef debug_OutputFunctions printf( "==> multithreaded_processing - Start\n"); #endif dim3 fixedBlockDimensions( // convert int3 to dim3 BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z ); // Divide workload and show Information ------------------------------------------------------------------------------------------------------------------ // Divide workload in pieces with the same size for all available GPUs. // If the workload can not be divided in pieces with the same size, the last piece will be the one with little less workload. // Testfall simuliert die mehrfache Anzahl an GPUs int num_devices_factor = 1; // Vielfache an GPUs simulieren bzw. kleinere Pakete erzeugen int num_workingPackages = selectedNumberGPUs*num_devices_factor; float3 *position = (float3*)malloc(num_workingPackages * sizeof(float3)); int3 *resolution = (int3*) malloc(num_workingPackages * sizeof(int3)); int3 *volumeStartpoint = (int3*) malloc(num_workingPackages * sizeof(int3)); size_t *volumePtr = (size_t*)malloc(num_workingPackages * sizeof(size_t)); //int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int)); float3 volposition = regionOfInterestOffset; // Uebergabe der Parameter float inc = IMAGE_RESOLUTION; // koennte auch direkt umbenannt werden #ifdef debug_OutputMultiGpu printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); printf("~ Dividing workload between %i devices ~\n", num_devices ); printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); #endif size_t volume_size = (size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z * (size_t)sizeof(double); // = Groesse des Outputvolumens in Byte if (( IMAGE_SIZE_XYZ.y == 1 ) && ( IMAGE_SIZE_XYZ.z == 1 )){ selectedNumberGPUs = 1; num_workingPackages = 1; #ifdef debug_OutputMultiGpu printf( "IMAGE_SIZE_XYZ.y = IMAGE_SIZE_XYZ.z = 1 ==> Use only one GPU\n", num_devices); #endif } volumeStartpoint[0].x = 0; volumeStartpoint[0].y = 0; volumeStartpoint[0].z = 0; position[0].x = volposition.x; // Startposition position[0].y = volposition.y; position[0].z = volposition.z; std::vector resolutionZs(num_workingPackages, IMAGE_SIZE_XYZ.z / num_workingPackages); for (size_t i = 0; i < IMAGE_SIZE_XYZ.z % num_workingPackages; i++) resolutionZs[i]++; int i,j,k; for (i=0; i < num_workingPackages; i++ ) { #ifdef debug_OutputMultiGpu printf("Working Package [%i]\n", i); #endif if ( IMAGE_SIZE_XYZ.z > 1 ) { // Divide in Z-Direction #ifdef debug_OutputMultiGpu printf( "( IMAGE_SIZE_XYZ.z > 1 ) => Divide through %i WP in Z-Direction for %i GPUs\n", num_workingPackages, selectedNumberGPUs); #endif resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization resolution[i].y = IMAGE_SIZE_XYZ.y; resolution[i].z = resolutionZs[i]; if (i>0) { volumeStartpoint[i].x = 0; // Koordinaten als Startpunkt für Layer in einzelnen GPUs volumeStartpoint[i].y = 0; volumeStartpoint[i].z = volumeStartpoint[i-1].z + resolution[i-1].z; } volumePtr[i] = (size_t)((size_t)resolution[0].x * (size_t)resolution[0].y * (size_t)volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen #ifdef debug_OutputMultiGpu printf(" - volumeStartpoint[%i] = [%d %d %d]\n", i, volumeStartpoint[i].x, volumeStartpoint[i].y, volumeStartpoint[i].z); printf(" => volume_size[%i] = [%i %i %i]*double = %lld kB\n", i, resolution[i].x, resolution[i].y, resolution[i].z, ((uint64_t)resolution[i].x * (uint64_t)resolution[i].y * (uint64_t)resolution[i].z * (uint64_t)sizeof(double)) / ((uint64_t)1024)); printf(" => volumePtr[%i] = [%lld]\n", i, (uint64_t)volumePtr[i]); #endif } else { // Divide in Y-Direction #ifdef debug_OutputMultiGpu printf( "( IMAGE_SIZE_XYZ.z = 1 ) => Divide through %i in Y-Direction for %i GPUs\n", num_workingPackages, selectedNumberGPUs); #endif resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization resolution[i].z = IMAGE_SIZE_XYZ.z; if ( IMAGE_SIZE_XYZ.y % num_workingPackages == 0) { // Volume is divisible resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages; } else { // if not divisible, if (i != (num_workingPackages - 1)){ // increment each GPU slice by one resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages + 1; } else { // except the last one, which get the remaining Layers resolution[i].y = IMAGE_SIZE_XYZ.y % resolution[0].y; } } } #ifdef debug_OutputMultiGpu printf("WP[%i] on deviceId[%i]=deviceIndex[%i]\n", i, enableGPUs_ptr[(i % selectedNumberGPUs)], (i % selectedNumberGPUs)); printf(" - resolution[%i].x = %i\n", i, resolution[i].x ); printf(" - resolution[%i].y = %i\n", i, resolution[i].y ); printf(" - resolution[%i].z = %i\n", i, resolution[i].z ); #endif position[i].x = volposition.x; // Startposition position[i].y = volposition.y; position[i].z = volposition.z; if ( IMAGE_SIZE_XYZ.z > 1 ) position[i].z += i*inc*resolution[0].z; // Calculate Startpositions for the workload-pieces else position[i].y += i*inc*resolution[0].y; #ifdef debug_OutputMultiGpu printf(" - position[%i].x = %f\n", i, position[i].x ); printf(" - position[%i].y = %f\n", i, position[i].y ); printf(" - position[%i].z = %f\n", i, position[i].z ); #endif } // Create one thread per GPU ------------------------------------------------------------------------------------------------------------------------ thread_handle *pthread_handle = (thread_handle*)malloc(selectedNumberGPUs * sizeof(thread_handle)); #ifdef debug_OutputMultiGpu printf("-> pthread_handles for %i workingPackages created!\n", num_workingPackages); printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); #endif for ( i = 0; i < num_workingPackages; i++ ) { //printf("num_workingPackages = %i !\n", num_workingPackages); //printf("num_devices = %i\n", num_devices); //printf("(i %% num_devices) = %i !\n", (i % num_devices)); //printf("deviceId = enableGPUs_ptr[%i] = %i !\n", (i % num_devices), enableGPUs_ptr[(i % num_devices)]); //printf("deviceIndex = (i %% num_devices) = %i !\n", (i % num_devices)); // initialize control block pthread_handle[i].deviceId = enableGPUs_ptr[(i % selectedNumberGPUs)]; // Hier DeviceID der GPU setzen pthread_handle[i].deviceIndex = (i % selectedNumberGPUs); pthread_handle[i].aScan_ptr = aScan_ptr; if ( IMAGE_SIZE_XYZ.z > 1 ) pthread_handle[i].output_ptr = &output_ptr[volumePtr[i]]; // Startpoint for Outputvolume. //volumePtr[i] = (size_t)(resolution[0].x * resolution[0].y * volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen else //pthread_handle[i].output_ptr = &output_ptr[ i * resolution[0].x * resolution[0].y * resolution[0].z ]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Gr��e hat. pthread_handle[i].output_ptr = &output_ptr[ (size_t)resolution[0].x * (size_t)i * (size_t)resolution[0].y]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Groesse hat. Z spielt hier keine Rolle pthread_handle[i].Duration_ptr = Duration_ptr; pthread_handle[i].receiver_index_ptr = receiver_index_ptr; pthread_handle[i].emitter_index_ptr = emitter_index_ptr; pthread_handle[i].receiver_list_ptr = receiver_list_ptr; pthread_handle[i].receiver_list_Size = receiver_list_Size; pthread_handle[i].emitter_list_ptr = emitter_list_ptr; pthread_handle[i].emitter_list_Size = emitter_list_Size; pthread_handle[i].speed_vec_ptr = speed_vec_ptr; pthread_handle[i].SOSGrid_XYZ = SOSGrid_XYZ; pthread_handle[i].sosOffset = sosOffset; pthread_handle[i].SOS_RESOLUTION = SOS_RESOLUTION; pthread_handle[i].att_vec_ptr = att_vec_ptr; pthread_handle[i].aScanCount = aScanCount; pthread_handle[i].aScanLength = aScanLength, pthread_handle[i].inc = IMAGE_RESOLUTION; pthread_handle[i].res = resolution[i]; pthread_handle[i].sampleRate = sampleRate; pthread_handle[i].volposition = position[i]; //regionOfInterestOffset pthread_handle[i].num_threads = num_workingPackages; pthread_handle[i].fixedBlockDimensions = fixedBlockDimensions; // pthread_handle[i].debugMode = debugMode; pthread_handle[i].debugModeParameter = debugModeParameter; pthread_handle[i].SOSMode_3DVolume = SOSMode_3DVolume; pthread_handle[i].ATTMode_3DVolume = ATTMode_3DVolume; pthread_handle[i].SAFT_MODE = SAFT_MODE; pthread_handle[i].SAFT_VARIANT = SAFT_VARIANT; pthread_handle[i].SAFT_VARIANT_Size = SAFT_VARIANT_Size; Abort_ptr[i] = 0; // Initialisieren mit kein Fehler pthread_handle[i].Abort_ptr = &Abort_ptr[i]; } auto startAllThreads = std::chrono::steady_clock::now(); double diff_time = 0.0; #ifdef debug_OutputMultiGpu printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); printf("pthread_handles for %i devices initialized!\n", selectedNumberGPUs)-1; #endif // Zeitmessung ueber alle Threads //gettimeofday(&startAllThreads, NULL); #ifdef debug_OutputMultiGpu printf("-> pthread_handles for %i workingPackages initialized!\n", num_workingPackages); printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); #endif std::vector> futures; futures.resize(selectedNumberGPUs); for ( j = 0; j < num_devices_factor; j++ ) { for ( k = 0; k < selectedNumberGPUs; k++ ) { //for ( i = 0; i < num_devices; i++ ) { #ifdef debug_OutputMultiGpu printf("]~~~~> create thread (%i) [WP (%i) on Device (%i)] and execute thread_function!\n", (j*selectedNumberGPUs+k), (j*selectedNumberGPUs+k), enableGPUs_ptr[k]); #endif //create thread and start execution by calling thread_function //=========================================================================================== //pthread_create(&pthread_handle[(j*selectedNumberGPUs+k)].pthread, NULL, &thread_function, &pthread_handle[(j*selectedNumberGPUs+k)] ); //=========================================================================================== //new async threads futures[k] = std::async(std::forward>(thread_function), std::forward((void*)&pthread_handle[(j * selectedNumberGPUs + k)])); //forward used for perfect forwarding } //Synchronization and termination ------------------------------------------------------------------------------------------------------------------- for ( k = 0; k < selectedNumberGPUs; k++ ) { //wait for threads to finish processing //pthread_join(pthread_handle[(j*selectedNumberGPUs+k)].pthread, NULL); #ifdef debug_OutputMultiGpu printf("<~~~~[ joined thread (%i) [WP (%i) on Device (%i)]!\n", (j*selectedNumberGPUs+k), (j*selectedNumberGPUs+k), enableGPUs_ptr[k]); #endif //new async threads futures[k].wait(); //advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task) } } //gettimeofday(&stopAllThreads, NULL); auto stopAllThreads = std::chrono::steady_clock::now(); diff_time = std::chrono::duration_cast(stopAllThreads - startAllThreads).count(); // total duration in µs #ifdef debug_OutputPerformance printf("\n# NUM_VOXEL = %i * %i * %i = %i\n", IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z, IMAGE_SIZE_XYZ.x * IMAGE_SIZE_XYZ.y * IMAGE_SIZE_XYZ.z ); printf( "# aScanCount = %i\n", aScanCount); //printf("volposition.x = %f, .y=%f, .z=%f\n", volposition.x, volposition.y, volposition.z ); double performance_all = (((double)aScanCount * (double)IMAGE_SIZE_XYZ.x * (double)IMAGE_SIZE_XYZ.y * (double)IMAGE_SIZE_XYZ.z) / diff_time )/ 1000.0; printf("# Duration of main Processing (all GPUs): %.3f us\n", diff_time); printf("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n"); printf("{~~~~~~~~~} Performance: %.6lf A-Scan * GVoxel/s {~~~~~~~~~}\n", performance_all); printf("{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\n"); #endif Duration_ptr[0] = diff_time; // Return total duration in µs // Speicher wieder freigeben free (position); free (resolution); free (volumeStartpoint); free (volumePtr); //free (Abort_ptr); free (pthread_handle); #ifdef debug_OutputFunctions printf( "<== multithreaded_processing - End\n"); #endif } /** preintegrateAscans Determine maximal SampleWidth, matching to the resolution to be used for reconstruction, and integrate A-scan over an window of this SampleWidth */ void preintegrateAscans( float *aScan_ptr, ///< AScan-Daten float *AscansOut_ptr, ///< AScan-OutputDaten fuer Testrueckgabe float *speed_vec_ptr, ///< SoS Daten im Blockmode int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen int aScanLength, ///< Laenge der AscanDaten (normal 3000) float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs float sampleRate, ///< Samplerate fuer AScans float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden float debugModeParameter ///< Parameter der mit fuer Debugmode uebermittelt werden kann ) { #ifdef debug_OutputFunctions printf( "==> preintegrateAscans - Start\n"); #endif float* AscanBuffer; AscanBuffer = (float*)malloc(aScanLength * sizeof(float)); // Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000) // //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer // for (int i = 0; i i (%3i) \n",i); // //AscansOut_ptr[i] = *(aScan_ptr+i); // AscanBuffer[i] = aScan_ptr[i]; // AscansOut_ptr[i] = AscanBuffer[i]; // } #ifdef preAscanIntegrationVersion1Michael // direkt übernommene Version von Michael Zapf int windowWidth = 0; /////////////////////// Integration über Anzahl "sampl" voxel /////////////////////// unsigned int i,j,sampl = 0; double i_buffer=0.0; /////xsum ueber diagonale der voxellänge sampl = (unsigned int)(ceil((float)1.7*(( IMAGE_RESOLUTION / speed_vec_ptr[0])/ (sampleRate)) /2)); //halbe Breite bestimmen windowWidth = (int)2*IMAGE_RESOLUTION/sampleRate/speed_vec_ptr[0]; #ifdef debug_preAscanIntegration printf( "~~~~~~~~~~~~~~~~~ V1 Michael ~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); printf( " aScanCount = %5i\n",aScanCount); printf( " aScanLength = %5i\n",aScanLength); printf( " IMAGE_RESOLUTION = %12.10f\n",IMAGE_RESOLUTION); printf( " speed_vec_ptr[%3i] = %12.10f\n",i,speed_vec_ptr[i]); printf( " sampleRate = %12.10f\n\n",sampleRate); printf( " => windowWidth = %5i (Ganze Breite) \n",windowWidth); printf( " => sampl = %5i (Halbe Breite) \n",sampl); printf( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); #endif printf( " => windowWidth = %5i (Ganze Breite) \n",windowWidth); printf( " => sampl = %5i (Halbe Breite) \n",sampl); // Zugriff über // sec_buffer, das mit INTERP_RATIO gestreckt ist // bufferz ist der vorher schon angelegte Buffer der gleichen Größe wie sec_buffer // sampl = ist die Breite des Additionsfensters für die Addition if ((debugMode == 0.0) && (debugModeParameter == 2.0)){ printf( "~~~~~~~~~~~~~~~~~~~~~ !!!Use Abs before Preintegrate Ascans!!! ~~~~~~~~~~~~~~~~~~~~~\n"); for (int j = 0; j Ascan (j=%3i) ==> j*aScanLength+0 = %i \n\n",j, j*aScanLength); #endif i_buffer = 0; for (i=0;i=0){ i_buffer = i_buffer - aScan_ptr[j*aScanLength+i-sampl]/(2*sampl); //AscanBuffer[i] = i_buffer / (sampl-(aScanLength)-i); // Michaels alter Code AscanBuffer[i] = i_buffer ; // passt so besser zu Anfangsstrategie } } //Alle Daten uebertragen von AscanBuffer auf AscansOut_ptr[0..aScanLength] for (int i = 0; i windowWidth = %5.2f (Ganze Breite) \n",windowWidth); printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf); printf( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); printf ("\n 2. Testvariante mit Start und Endpkt-berechnung --> Neue Implementierung mit Zetrum in der Mitte \n#######################################################################################\n"); #endif //printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth)); //printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf); for (int j = 0; j Ungerade ==> widthHalf_minus1 = %i \n",(int)windowWidthHalf_minus1); #endif for (int i = 0; i < aScanLength; i++){ // über gesamte Breite des A-scans gehen. i_start = i - (int)windowWidthHalf_minus1; i_end = i + (int)windowWidthHalf_minus1; // Grenzen einhalten if (i_start < 0) i_start=0; if (i_end > aScanLength-1) i_end =aScanLength-1; // Anzahl Sample bestimmen nSample = i_end-i_start+1; // +1 da erstes Element auch dazugehört. windowSum = 0; for (int k = i_start; k <= i_end; k++){ #ifdef debug_preAscanIntegration if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){ printf("Index %3i: windowSum (%3i:%3i) += aScan_ptr(k=%i) %07.5f \n", i, i_start, i_end, k, windowSum); } #endif windowSum += aScan_ptr[j*aScanLength+k]; } // geteilt durch nur die genutzten Samples //AscanBuffer[i] = windowSum/ceil(nSample); //Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem //Rand nicht vorhanden/bzw = 0 sind. --> Abflachung am Rand AscanBuffer[i] = windowSum/ceil(windowWidth); #ifdef debug_preAscanIntegration //if ((i >= 1720) && (i<=1730)){ if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){ printf("Index %3i: Summe über %5i Samples (%3i:%3i) = %07.5f \n", i, (int)nSample, i_start, i_end, AscanBuffer[i]); } #endif } } else if ((int)ceil(windowWidth)%2 == 0){ // Even / Gerade // Bei geraden Breiten symmetrisch mit den beiden äußeren Samplewerten zu je 1/2 gewichten. windowWidthHalf_minus1 = floor(( ceil(windowWidth)-1)/2); #ifdef debug_preAscanIntegration printf( " ==> Gerade ==> widthHalf_minus1 = %i \n", (int)windowWidthHalf_minus1); #endif for (int i = 0; i < aScanLength; i++){ // über gesamte Breite des A-scans gehen. i_start = i - (int)windowWidthHalf_minus1; i_end = i + (int)windowWidthHalf_minus1; // Grenzen einhalten if (i_start < 0) i_start=0; if (i_end > aScanLength-1) i_end =aScanLength-1; // Anzahl Sample bestimmen nSample = i_end-i_start+1; // +1 da erstes Element auch dazugehört. windowSum = 0; for (int k = i_start; k <= i_end; k++){ #ifdef debug_preAscanIntegration //if ((i >= 1720) && (i<=1730)){ if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){ printf("Index %3i: windowSum (%3i:%3i) += aScan_ptr(k=%i) %07.5f \n", i, i_start, i_end, k, windowSum); } #endif windowSum += aScan_ptr[j*aScanLength+k]; } // geteilt wird durch nur die genutzten Samples //AscanBuffer[i] = windowSum/ceil(nSample); //Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem //Rand nicht vorhanden/bzw = 0 sind.--> Abflachung am Rand windowSum = windowSum/ceil(windowWidth); // Halbe Samplewerte an Grenzen miteinberechen aber absolute Grenzen einhalten if (i_start >= 0){ windowSum = windowSum + aScan_ptr[j*aScanLength+(i_start-1)]/(2*ceil(windowWidth)); // Linken Nachbarn zu 1/2 mit dazunehmen nSample = nSample + 0.5; } if (i_end < aScanLength-1){ windowSum = windowSum + aScan_ptr[j*aScanLength+(i_end+1)]/(2*ceil(windowWidth)); // Rechten Nachbarn zu 1/2 mit dazunehmen nSample = nSample + 0.5; } AscanBuffer[i] = windowSum; #ifdef debug_preAscanIntegration //if ((i >= 1720) && (i<=1730)){ if ((i >= DebugSammleMin) && (i<=DebugSammleMax)){ printf("Index %3i: Summe über %5.1f Samples (%3i:%3i) = %07.5f \n", i, nSample, i_start, i_end, AscanBuffer[i]); } #endif } } // Transfer Data from Buffer to Memory regions for (int i = 0; i < aScanLength; i++){ //printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]); //AscansOut_ptr[i] = *(aScan_ptr+i); //AscanBuffer[i] = aScan_ptr[i]; aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab } } #ifdef debug_preAscanIntegration printf ("#######################################################################################\n\n"); #endif #endif #ifdef debug_preAscanIntegration printf( " \n\n\n\n "); #ifdef debug_OutputVariables // Ausgabe einzelner AScan-Samples der �bergabewerte //float *aScan_ptr; //aScan_ptr = (float*)mxGetPr(AScan); //int aScanSampleCountPerReceiver=3000; //printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",0 , *(aScan_ptr+0), *(aScan_ptr+1), *(aScan_ptr+2)); printf( " -> AScan (%3i) [1726:1729] = [%f %f %f %f]\n",0 , *(aScan_ptr+1726),*(aScan_ptr+1726+1), *(aScan_ptr+1726+2), *(aScan_ptr+1726+3)); // Ale drei Moeglichkeiten koennen angewandt werden //*(AscansOut_ptr+1726) = *(aScan_ptr+1726); //*(AscansOut_ptr+1726+1) = 0; //AscansOut_ptr[1726+2] = (float)*(aScan_ptr+1726); printf( " -> AScan (%3i) [1726:1729] = [%f %f %f %f]\n",0 , *(aScan_ptr+1726),*(aScan_ptr+1726+1), *(aScan_ptr+1726+2), *(aScan_ptr+1726+3)); //printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",0 , *(aScan_ptr+2997), *(aScan_ptr+2998), *(aScan_ptr+2999)); //printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",1 , aScan_ptr[3000], aScan_ptr[3001], aScan_ptr[3002]); //printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",1 , aScan_ptr[5997], aScan_ptr[5998], aScan_ptr[5999]); //printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",156 , aScan_ptr[0+(156*aScanSampleCountPerReceiver)], aScan_ptr[1+(156*aScanSampleCountPerReceiver)], aScan_ptr[2+(156*aScanSampleCountPerReceiver)]); //printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",156 , aScan_ptr[2997+(156*aScanSampleCountPerReceiver)], aScan_ptr[2998+(156*aScanSampleCountPerReceiver)], aScan_ptr[2999+(156*aScanSampleCountPerReceiver)]); #endif #endif free(AscanBuffer); #ifdef debug_OutputFunctions printf( "<== preintegrateAscans - End\n"); #endif } const size_t* GetDimensions(const Matrix_t& matrix) { return matrix.Dims; } const void* GetPr(const Matrix_t& matrix){ return matrix.Data; } size_t GetNumberOfDimensions(const Matrix_t& matrix){ return matrix.NumberOfDims; } size_t GetNumberOfElements(const Matrix_t& matrix){ return matrix.DataSize; } Matrix_t SAFT_TOFI(std::vector& params){ #ifdef debug_OutputFunctions printf( "==> mexFunction - Start\n"); #endif #ifdef debug_OutputFormat_German setlocale(LC_NUMERIC, "de_DE"); // German Format , instead . for numbers #endif size_t AScan_Nx, AScan_Mx, pix_vect_Nx, pix_vect_Mx, receiver_index_Nx, receiver_index_Mx, emitter_index_Nx, emitter_index_Mx, receiver_list_Nx, receiver_list_Mx, emitter_list_Nx, emitter_list_Mx, SAFT_mode_Nx, SAFT_mode_Mx, SAFT_variant_Nx, SAFT_variant_Mx, speed_Nx, speed_Mx, SOSGrid_Xx, SOSGrid_Yx, SOSGrid_Zx, sos_startPoint_Nx, sos_startPoint_Mx, sos_res_Nx, sos_res_Mx, attVolume_Nx,attVolume_Mx, ATTGrid_Xx, ATTGrid_Yx, ATTGrid_Zx, res_Nx, res_Mx, timeint_Nx, timeint_Mx, IMAGE_XYZ_Nx, IMAGE_XYZ_Mx, IMAGE_SUM_Xx, IMAGE_SUM_Yx, IMAGE_SUM_Zx, BlockDim_XYZ_Nx, BlockDim_XYZ_Mx, GPUs_Nx, GPUs_Mx, dbgMode_Nx,dbgMode_Mx ; int aScanCount; int aScanLength; float *aScan_ptr; int3 IMAGE_SIZE_XYZ; int3 BlockDim_XYZ; bool SOSMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> SOSGrid_XYZ, SOS_RESOLUTION, sosOffset not neccessary bool ATTMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> ATTGrid_XYZ not neccessary int SAFT_MODE; int *SAFT_VARIANT; // Variances of SAFT int SAFT_VARIANT_Size; // Size of Varainces int3 SOSGrid_XYZ; // Size of SOSGrid int3 ATTGrid_XYZ; // Size of ATTGrid float3 regionOfInterestOffset; // Startpoint float3 sosOffset; // Startpoint SoS float IMAGE_RESOLUTION; // Aufloesung float SOS_RESOLUTION; // Aufloesung float sampleRate; // Samplerate für AScans int selectedNumberGPUs; // Anzahl der genutzten GPUs durch uebergebene Groesse der Ausgewaehlten GPUs float debugMode; float debugModeParameter; // check number of arguments... // Anzahl der Argumente überprüfen #ifdef debug_OutputParameter printf( "Number of Arguments: Input(nrhs) = %i Output(nlhs) = %i\n", nrhs, nlhs); #endif if (params.size() != 19) { printf( " \n"); printf( " Inputparameter \n"); printf( " In[n] Meaning [Row N x Col M] Type\n"); printf( " =============================================================================\n"); printf( " 1-prhs[0] AScan-Data [3000?xnAscans] single\n"); printf( " 2-prhs[1] IMAGE_STARTPOINT_S [1x3] single\n"); printf( " 3-prhs[2] receiver_index [1xnAscans] uint16\n"); printf( " 4-prhs[3] emitter_index [1xnAscans] uint16\n"); printf( " 5-prhs[4] receiver_list [3xnReceiver] single\n"); printf( " 6-prhs[5] emitter_list [3xnEmitter] single\n"); printf( " 7-prhs[6] SAFT_mode [1x1] uint32\n"); printf( " 8-prhs[7] SAFT_variant [1x6] uint32\n"); printf( " Standard: [1 1 1 1 0 0] \n"); printf( " -> Ascan Preintegration\n"); printf( " -> Ascan Interpolation\n"); printf( " -> Preprocessing SOS&ATT 3D Volume Interpolation\n"); printf( " -> Reconstruction SOS&ATT 3D Volume Interpolation\n"); printf( " -> not yet\n"); printf( " -> not yet\n"); printf( " 9-prhs[8] SOSVolume [SOS_XxYxZ] single in m/s\n"); printf( " 10-prhs[9] SOS_STARTPOINT_S [1x3] single\n"); printf( " 11-prhs[10] SOS_RESOLUTION_S [1x1] single\n"); printf( " 12-prhs[11] ATTVolume [ATT_XxYxZ] single in dB/cm\n"); printf( " 13-prhs[12] IMAGE_RESOLUTION_S [1x1] single\n"); printf( " 14-prhs[13] TimeInterval_S [1x1] single\n"); printf( " 15-prhs[14] IMAGE_XYZ [1x3] uint32\n"); printf( " 16-prhs[15] IMAGE_SUM [Output_XxYxZ] double\n"); printf( " 17-prhs[16] BlockDim_XYZ (GPU) [1x3] uint32\n"); printf( " 18-prhs[17] GPUs (DeviceNr GPU) [1xn] uint32\n"); printf( " 19-prhs[18] dbgMode,dbgModeParam [1x2] single\n"); printf( " ==============================================================================\n"); printf( "\n"); printf( " Outputparameter \n"); printf( " Out[n] Meaning \n"); printf( " ================================================================================================= \n"); printf( " plhs[0] = Output_Voxels = mxCreateNumericArray ( [IMAGE_XYZ] , mxDOUBLE_CLASS, mxREAL); \n"); printf( " plhs[1] = Duration = mxCreateDoubleMatrix ( [nGPUs+1, 1] , mxREAL); \n"); printf( " plhs[2] = Output_Ascans = mxCreateNumericMatrix( [3000?,nAscans], mxSINGLE_CLASS, mxREAL); \n"); printf( " ================================================================================================= \n"); printf("Wrong number of input arguments. Should be 19."); } // assign input arguments... // Bestimme die Eingangswerte const Matrix_t&AScan = params [0]; // AScan-Data const Matrix_t&pix_vect = params [1]; // Image Startpoint (IMAGE_STARTPOINT_S) const Matrix_t&receiver_index = params [2]; // Index Data for Receiver-Position Data const Matrix_t&emitter_index = params [3]; // Index Data for Emitter-Position Data const Matrix_t&receiver_list = params [4]; // Assignment Index to Receiver-Position Data const Matrix_t&emitter_list = params [5]; // Assignment Index to Emitter-Position Data const Matrix_t&SAFT_mode = params [6]; // SOS?, ATT? const Matrix_t&SAFT_variant = params [7]; // Differnt Mode-Parameter for Reconstruction const Matrix_t&speed = params [8]; // Speed of Sound Data (Single, SoS-Grid) const Matrix_t&sos_startPoint = params [9]; // Startpoint of Speed of Sound Grid const Matrix_t&sos_res = params [10]; // SoS Grid Resolution const Matrix_t&attVolume = params [11]; // Attenuation Data (Single, SoS-Grid) const Matrix_t&res = params [12]; // Output Volume Resolution const Matrix_t&timeint = params [13]; // 1/Sample-Rate const Matrix_t&IMAGE_XYZ = params [14]; // Output Volume Size XYZ const Matrix_t&IMAGE_SUM = params [15]; // Volume from previous Call const Matrix_t&BlockDim = params [16]; // Block Dimension to use for GPU const Matrix_t&GPUs = params [17]; // Welche GPUs sollen genutzt werden? const Matrix_t&dbgMode = params [18]; // DebugMode and DebugMode-Parameter // check data types and assign variables... // Ueberpruefe die Datentypen und lege Variablen fest //================================================================================================================ Check Dimensions of input parameters #ifdef debug_OutputParameter printf( "\n"); printf( "In[n] Meaning [Row N x Col M] \n"); printf( "=================================================================================================\n"); #endif //====================================================================== 1.Input Parameter - Check AScan AScan_Nx = GetDimensions(AScan)[0]; // Reihen N ermitteln AScan_Mx = GetDimensions(AScan)[1]; // Spalten M ermitteln aScanCount = AScan_Mx; aScanLength = AScan_Nx; #ifdef debug_OutputParameter printf( "prhs[0] Ascan-Data [%ix%i]\n", AScan_Nx , AScan_Mx); #endif //printf( "mxGetNumberOfDimensions(AScan)=%i\n", mxGetNumberOfDimensions(AScan)); if ((aScanCount > 65535)) // new 2019: increasing the limit of the A-Scan block size. however this is limited by the datatype of unsigned short which is used for a pointer. { printf( " -> AScanBlock size = %i\n", aScanCount); printf( "AScanBlock size might be too large (=> 2^16)!!!"); } if ((aScanCount > 1)) #ifdef debug_OutputParameter printf( " -> Blockmode with [%i x %i]\n", AScan_Nx, aScanCount); #endif // if(!(mxIsSingle(AScan))) // printf("AScans must be Single"); aScan_ptr = (float*)GetPr(AScan); #ifdef debug_OutputVariables // Ausgabe einzelner AScan-Samples der übergabewerte //int aScanSampleCountPerReceiver=3000; //printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",0 , *(aScan_ptr+0), *(aScan_ptr+1), *(aScan_ptr+2)); //printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",0 , *(aScan_ptr+2997), *(aScan_ptr+2998), *(aScan_ptr+2999)); //printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",1 , aScan_ptr[3000], aScan_ptr[3001], aScan_ptr[3002]); //printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",1 , aScan_ptr[5997], aScan_ptr[5998], aScan_ptr[5999]); //printf( " -> AScan (%3i) [ 0: 2] = [%f %f %f]\n",156 , aScan_ptr[0+(156*aScanSampleCountPerReceiver)], aScan_ptr[1+(156*aScanSampleCountPerReceiver)], aScan_ptr[2+(156*aScanSampleCountPerReceiver)]); //printf( " -> AScan (%3i) [2997:2999] = [%f %f %f]\n",156 , aScan_ptr[2997+(156*aScanSampleCountPerReceiver)], aScan_ptr[2998+(156*aScanSampleCountPerReceiver)], aScan_ptr[2999+(156*aScanSampleCountPerReceiver)]); #endif //====================================================================== 2.Input Parameter - Check IMAGE_STARTPOINT_S / pix_vect pix_vect_Nx = GetDimensions(pix_vect)[0]; // Reihen N ermitteln pix_vect_Mx = GetDimensions(pix_vect)[1]; // Spalten M ermitteln regionOfInterestOffset.x = *((float*)GetPr(pix_vect)); regionOfInterestOffset.y = *((float*)GetPr(pix_vect)+1); regionOfInterestOffset.z = *((float*)GetPr(pix_vect)+2); #ifdef debug_OutputParameter printf( "prhs[1] IMAGE_STARTPOINT_S [%ix%i] = [%f x %f x %f]\n", pix_vect_Nx , pix_vect_Mx, regionOfInterestOffset.x, regionOfInterestOffset.y, regionOfInterestOffset.z); #endif if (!(pix_vect_Nx == 1)||!(pix_vect_Mx == 3)) printf(" -> Dimension of IMAGE_STARTPOINT_S must be [1 x 3]"); if ((pix_vect_Nx > 1)) printf( " -> No Blockmode [%i x 3] allowed for IMAGE_STARTPOINT_S\n", pix_vect_Nx); // if(!(mxIsSingle(pix_vect))) // printf(" -> IMAGE_STARTPOINT_S must be Single"); //====================================================================== 3.Input Parameter - Check Receiver Index receiver_index_Nx = GetDimensions(receiver_index)[0]; // Reihen N ermitteln receiver_index_Mx = GetDimensions(receiver_index)[1]; // Spalten M ermitteln #ifdef debug_OutputParameter printf( "prhs[2] receiver_index [%ix%i]\n", receiver_index_Nx , receiver_index_Mx); #endif if (!(receiver_index_Nx == 1)) printf(" -> Dimension of receiver_index must be [1 x M]"); if (!(receiver_index_Mx == aScanCount)){ printf (" -> aScanCount(%i)!= M(%i)\n", aScanCount, receiver_index_Mx); printf(" -> Dimension of receiver_index has different size as Ascan-Data\n"); } // if (!(receiver_index_Mx == 1)) #ifdef debug_OutputParameter printf( " -> Blockmode with [1 x %i]\n", receiver_index_Mx); #endif // if(!(mxIsUint16(receiver_index))) // printf(" -> receiver_index must be Uint16"); // Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten unsigned short *receiver_index_ptr; receiver_index_ptr = (unsigned short*)GetPr(receiver_index); #ifdef debug_OutputVariables if ((receiver_index_Mx > 1)) { printf( " -> receiver_index: %i = [%i]\n",1 , *(receiver_index_ptr+0)); printf( " -> receiver_index: %i = [%i]\n",2 , *(receiver_index_ptr+1)); printf( " -> receiver_index: %i = [%i]\n",3 , *(receiver_index_ptr+2)); } else printf( " -> receiver_index: %i = [%i]\n",1 , *(receiver_index_ptr+0)); #endif //====================================================================== 4.Input Parameter - Check Emitter Index emitter_index_Nx = GetDimensions(emitter_index)[0]; // Reihen N ermitteln emitter_index_Mx = GetDimensions(emitter_index)[1]; // Spalten M ermitteln #ifdef debug_OutputParameter printf( "prhs[3] emitter_index [%ix%i]\n", emitter_index_Nx , emitter_index_Mx); #endif if (!(emitter_index_Nx == 1)) printf(" -> Dimension of emitter_index must be [1 x M]"); if (!(emitter_index_Mx == aScanCount)){ printf (" -> aScanCount(%i)!= M(%i)\n", aScanCount, emitter_index_Mx); printf(" -> Dimension of emitter_index has different size as Ascan-Data\n"); } // if (!(emitter_index_Mx == 1)) #ifdef debug_OutputParameter printf( " -> Blockmode with [1 x %i]\n", emitter_index_Mx); #endif // if(!(mxIsUint16(emitter_index))) // printf(" -> emitter_index must be Uint16"); // Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten unsigned short * emitter_index_ptr = (unsigned short*)GetPr(emitter_index); #ifdef debug_OutputVariables if ((receiver_index_Mx > 1)) { printf( " -> emitter_index: %i = [%i]\n",1 , *(emitter_index_ptr+0)); printf( " -> emitter_index: %i = [%i]\n",2 , *(emitter_index_ptr+1)); printf( " -> emitter_index: %i = [%i]\n",3 , *(emitter_index_ptr+2)); } else printf( " -> emitter_index: %i = [%i]\n",1 , *(emitter_index_ptr+0)); #endif //====================================================================== 5.Input Parameter - Check receiver_list receiver_list_Nx = GetDimensions(receiver_list)[0]; // Reihen N ermitteln receiver_list_Mx = GetDimensions(receiver_list)[1]; // Spalten M ermitteln #ifdef debug_OutputParameter printf( "prhs[4] receiver_list [%ix%i]\n", receiver_list_Nx , receiver_list_Mx); #endif if (!(receiver_list_Nx == 3)) printf(" -> Dimension of receiver_list must be [3 x M]"); // if(!(mxIsSingle(receiver_list))) // printf(" -> receiver_list must be Single"); // Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten float *receiver_list_ptr; receiver_list_ptr = (float*)GetPr(receiver_list); #ifdef debug_OutputVariables if ((receiver_list_Mx > 1)) { printf( " -> receiver_list-Position: %i = [%f %f %f]\n",1 , *(receiver_list_ptr+0), *(receiver_list_ptr+1), *(receiver_list_ptr+2)); printf( " -> receiver_list-Position: %i = [%f %f %f]\n",2 , *(receiver_list_ptr+3), *(receiver_list_ptr+4), *(receiver_list_ptr+5)); } else printf( " -> receiver_list-Position: %i = [%f %f %f]\n",1 , *(receiver_list_ptr+0), *(receiver_list_ptr+1), *(receiver_list_ptr+2)); #endif //====================================================================== 6.Input Parameter - Check emitter_list emitter_list_Nx = GetDimensions(emitter_list)[0]; // Reihen N ermitteln emitter_list_Mx = GetDimensions(emitter_list)[1]; // Spalten M ermitteln // emitter_list gibt die maximale Anzahl an Emittern die in diesem Block vorkommen können wieder! #ifdef debug_OutputParameter printf( "prhs[5] emitter_list [%ix%i]\n", emitter_list_Nx , emitter_list_Mx); #endif if (!(emitter_list_Nx == 3)) printf(" -> Dimension of emitter_list must be [3 x M]"); // if(!(mxIsSingle(receiver_list))) // printf(" -> emitter_list must be Single"); // Ausgabe einzelner Geometriedaten der übergabewerte mit verschiedenen Varianten float *emitter_list_ptr; emitter_list_ptr = (float*)GetPr(emitter_list); #ifdef debug_OutputVariables if ((emitter_list_Mx > 1)) { printf( " -> emitter_list-Position: %i = [%f %f %f]\n",1 , *(emitter_list_ptr+0), *(emitter_list_ptr+1), *(emitter_list_ptr+2)); printf( " -> emitter_list-Position: %i = [%f %f %f]\n",2 , *(emitter_list_ptr+3), *(emitter_list_ptr+4), *(emitter_list_ptr+5)); } else printf( " -> emitter_list-Position: %i = [%f %f %f]\n",1 , *(emitter_list_ptr+0), *(emitter_list_ptr+1), *(emitter_list_ptr+2)); #endif //====================================================================== 7.Input Parameter - Check SAFT_mode SAFT_mode_Nx = GetDimensions(SAFT_mode)[0]; // Reihen N ermitteln SAFT_mode_Mx = GetDimensions(SAFT_mode)[1]; // Spalten M ermitteln SAFT_MODE = *((int*)GetPr(SAFT_mode)); #ifdef debug_OutputParameter printf( "prhs[6] SAFT_MODE [%ix%i] = [%i]\n", SAFT_mode_Nx , SAFT_mode_Mx, SAFT_MODE); #endif if (!(SAFT_mode_Nx == 1)) printf(" -> Dimension of SAFT_MODE must be [1 x 1]"); // if(!(mxIsUint32(SAFT_mode))) // printf(" -> SAFT_MODE must be Uint32"); #ifdef debug_OutputParameter printf ( "\e[7;37m ======================================================================================== \e[0m\n"); printf ( "\e[7;37m Used SAFT (GPU-Version):\e[0m"); #endif switch (SAFT_MODE) { case 0: SOSMode_3DVolume = false; ATTMode_3DVolume = false; //printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); printf (" -> AscanIndexVersion only make sense with SOS or SOS and ATT Volume => exit"); break; case 1: SOSMode_3DVolume = true; ATTMode_3DVolume = false; //printf ( "\e[7;37m + Speed of sound correction - Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); break; case 2: SOSMode_3DVolume = true; ATTMode_3DVolume = true; //printf ( "\e[7;37m + Speed of sound correction + Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); break; case 3: //SOSMode_3DVolume = true; ATTMode_3DVolume = true; //printf ( "\e[7;37m SAFT_MODE = 3 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); printf(" -> not implemented => exit"); break; case 4: //SOSMode_3DVolume = false; ATTMode_3DVolume = false; //printf ( "\e[7;37m SAFT_MODE = 4 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); printf(" -> not implemented => exit"); break; default: SOSMode_3DVolume = false; ATTMode_3DVolume = false; //printf ( " -> SAFT_MODE %i is out of range [0..3] => use Standard SAFT\n", SAFT_MODE); //printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); break; } #ifdef debug_OutputParameter printf ( "\n\e[7;37m ======================================================================================== \e[0m\n"); #else //printf ( "\n"); #endif //====================================================================== 8.Input Parameter - Check SAFT_variant SAFT_variant_Nx = GetDimensions(SAFT_variant)[0]; // Reihen N ermitteln SAFT_variant_Mx = GetDimensions(SAFT_variant)[1]; // Spalten M ermitteln SAFT_VARIANT = (int*)GetPr(SAFT_variant); SAFT_VARIANT_Size = SAFT_variant_Mx; #ifdef debug_OutputParameter //printf( "prhs[7] SAFT_VARIANT [%ix%i] = [%i %i %i %i %i %i]\n", SAFT_variant_Nx , SAFT_variant_Mx, SAFT_VARIANT[0], SAFT_VARIANT[1], SAFT_VARIANT[2], SAFT_VARIANT[3], SAFT_VARIANT[4], SAFT_VARIANT[5]); printf(" -> Ascan Preintegration = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration]); printf(" -> Ascan Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_AscanInterpolation]); printf(" -> Preprocessing SOS&ATT 3D Volume Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtPreprocessing]); printf(" -> Reconstruction SOS&ATT 3D Volume Interpolation = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_3DVolumeInterpolationAtReconstruction]); printf(" -> Standard deviation (STD) not yet = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_CalcStandardDeviation]); printf(" -> Sum up over boarder Indices not yet = [%i]\n", SAFT_VARIANT[SAFT_VARIANT_SumUpOverBoarderIndices]); #endif // if(!(mxIsUint32(SAFT_variant))) // printf(" -> SAFT_VARIANT must be Uint32"); if (!(SAFT_variant_Nx == 1)||!(SAFT_variant_Mx == 6)) printf(" -> Dimension of SAFT_VARIANT must be [1 x 6]"); //====================================================================== 9.Input Parameter - Check for SOS volume speed_Nx = GetDimensions(speed)[0]; // Reihen N ermitteln speed_Mx = GetDimensions(speed)[1]; // Spalten M ermitteln float Sos = *((float*)GetPr(speed)); float *speed_vec_ptr; speed_vec_ptr = (float*)GetPr(speed); // Pointer für SoSDaten ermitteln // if(!(mxIsSingle(speed))) // printf(" -> SOSVolume must be Single"); if (SOSMode_3DVolume == true) // SOS correction need 3D Volume { if (GetNumberOfDimensions(speed) == 3){ #ifdef debug_OutputParameter printf( "prhs[8] SOSVolume [%ix%ix%i]\n", mxGetDimensions(speed)[0] , mxGetDimensions(speed)[1], mxGetDimensions(speed)[2]); printf( " -> use SoS-Mode with SoS-Correction per Path with SOS 3D Volume\n"); #endif if (!( (speed_Nx > 1)&&(speed_Mx > 1) )){ printf( " -> SOSGrid_XYZ.x and SOSGrid_XYZ.y must be > 1 for SOS Correction with 3D Volume!!!"); } } else if (GetNumberOfDimensions(speed) == 2){ printf ( "prhs[8] SOSVolume [%ix%i]\n",(int)GetDimensions(speed)[0] , (int)GetDimensions(speed)[1]); printf( " -> SOSVolume is not a 3D Volume as expected!"); } SOSGrid_Xx = GetDimensions(speed)[0]; // SOSGrid_X ermitteln SOSGrid_Yx = GetDimensions(speed)[1]; // SOSGrid_Y ermitteln SOSGrid_Zx = GetDimensions(speed)[2]; // SOSGrid_Z ermitteln SOSGrid_XYZ.x = SOSGrid_Xx; SOSGrid_XYZ.y = SOSGrid_Yx; SOSGrid_XYZ.z = SOSGrid_Zx; if ((SOSGrid_XYZ.x > 128)||(SOSGrid_XYZ.y > 128)|| (SOSGrid_XYZ.z > 128)){ printf ( " -> SOSGrid_XYZ [%i x %i x %i]\n", (int)SOSGrid_Xx, (int)SOSGrid_Yx, (int)SOSGrid_Zx); printf ( " Warning -> SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n"); } #ifdef debug_OutputVariables printf(" -> SoS-Data[1-3] = [%f %f %f]\n",speed_vec_ptr[0], speed_vec_ptr[1], speed_vec_ptr[2]); //int speedOfSoundFieldIndex = (currentVoxel.z * SOSGrid_XYZ.y + currentVoxel.y) * SOSGrid_XYZ.x + currentVoxel.x; int speedOfSoundFieldIndex = (0 * SOSGrid_XYZ.y + 1) * SOSGrid_XYZ.x + 9; printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",speedOfSoundFieldIndex, (speedOfSoundFieldIndex+2), speed_vec_ptr[speedOfSoundFieldIndex + 0], speed_vec_ptr[speedOfSoundFieldIndex + 1], speed_vec_ptr[speedOfSoundFieldIndex + 2]); speedOfSoundFieldIndex = (4 * SOSGrid_XYZ.y + 37) * SOSGrid_XYZ.x + 9; printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",speedOfSoundFieldIndex, (speedOfSoundFieldIndex+2), speed_vec_ptr[speedOfSoundFieldIndex + 0], speed_vec_ptr[speedOfSoundFieldIndex + 1], speed_vec_ptr[speedOfSoundFieldIndex + 2]); int SOSGrid_END = (SOSGrid_XYZ.x * SOSGrid_XYZ.y * SOSGrid_XYZ.z); printf(" -> SoS-Data[%i-%i] = [%f %f %f]\n",(SOSGrid_END-3), (SOSGrid_END-1), speed_vec_ptr[SOSGrid_END-3], speed_vec_ptr[SOSGrid_END-2], speed_vec_ptr[SOSGrid_END-1]); #endif } //====================================================================== 10.Input Parameter - Check SoS Startpoint sos_startPoint_Nx = GetDimensions(sos_startPoint)[0]; // Reihen N ermitteln sos_startPoint_Mx = GetDimensions(sos_startPoint)[1]; // Spalten M ermitteln sosOffset.x = *((float*)GetPr(sos_startPoint)); sosOffset.y = *((float*)GetPr(sos_startPoint)+1); sosOffset.z = *((float*)GetPr(sos_startPoint)+2); #ifdef debug_OutputParameter printf( "prhs[9] SOS_STARTPOINT_S [%ix%i] = [%f x %f x %f]\n", sos_startPoint_Nx , sos_startPoint_Mx, sosOffset.x, sosOffset.y, sosOffset.z); #endif if (!(sos_startPoint_Nx == 1)||!(sos_startPoint_Mx == 3)) printf(" -> Dimension of SOS_STARTPOINT_S must be [1 x 3]"); if ((sos_startPoint_Nx > 1)) printf( " -> No Blockmode [%i x 3] allowed for SOS_STARTPOINT_S\n", sos_startPoint_Nx); // if(!(mxIsSingle(sos_startPoint))) // printf(" -> SOS_STARTPOINT_S must be Single"); //====================================================================== 11.Input Parameter - Check SoS_RESOLUTION / sos_res if (SOSMode_3DVolume == true){ sos_res_Nx = GetDimensions(sos_res)[0]; // Reihen N ermitteln sos_res_Mx = GetDimensions(sos_res)[1]; // Spalten M ermitteln SOS_RESOLUTION = *((float*)GetPr(sos_res)); #ifdef debug_OutputParameter printf( "prhs[10] SOS_RESOLUTION_S [%ix%i] = [%f]\n", sos_res_Nx , sos_res_Mx, SOS_RESOLUTION); #endif if (!(sos_res_Nx == 1)) printf(" -> Dimension of SOS_RESOLUTION_S must be [1 x 1]"); if ((sos_res_Mx > 1)) printf( " -> No Blockmode allowed for SOS_RESOLUTION_S! [1 x %i]\n", sos_res_Mx); // if(!(mxIsSingle(sos_res))) // printf(" -> SOS_RESOLUTION_S must be Single"); } //====================================================================== 12.Input Parameter - Check for ATTVolume / Attenuation-Data attVolume_Nx = GetDimensions(attVolume)[0]; // Reihen N ermitteln attVolume_Mx = GetDimensions(attVolume)[1]; // Spalten M ermitteln float *att_vec_ptr; att_vec_ptr = (float*)GetPr(attVolume); // Pointer für ATT-Daten ermitteln // if(!(mxIsSingle(attVolume))) // printf(" -> attVolume must be Single"); if (GetNumberOfDimensions(attVolume) == 3){ #ifdef debug_OutputParameter printf( "prhs[11] attVolume [%ix%ix%i]\n", mxGetDimensions(attVolume)[0] , mxGetDimensions(attVolume)[1], mxGetDimensions(attVolume)[2]); printf( " -> use ATT-Mode with ATT-Correction per Path with ATT 3D Volume\n"); #endif if (!( (attVolume_Nx > 1)&&(attVolume_Mx > 1) )){ printf( " -> ATTGrid_XYZ.x and ATTGrid_XYZ.y must be > 1 for ATT Correction with 3D Volume!!!"); } } else if (GetNumberOfDimensions(attVolume) == 2){ #ifdef debug_OutputParameter printf ( "prhs[11] attVolume [%ix%i]\n", mxGetDimensions(attVolume)[0] , mxGetDimensions(attVolume)[1]); printf( " -> attVolume is not a 3D Volume as expected!"); #endif } if ((SOSMode_3DVolume == true)&&(ATTMode_3DVolume == true)){ // 3D Volume muss bei SOS und ATT angegeben sein damit ATT Korrektur durchgefuehrt werden kann ATTGrid_Xx = GetDimensions(attVolume)[0]; // ATTGrid_X ermitteln ATTGrid_Yx = GetDimensions(attVolume)[1]; // ATTGrid_Y ermitteln ATTGrid_Zx = GetDimensions(attVolume)[2]; // ATTGrid_Z ermitteln ATTGrid_XYZ.x = ATTGrid_Xx; ATTGrid_XYZ.y = ATTGrid_Yx; ATTGrid_XYZ.z = ATTGrid_Zx; if ((ATTGrid_XYZ.x > 128) || (ATTGrid_XYZ.y > 128) || (ATTGrid_XYZ.z > 128)){ printf ( " -> ATTGrid_XYZ [%i x %i x %i]\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z); printf ( " Warning -> ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n"); } if ((ATTGrid_XYZ.x != SOSGrid_XYZ.x) || (ATTGrid_XYZ.y != SOSGrid_XYZ.y) || (ATTGrid_XYZ.z != SOSGrid_XYZ.z)){ // Restriction: Volume parameter of ATT & SOS must be the same printf( " -> ATTGrid[%i %i %i] != SOSGrid[%i %i %i]!\n", ATTGrid_XYZ.x , ATTGrid_XYZ.y, ATTGrid_XYZ.z, SOSGrid_XYZ.x , SOSGrid_XYZ.y, SOSGrid_XYZ.z); printf(" -> ATTGrid must have the same size as SOSGrid \n"); } #ifdef debug_OutputVariables printf(" -> ATT-Data[1-3] = [%f %f %f]\n", att_vec_ptr[0], att_vec_ptr[1], att_vec_ptr[2]); int ATTFieldIndex = (14 * ATTGrid_XYZ.y + 30 ) * ATTGrid_XYZ.x + 16; printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", ATTFieldIndex, (ATTFieldIndex+2), att_vec_ptr[ATTFieldIndex + 0], att_vec_ptr[ATTFieldIndex + 1], att_vec_ptr[ATTFieldIndex + 2]); ATTFieldIndex = (15 * ATTGrid_XYZ.y + 32 ) * ATTGrid_XYZ.x + 32; printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", ATTFieldIndex, (ATTFieldIndex+2), att_vec_ptr[ATTFieldIndex + 0], att_vec_ptr[ATTFieldIndex + 1], att_vec_ptr[ATTFieldIndex + 2]); int ATTGrid_END = (ATTGrid_XYZ.x * ATTGrid_XYZ.y * ATTGrid_XYZ.z); printf(" -> ATT-Data[%i-%i] = [%f %f %f]\n", (ATTGrid_END-3), (ATTGrid_END-1), att_vec_ptr[ATTGrid_END-3], att_vec_ptr[ATTGrid_END-2], att_vec_ptr[ATTGrid_END-1]); #endif } else{ //#ifdef debug_OutputVariables printf( " -> ATTMode_3DVolume == false => skip ATTGrid\n"); //#endif ATTGrid_Xx = 0; // ATTGrid_X ermitteln ATTGrid_Yx = 0; // ATTGrid_Y ermitteln ATTGrid_Zx = 0; // ATTGrid_Z ermitteln ATTGrid_XYZ.x = ATTGrid_Xx; ATTGrid_XYZ.y = ATTGrid_Yx; ATTGrid_XYZ.z = ATTGrid_Zx; } //====================================================================== 13.Input Parameter - Check IMAGE_RESOLUTION_S / res res_Nx = GetDimensions(res)[0]; // Reihen N ermitteln res_Mx = GetDimensions(res)[1]; // Spalten M ermitteln IMAGE_RESOLUTION = *((float*)GetPr(res)); #ifdef debug_OutputParameter printf( "prhs[12] IMAGE_RESOLUTION_S [%ix%i] = [%f]\n", res_Nx , res_Mx, IMAGE_RESOLUTION); #endif if (!(res_Nx == 1)) printf(" -> Dimension of IMAGE_RESOLUTION must be [1 x 1]"); if ((res_Mx > 1)) printf( " -> No Blockmode allowed for IMAGE_RESOLUTION! [1 x %i]\n", res_Mx); // if(!(mxIsSingle(res))) // printf(" -> IMAGE_RESOLUTION must be Single"); if (SOSMode_3DVolume == true){ if(IMAGE_RESOLUTION > SOS_RESOLUTION){ printf( " -> IMAGE_RESOLUTION (%f) > SOS_RESOLUTION (%f)\n", IMAGE_RESOLUTION, SOS_RESOLUTION); printf(" -> IMAGE_RESOLUTION must not > SOS_RESOLUTION !!!"); } } //====================================================================== 14.Input Parameter - Check TimeInterval_S / Timeint timeint_Nx = GetDimensions(timeint)[0]; // Reihen N ermitteln timeint_Mx = GetDimensions(timeint)[1]; // Spalten M ermitteln sampleRate = *((float*)GetPr(timeint)); #ifdef debug_OutputParameter printf( "prhs[13] TimeInterval_S [%ix%i] = [%e]\n", timeint_Nx , timeint_Mx, sampleRate); #endif if (!(timeint_Nx == 1)) printf(" -> Dimension of TimeInterval_S must be [1 x 1]"); if ((timeint_Mx > 1)) printf( " -> No Blockmode allowed for TimeInterval_S! [1 x %i]\n", timeint_Mx); // if(!(mxIsSingle(timeint))) // printf(" -> TimeInterval_S must be Single"); //====================================================================== 15.Input Parameter - Check IMAGE_XYZ_UI32 / IMAGE_XYZ IMAGE_XYZ_Nx = GetDimensions(IMAGE_XYZ)[0]; // Reihen N ermitteln IMAGE_XYZ_Mx = GetDimensions(IMAGE_XYZ)[1]; // Spalten M ermitteln IMAGE_SIZE_XYZ.x = *((int*)GetPr(IMAGE_XYZ)); IMAGE_SIZE_XYZ.y = *((int*)GetPr(IMAGE_XYZ)+1); IMAGE_SIZE_XYZ.z = *((int*)GetPr(IMAGE_XYZ)+2); #ifdef debug_OutputParameter printf( "prhs[14] IMAGE_XYZ [%ix%i] = [%ix%ix%i]\n", IMAGE_XYZ_Nx , IMAGE_XYZ_Mx,IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z); #endif if (!(IMAGE_XYZ_Nx == 1)||!(IMAGE_XYZ_Mx == 3)) printf(" -> Dimension of IMAGE_XYZ must be [1 x 3]"); if ((IMAGE_XYZ_Nx > 1)) printf( " -> No Blockmode allowed for IMAGE_XYZ! [%i x 3]\n", IMAGE_XYZ_Nx); // if(!(mxIsUint32(IMAGE_XYZ))) // printf(" -> IMAGE_XYZ must be UINT32"); if ((IMAGE_SIZE_XYZ.x > 8192)||(IMAGE_SIZE_XYZ.y > 8192)) // Aufteilung in BlockDim 512,1,1 passt für 5632x5632. Es würde etwas weiter gehen aber dann muss Y kleiner sein. printf(" -> IMAGE_XYZ must not > [8192 x 8192 x N]!!!"); //====================================================================== 16.Input Parameter - Check Env / IMAGE_SUM IMAGE_SUM_Xx = GetDimensions(IMAGE_SUM)[0]; // Spalten M ermitteln X IMAGE_SUM_Yx = GetDimensions(IMAGE_SUM)[1]; // Reihen N ermitteln Y if (GetNumberOfDimensions(IMAGE_SUM) > 2) IMAGE_SUM_Zx = GetDimensions(IMAGE_SUM)[2]; // Z-Schichten ermitteln Z else if (GetNumberOfDimensions(IMAGE_SUM) == 2) IMAGE_SUM_Zx = 1; // Z-Schichten = 1 else { printf( " -> mxGetNumberOfDimensions of IMAGE_SUM = %i\n", (int)GetNumberOfDimensions(IMAGE_SUM)); printf(" -> Dimension of IMAGE_SUM must be 3: [X x Y x Z]"); } #ifdef debug_OutputParameter printf( "prhs[15] IMAGE_SUM [%ix%ix%i]\n", IMAGE_SUM_Xx , IMAGE_SUM_Yx, IMAGE_SUM_Zx); #endif // if(!(mxIsDouble(IMAGE_SUM))) // printf(" -> IMAGE_SUM must be Double"); // if(!(mxGetNumberOfElements(IMAGE_SUM) == ((size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z))) // { // printf( " -> IMAGE_SUM and the Number of Voxels don't match: %lld = [%lldx%lldx%lld]\n",(int)GetNumberOfElements(IMAGE_SUM), IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z); // printf(" -> Make sure that they have the same size"); // } uint64_t IMAGE_SUM_Count = GetNumberOfElements(IMAGE_SUM); double *IMAGE_SUM_vec_ptr = (double*)GetPr(IMAGE_SUM); #ifdef debug_OutputVariables printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",0 , IMAGE_SUM_vec_ptr[0], IMAGE_SUM_vec_ptr[1], IMAGE_SUM_vec_ptr[2]); printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",1 , IMAGE_SUM_vec_ptr[3], IMAGE_SUM_vec_ptr[4], IMAGE_SUM_vec_ptr[5]); printf( " -> IMAGE_SUM: %i = [%f %f %f]\n",2 , IMAGE_SUM_vec_ptr[6], IMAGE_SUM_vec_ptr[7], IMAGE_SUM_vec_ptr[8]); printf( " -> IMAGE_SUM: %lld = [%f %f %f]\n",(IMAGE_SUM_Count-3) , IMAGE_SUM_vec_ptr[IMAGE_SUM_Count - 3], IMAGE_SUM_vec_ptr[IMAGE_SUM_Count - 2], IMAGE_SUM_vec_ptr[IMAGE_SUM_Count -1]); #endif //====================================================================== 17.Input Parameter - Check BlockDimension for GPU BlockDim_XYZ_Nx = GetDimensions(BlockDim)[0]; // Reihen N ermitteln BlockDim_XYZ_Mx = GetDimensions(BlockDim)[1]; // Spalten M ermitteln BlockDim_XYZ.x = *((int*)GetPr(BlockDim)); BlockDim_XYZ.y = *((int*)GetPr(BlockDim)+1); BlockDim_XYZ.z = *((int*)GetPr(BlockDim)+2); #ifdef debug_OutputParameter printf( "prhs[16] BlockDim_XYZ (GPU) [%ix%i] = [%ix%ix%i]\n", BlockDim_XYZ_Nx , BlockDim_XYZ_Mx, BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z); #endif if (!(BlockDim_XYZ_Nx == 1)||!(BlockDim_XYZ_Mx == 3)) printf(" -> Dimension of BlockDim_XYZ must be [1 x 3]"); if ((BlockDim_XYZ_Nx > 1)) printf( " -> No Blockmode! [%i x 3]\n", (int)BlockDim_XYZ_Nx); // if(!(mxIsUint32(BlockDim))) // printf(" -> BlockDim_XYZ must be UINT32"); if ((BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z) > 1024){ // BlockSize limited to 1024. Perhaps newer GPUs will support more Threads per Block printf( " -> BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z must not > 1024!!!"); // Here Adaption for BlockSize can be done. BlockDim_XYZ.x = 1024; // If Blockdimensions are not specified than standard Blockdimensions will be used. BlockDim_XYZ.x = 1; BlockDim_XYZ.x = 1; printf( " -> Standard Size for BlockDim_XYZ is used [%ix%ix%i]\n", BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z); } //====================================================================== 18.Input Parameter - Check GPUs int *enableGPUs_ptr; GPUs_Nx = GetDimensions(GPUs)[0]; // Reihen N ermitteln GPUs_Mx = GetDimensions(GPUs)[1]; // Spalten M ermitteln #ifdef debug_OutputParameter printf( "prhs[17] GPUs [%ix%i] \n", GPUs_Nx , GPUs_Mx); #endif enableGPUs_ptr = (int*)GetPr(GPUs); selectedNumberGPUs = GPUs_Mx; //Determine Number of GPU-Devices and check if there are so many available int num_devices = 0; //printf( " -> cudaGetDeviceCount: %i\n", num_devices); CUDA_CHECK(cudaGetDeviceCount(&num_devices)); #ifdef debug_OutputInfo printf( " -> GPU-devices available: %i\n", num_devices); #endif if (selectedNumberGPUs <= num_devices) { #ifdef debug_OutputParameter printf( " (selectedNumberGPUs(%i) <= num_devices(%i)) -> OK -> selectedNumberGPUs = %i!\n", selectedNumberGPUs, num_devices, selectedNumberGPUs); #endif } else { printf( " !!! !!! selectedNumberGPUs(%i) > num_devices(%i) !!!! !!!! -> selectedNumberGPUs = num_devices = %i!\n", selectedNumberGPUs, num_devices, num_devices); selectedNumberGPUs = num_devices; // Reduce number of selected to number of GPUs in PC system! } // Check passed GPU-ID-Numbers and amount of GPUs #ifdef debug_OutputParameter printf( "prhs[17] GPUs [%ix%i] = [", GPUs_Nx , selectedNumberGPUs); #endif int gpuNr = 0; int gpuNrCheck = 0; for (gpuNr = 0; gpuNr < selectedNumberGPUs; ++gpuNr){ #ifdef debug_OutputParameter printf( " %i ", enableGPUs_ptr[gpuNr]); #endif if (enableGPUs_ptr[gpuNr] > (num_devices-1)){ // Check if more GPUs are selected then available in System printf( "\n enableGPUs_ptr[gpuNr=%i] = %i !!!\n", gpuNr, enableGPUs_ptr[gpuNr]); printf(" -> selected number of GPU > available Devices is not allowed!"); } for (gpuNrCheck = 0; gpuNrCheck < gpuNr; ++gpuNrCheck){ #ifdef debug_OutputParameter printf( "\n enableGPUs_ptr[gpuNrCheck %i] = %i, enableGPUs_ptr[gpuNr %i] = %i\n",gpuNrCheck, enableGPUs_ptr[gpuNrCheck], gpuNr, enableGPUs_ptr[gpuNr]); #endif if (enableGPUs_ptr[gpuNrCheck] == enableGPUs_ptr[gpuNr]) printf(" -> GPU Device can only be used once!!!"); } } #ifdef debug_OutputParameter printf( "]; Number of GPUs to use = %i\n", selectedNumberGPUs); #endif if (!(GPUs_Nx == 1)||!(GPUs_Mx < 10)) printf(" -> Dimension of GPUs must be [1 x <10]"); if ((pix_vect_Nx > 1)) printf( " -> No Blockmode [%i x n] allowed for GPUs\n", GPUs_Nx); // if(!(mxIsUint32(GPUs))) // printf(" -> GPUs must be UINT32"); //====================================================================== 19.Input Parameter - debugMode, debugModeParameter dbgMode_Nx = GetDimensions(dbgMode)[0]; // Reihen N ermitteln dbgMode_Mx = GetDimensions(dbgMode)[1]; // Spalten M ermitteln debugMode = *((float*)GetPr(dbgMode) ); debugModeParameter = *((float*)GetPr(dbgMode)+1); #ifdef debug_OutputParameter printf( "prhs[18] debugMode [%ix%i] = [%f %f]\n", dbgMode_Nx , dbgMode_Mx, debugMode, debugModeParameter); #endif if ((dbgMode_Nx != 1)||(dbgMode_Mx != 2)) printf(" -> Dimension of debugMode must be [1 x 2]\n"); // if(!(mxIsSingle(dbgMode))) // printf(" -> debugMode must be single"); if(debugMode != 0.0) printf (" -> debugMode = [%f], debugModeParameter = [%f]\n", debugMode, debugModeParameter); #ifdef debug_OutputParameter printf( "=================================================================================================\n"); // End of Inputparameter #endif // // Enable colored Output // http://linuxgazette.net/issue65/padala.html // printf ("\e[1;34mThis is a blue text.\e[0m"); // farbige Ausgabe mit // printf ("\e[1;34m %-6s \e[m", "This is text"); // Farbige Text-Ausgabe // printf ("\n"); //================================================================================================================ Create Output Memory / Parameter #ifdef debug_OutputParameter printf( "\n"); printf( "Create Outputparameter\n"); printf( "Out[n] Meaning \n"); printf( "=================================================================================================\n"); #endif // ~~~~ Create 3D-Matrix for the Output-Values // Output-Dimension is {IMAGE_XYZ_X, IMAGE_XYZ_Y, IMAGE_XYZ_Z} const int dims[]={IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z}; int ndim = 3; #ifdef debug_OutputParameter printf( "plhs[0] = Output_Voxels = mxCreateNumericArray( ndim(%i), dims{%i %i %i}, mxDOUBLE_CLASS, mxREAL);\n", ndim, dims[0], dims[1], dims[2]); #endif Matrix_t Output_Voxels; Output_Voxels.NumberOfDims = ndim; Output_Voxels.Dims[0] = dims[0]; Output_Voxels.Dims[1] = dims[1]; Output_Voxels.Dims[2] = dims[2]; Output_Voxels.Data = new double[dims[0]*dims[1]*(dims[2]?dims[2]:1)]; double *Output_Voxels_ptr = (double*)GetPr(Output_Voxels); // ~~~~ Create Pointer to return value from Duration of Kernel // Erstelle Array mit folgender Formatierung // 0: Total Durationtime all GPUs // 1: Total Durationtime GPU 1 // 2: Total Durationtime GPU 2 // n: Total Durationtime GPU n int m = (1 + selectedNumberGPUs), n = 1; #ifdef debug_OutputParameter printf( "plhs[1] = Duration = mxCreateDoubleMatrix( m(%i), n(%i), mxREAL);\n", m, n); #endif double *Duration_ptr = new double[m*n]; // ~~~~ Create Pointer to return Error/Abortvalue of each multithread //int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int)); int *Abort_ptr = (int*) malloc(selectedNumberGPUs*sizeof(int)); // ~~~~ Erstelle Array fuer Testrueckgabe der integrierten Ascans //AScan_Nx = mxGetDimensions(AScan)[0]; // Reihen N ermitteln //AScan_Mx = mxGetDimensions(AScan)[1]; // Spalten M ermitteln //int m = aScanCount; // 1 n = AScan_Nx; //z.B. 3000 #ifdef debug_OutputParameter printf( "plhs[2] = Ascans = mxCreateNumericMatrix( m(%i), n(%i), mxREAL);\n", m, n); #endif float *AscansOut_ptr = new float[m*n]; #ifdef debug_OutputParameter printf( "=================================================================================================\n\n"); // End of Outputparameter #endif //================================================================================================================ Preintegrate Ascans #ifdef preAscanIntegrationToMatchSamplerateToResolution // Preintegrate Ascans for matching of Samplerate and Resolution #ifdef debug_OutputHostStepsPerformance double diff_time; struct timeval startPreintegrateAscans, stopPreintegrateAscans; gettimeofday(&startPreintegrateAscans, NULL); #endif if (SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration] == 1){ //printf( "(SAFT_VARIANT[0] == 1) => perform preintegrateAscans\n\n"); speed_vec_ptr = (float*)GetPr(speed); // printf( " speed_vec_ptr[%3i] = %12.10f\n",0,speed_vec_ptr[0]); if (speed_vec_ptr[0] == 0){ printf("First value in SOS Volume = 0 --> preintegrateAscans can't be performed!!! --> Exit"); } //================================================================================================================ preintegrateAscans(aScan_ptr, AscansOut_ptr, speed_vec_ptr, aScanCount, aScanLength, IMAGE_RESOLUTION, sampleRate, debugMode, debugModeParameter); //================================================================================================================ } else{ //printf( "(SAFT_VARIANT[0] == 0) => skip preintegrateAscans\n\n"); //Daten trotzdem in Outputspeicher fuer Matlab transferieren for (int j = 0; j 0) { printf( "!!!!!!!!!!!!!!!!!!! Aborted Thread for GPU[%i] = %i\n", i, Abort_ptr[i]); AbortedThreads = true; } } free(Abort_ptr); if (AbortedThreads) printf(" Aborted Thread occurred -> see output history"); //================================================================================================================ //================================================================================================================ //================================================================================================================ Build Sum of IMAGE_SUM and current reconstructed Volume // Daten des uebergebenen Outputvolumens zum rekonstruierten Volumen addieren #ifdef debug_OutputInfo printf( "Build Sum of reconstructed Volume and given IMAGE_SUM \n"); #endif #ifdef debug_OutputHostStepsPerformance struct timeval startSumIMAGE_SUM, stopSumIMAGE_SUM; gettimeofday(&startSumIMAGE_SUM, NULL); #endif for(uint64_t i=0; i < IMAGE_SUM_Count; i++) { Output_Voxels_ptr[i] += IMAGE_SUM_vec_ptr[i]; } #ifdef debug_OutputHostStepsPerformance gettimeofday(&stopSumIMAGE_SUM, NULL); diff_time = (double)((stopSumIMAGE_SUM.tv_sec * 1000000.0 + stopSumIMAGE_SUM.tv_usec) - (startSumIMAGE_SUM.tv_sec * 1000000.0 + startSumIMAGE_SUM.tv_usec)); printf ("### HOST ### Sum up (IMAGE_SUM + reconstr. Volume) = %8.0f µs = %8.2f MVoxel/s\n", diff_time, double(IMAGE_SUM_Count)/diff_time); printf ("########################################################################\n"); #endif //================================================================================================================ Show returned Output-Values // #ifdef debug_OutputVariables // Testoutput Value of duration printf( "Duration_ptr[0] = %f\n", Duration_ptr[0]); // Testoutput Sum of IMAGE_SUM and reconstructed Volume printf( "Output_Voxels: %i = [%f %f %f]\n",0 , Output_Voxels_ptr[0], Output_Voxels_ptr[1], Output_Voxels_ptr[2]); printf( "Output_Voxels: %i = [%f %f %f]\n",1 , Output_Voxels_ptr[3], Output_Voxels_ptr[4], Output_Voxels_ptr[5]); printf( "Output_Voxels: %i = [%f %f %f]\n",2 , Output_Voxels_ptr[6], Output_Voxels_ptr[7], Output_Voxels_ptr[8]); printf( "Output_Voxels: %i = [%f %f %f]\n",(IMAGE_SUM_Count-3) , Output_Voxels_ptr[IMAGE_SUM_Count - 3], Output_Voxels_ptr[IMAGE_SUM_Count - 2], Output_Voxels_ptr[IMAGE_SUM_Count -1]); #endif #ifdef debug_OutputFunctions printf( "<== mexFunction - End\n"); #endif delete [] AscansOut_ptr; delete [] Duration_ptr; return Output_Voxels; }