#include "SAFT_TOFI.h" #include #include #include #include #include "saft.hpp" // For German printf output format: Float with , instead of . #include #include #include #include #include #include #include #include #include "spdlog/sinks/stdout_color_sinks.h" // TODO: Blockgroesse (z > 1) fuehrt zu Kernelabbruechen // pthread handle typedef struct thread_handle_t { // pthread_t pthread; int deviceId; int deviceIndex; float *aScan_ptr; double *output_ptr; double *Duration_ptr; unsigned short *receiver_index_ptr; unsigned short *emitter_index_ptr; float *receiver_list_ptr; int receiver_list_Size; float *emitter_list_ptr; int emitter_list_Size; float *speed_vec_ptr; int3 SOSGrid_XYZ; float3 sosOffset; float SOS_RESOLUTION; float *att_vec_ptr; int aScanCount; int aScanLength; float inc; int3 res; float sampleRate; float3 volposition; int num_threads; dim3 fixedBlockDimensions; float debugMode; float debugModeParameter; bool SOSMode_3DVolume; bool ATTMode_3DVolume; int SAFT_MODE; int *SAFT_VARIANT; int SAFT_VARIANT_Size; int *Abort_ptr; } thread_handle; // Convenient typedefs for GPU-DeviceProperties container typedef std::vector DeviceProperties; /** Load CUDA devices and write them to a container. */ void loadDevices(DeviceProperties &output ///< This argument is written to. Container in which the device data are stored. ) { int deviceCount; CUDA_CHECK(cudaGetDeviceCount(&deviceCount)); output.reserve(static_cast(deviceCount)); for (int i = 0; i < deviceCount; i++) { cudaDeviceProp &device = output[i]; CUDA_CHECK(cudaGetDeviceProperties(&device, i)); output.push_back(device); } } // pthread call function void thread_function(void *arg) { thread_handle *pthread_handle = (thread_handle *)arg; // Create Instance of SAFT-Handler and call constructor SAFTHandler saft(pthread_handle->deviceId, // deviceId pthread_handle->deviceIndex, // deviceIndex pthread_handle->aScan_ptr, // aScan_ptr, pthread_handle->output_ptr, // output_ptr, pthread_handle->Duration_ptr, // Duration_ptr, pthread_handle->receiver_index_ptr, // receiver_index_ptr ///< pthread_handle->emitter_index_ptr, // emitter_index_ptr ///< pthread_handle->receiver_list_ptr, // receiver_list_ptr ///< pthread_handle->receiver_list_Size, // receiver_list_Size ///< pthread_handle->emitter_list_ptr, // emitter_list_ptr ///< pthread_handle->emitter_list_Size, // emitter_list_Size ///< pthread_handle->speed_vec_ptr, // speed_vec_ptr, pthread_handle->SOSGrid_XYZ, // SOSGrid_XYZ, pthread_handle->sosOffset, // sosOffset, ///< Startpoint of SoSGrid pthread_handle->SOS_RESOLUTION, // SOS_RESOLUTION, ///< Resolution of SoSGrid pthread_handle->att_vec_ptr, // att_vec_ptr pthread_handle->aScanCount, // aScanCount, pthread_handle->aScanLength, // aScanLength pthread_handle->res, // resolution => IMAGE_SIZE_XYZ, pthread_handle->sampleRate, // sampleRate pthread_handle->volposition, // => regionOfInterestOffset, pthread_handle->inc, // IMAGE_RESOLUTION, pthread_handle->fixedBlockDimensions, // fixedBlockDimensions pthread_handle->debugMode, // debugMode pthread_handle->debugModeParameter, // Parameter for DebugMode pthread_handle->SOSMode_3DVolume, pthread_handle->ATTMode_3DVolume, pthread_handle->SAFT_MODE, pthread_handle->SAFT_VARIANT, pthread_handle->SAFT_VARIANT_Size, pthread_handle->Abort_ptr // If there is not enough memory abort reconstruction. Wenn Fehler --> Abbruch; ); saft.performReconstruction(); // pthread_exit(NULL); } /** Check amount of GPUs and divide Volume ins parts with same size Abfrage der Anzahl an GPU-Devices und Einteilung des Volumens in möglichst gleichgroße Volumen in Z-Richtung (3D-Volumen) oder Y-Richtung (2D-Volumen) auf. */ void multithreaded_processing(float *aScan_ptr, ///< AScan-Daten double *output_ptr, ///< OutputDaten der Voxel unsigned short *receiver_index_ptr, ///< Index Receiver per Ascan unsigned short *emitter_index_ptr, ///< Index Emitter per Ascan float *receiver_list_ptr, ///< Positionskoordinaten Receiver int receiver_list_Size, ///< Menge an Receiver float *emitter_list_ptr, ///< Positionskoordinaten Emitter int emitter_list_Size, ///< Menge an Emitter float *speed_vec_ptr, ///< SoS Daten im Blockmode oder als SoSGrid int3 SOSGrid_XYZ, ///< Size of SoSGrid float3 sosOffset, ///< Startpoint of SoSGrid float SOS_RESOLUTION, ///< Aufloesung des SoSGrid float *att_vec_ptr, ///< Attenuation Daten als ATTGrid int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen int aScanLength, ///< Laenge der AscanDaten (normal 3000) float3 regionOfInterestOffset, int3 IMAGE_SIZE_XYZ, ///< Groesse des Bildbereichs in Voxel float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs float sampleRate, ///< Samplerate für AScans int3 BlockDim_XYZ, ///< BlockDimension für GPU double *Duration_ptr, ///< Rückgabepointer an Matlab für Laufzeit des SAFT-Kernels int selectedNumberGPUs, ///< Anzahl der ausgewählten GPUs bzw. auf maximale Anzhal vorhandener begrenzt int *enableGPUs_ptr, ///< Gibt an welche GPUs genutzt werden und welche nicht float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden float debugModeParameter, ///< Parameter der mit fuer Debugmode uebermittelt werden kann bool SOSMode_3DVolume, ///< Wird 3D Volumen für SOS-Korrektur genutzt? bool ATTMode_3DVolume, ///< Wird 3D Volumen für ATT-Korrektur genutzt? int SAFT_MODE, ///< Modus für SAFT-Rekonstruktion int *SAFT_VARIANT, ///< Verschiedene Parameter der Rekonstruktion int SAFT_VARIANT_Size, ///< Menge der verschiedenen Parameter für Rekonstruktion int *Abort_ptr ///< FehlerArray ) { dim3 fixedBlockDimensions( // convert int3 to dim3 BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z); // Divide workload and show Information ------------------------------------------------------------------------------------------------------------------ // Divide workload in pieces with the same size for all available GPUs. // If the workload can not be divided in pieces with the same size, the last piece will be the one with little less workload. // Testfall simuliert die mehrfache Anzahl an GPUs int num_devices_factor = 1; // Vielfache an GPUs simulieren bzw. kleinere Pakete erzeugen int num_workingPackages = selectedNumberGPUs * num_devices_factor; float3 *position = (float3 *)malloc(num_workingPackages * sizeof(float3)); int3 *resolution = (int3 *)malloc(num_workingPackages * sizeof(int3)); int3 *volumeStartpoint = (int3 *)malloc(num_workingPackages * sizeof(int3)); size_t *volumePtr = (size_t *)malloc(num_workingPackages * sizeof(size_t)); // int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int)); float3 volposition = regionOfInterestOffset; // Uebergabe der Parameter float inc = IMAGE_RESOLUTION; // koennte auch direkt umbenannt werden size_t volume_size = (size_t)IMAGE_SIZE_XYZ.x * (size_t)IMAGE_SIZE_XYZ.y * (size_t)IMAGE_SIZE_XYZ.z * (size_t)sizeof(double); // = Groesse des Outputvolumens in Byte if ((IMAGE_SIZE_XYZ.y == 1) && (IMAGE_SIZE_XYZ.z == 1)) { selectedNumberGPUs = 1; num_workingPackages = 1; } volumeStartpoint[0].x = 0; volumeStartpoint[0].y = 0; volumeStartpoint[0].z = 0; position[0].x = volposition.x; // Startposition position[0].y = volposition.y; position[0].z = volposition.z; std::vector resolutionZs(num_workingPackages, IMAGE_SIZE_XYZ.z / num_workingPackages); for (size_t i = 0; i < IMAGE_SIZE_XYZ.z % num_workingPackages; i++) resolutionZs[i]++; int i, j, k; for (i = 0; i < num_workingPackages; i++) { if (IMAGE_SIZE_XYZ.z > 1) { // Divide in Z-Direction resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization resolution[i].y = IMAGE_SIZE_XYZ.y; resolution[i].z = resolutionZs[i]; if (i > 0) { volumeStartpoint[i].x = 0; // Koordinaten als Startpunkt für Layer in einzelnen GPUs volumeStartpoint[i].y = 0; volumeStartpoint[i].z = volumeStartpoint[i - 1].z + resolution[i - 1].z; } volumePtr[i] = (size_t)((size_t)resolution[0].x * (size_t)resolution[0].y * (size_t)volumeStartpoint[i].z); // Startpunkt der Speicherstellen fuer das Outputvolumen } else { // Divide in Y-Direction resolution[i].x = IMAGE_SIZE_XYZ.x; // Initialization resolution[i].z = IMAGE_SIZE_XYZ.z; if (IMAGE_SIZE_XYZ.y % num_workingPackages == 0) { // Volume is divisible resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages; } else { // if not divisible, if (i != (num_workingPackages - 1)) { // increment each GPU slice by one resolution[i].y = IMAGE_SIZE_XYZ.y / num_workingPackages + 1; } else { // except the last one, which get the remaining Layers resolution[i].y = IMAGE_SIZE_XYZ.y % resolution[0].y; } } } position[i].x = volposition.x; // Startposition position[i].y = volposition.y; position[i].z = volposition.z; if (IMAGE_SIZE_XYZ.z > 1) position[i].z += i * inc * resolution[0].z; // Calculate Startpositions for the workload-pieces else position[i].y += i * inc * resolution[0].y; } // Create one thread per GPU ------------------------------------------------------------------------------------------------------------------------ thread_handle *pthread_handle = (thread_handle *)malloc(selectedNumberGPUs * sizeof(thread_handle)); for (i = 0; i < num_workingPackages; i++) { // initialize control block pthread_handle[i].deviceId = enableGPUs_ptr[(i % selectedNumberGPUs)]; // Hier DeviceID der GPU setzen pthread_handle[i].deviceIndex = (i % selectedNumberGPUs); pthread_handle[i].aScan_ptr = aScan_ptr; if (IMAGE_SIZE_XYZ.z > 1) pthread_handle[i].output_ptr = &output_ptr[volumePtr[i]]; // Startpoint for Outputvolume. // volumePtr[i] = (size_t)(resolution[0].x * resolution[0].y * volumeStartpoint[i].z); //Startpunkt der Speicherstellen fuer das Outputvolumen else // pthread_handle[i].output_ptr = &output_ptr[ i * resolution[0].x * resolution[0].y * resolution[0].z ]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Gr��e hat. pthread_handle[i].output_ptr = &output_ptr[(size_t)resolution[0].x * (size_t)i * (size_t)resolution[0].y]; // Startpoint for Outputvolume. [0] da nur der letzte eine andere Groesse hat. Z spielt hier keine Rolle pthread_handle[i].Duration_ptr = Duration_ptr; pthread_handle[i].receiver_index_ptr = receiver_index_ptr; pthread_handle[i].emitter_index_ptr = emitter_index_ptr; pthread_handle[i].receiver_list_ptr = receiver_list_ptr; pthread_handle[i].receiver_list_Size = receiver_list_Size; pthread_handle[i].emitter_list_ptr = emitter_list_ptr; pthread_handle[i].emitter_list_Size = emitter_list_Size; pthread_handle[i].speed_vec_ptr = speed_vec_ptr; pthread_handle[i].SOSGrid_XYZ = SOSGrid_XYZ; pthread_handle[i].sosOffset = sosOffset; pthread_handle[i].SOS_RESOLUTION = SOS_RESOLUTION; pthread_handle[i].att_vec_ptr = att_vec_ptr; pthread_handle[i].aScanCount = aScanCount; pthread_handle[i].aScanLength = aScanLength, pthread_handle[i].inc = IMAGE_RESOLUTION; pthread_handle[i].res = resolution[i]; pthread_handle[i].sampleRate = sampleRate; pthread_handle[i].volposition = position[i]; // regionOfInterestOffset pthread_handle[i].num_threads = num_workingPackages; pthread_handle[i].fixedBlockDimensions = fixedBlockDimensions; // pthread_handle[i].debugMode = debugMode; pthread_handle[i].debugModeParameter = debugModeParameter; pthread_handle[i].SOSMode_3DVolume = SOSMode_3DVolume; pthread_handle[i].ATTMode_3DVolume = ATTMode_3DVolume; pthread_handle[i].SAFT_MODE = SAFT_MODE; pthread_handle[i].SAFT_VARIANT = SAFT_VARIANT; pthread_handle[i].SAFT_VARIANT_Size = SAFT_VARIANT_Size; Abort_ptr[i] = 0; // Initialisieren mit kein Fehler pthread_handle[i].Abort_ptr = &Abort_ptr[i]; } auto startAllThreads = std::chrono::steady_clock::now(); double diff_time = 0.0; std::vector> futures; futures.resize(selectedNumberGPUs); SPDLOG_INFO("Start GPU execute!"); for (j = 0; j < num_devices_factor; j++) { for (k = 0; k < selectedNumberGPUs; k++) { // new async threads futures[k] = std::async(std::forward>(thread_function), std::forward((void *)&pthread_handle[(j * selectedNumberGPUs + k)])); // forward used for perfect forwarding } // Synchronization and termination ------------------------------------------------------------------------------------------------------------------- for (k = 0; k < selectedNumberGPUs; k++) { // new async threads futures[k].wait(); // advantage: async are packaged tasks after c++ with os handling, and consistency handling (if destructor is called, it executes task) } } // gettimeofday(&stopAllThreads, NULL); SPDLOG_INFO("GPU execute finish!"); auto stopAllThreads = std::chrono::steady_clock::now(); diff_time = std::chrono::duration_cast(stopAllThreads - startAllThreads).count(); // total duration in µs Duration_ptr[0] = diff_time; // Return total duration in µs // Speicher wieder freigeben free(position); free(resolution); free(volumeStartpoint); free(volumePtr); // free (Abort_ptr); free(pthread_handle); } /** preintegrateAscans Determine maximal SampleWidth, matching to the resolution to be used for reconstruction, and integrate A-scan over an window of this SampleWidth */ void preintegrateAscans(float *aScan_ptr, ///< AScan-Daten float *AscansOut_ptr, ///< AScan-OutputDaten fuer Testrueckgabe float *speed_vec_ptr, ///< SoS Daten im Blockmode int aScanCount, ///< Anzahl der AScans die im Blockmode verarbeitet werden sollen int aScanLength, ///< Laenge der AscanDaten (normal 3000) float IMAGE_RESOLUTION, ///< Aufloesung des Bildbereichs float sampleRate, ///< Samplerate fuer AScans float debugMode, ///< Ausgabe im Debugmode -> Verschiedene Werte können ausgegeben werden float debugModeParameter ///< Parameter der mit fuer Debugmode uebermittelt werden kann ) { float windowWidth = 0.0f; float windowWidthHalf = 0.0f; // maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615 // width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen windowWidth = (float)3.464101615 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0]; windowWidthHalf = (float)1.732050808 * IMAGE_RESOLUTION / sampleRate / speed_vec_ptr[0]; // halbe Fenster Breite #pragma omp parallel for num_threads(32) for (int j = 0; j < aScanCount; j++) { // über alle A-scans gehen. float *AscanBuffer = (float *)malloc(aScanLength * sizeof(float)); int i_start, i_end = 0; float nSample = 0.0f; float windowWidthHalf_minus1 = 0.0f; float windowSum = 0.0f; if ((int)ceil(windowWidth) % 2 == 1) { // Uneven / Ungerade // // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden windowWidthHalf_minus1 = floor((ceil(windowWidth) - 1) / 2); for (int i = 0; i < aScanLength; i++) { // über gesamte Breite des A-scans gehen. i_start = i - (int)windowWidthHalf_minus1; i_end = i + (int)windowWidthHalf_minus1; // Grenzen einhalten if (i_start < 0) i_start = 0; if (i_end > aScanLength - 1) i_end = aScanLength - 1; // Anzahl Sample bestimmen nSample = i_end - i_start + 1; // +1 da erstes Element auch dazugehört. windowSum = 0; for (int k = i_start; k <= i_end; k++) { windowSum += aScan_ptr[j * aScanLength + k]; } // geteilt durch nur die genutzten Samples // AscanBuffer[i] = windowSum/ceil(nSample); // Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem // Rand nicht vorhanden/bzw = 0 sind. --> Abflachung am Rand AscanBuffer[i] = windowSum / ceil(windowWidth); } } else if ((int)ceil(windowWidth) % 2 == 0) { // Even / Gerade // Bei geraden Breiten symmetrisch mit den beiden äußeren Samplewerten zu je 1/2 gewichten. windowWidthHalf_minus1 = floor((ceil(windowWidth) - 1) / 2); for (int i = 0; i < aScanLength; i++) { // über gesamte Breite des A-scans gehen. i_start = i - (int)windowWidthHalf_minus1; i_end = i + (int)windowWidthHalf_minus1; // Grenzen einhalten if (i_start < 0) i_start = 0; if (i_end > aScanLength - 1) i_end = aScanLength - 1; // Anzahl Sample bestimmen nSample = i_end - i_start + 1; // +1 da erstes Element auch dazugehört. windowSum = 0; for (int k = i_start; k <= i_end; k++) { windowSum += aScan_ptr[j * aScanLength + k]; } // geteilt wird durch nur die genutzten Samples // AscanBuffer[i] = windowSum/ceil(nSample); // Michael teilt durch die gesamte Breite an Samples,auch wenn sie an dem // Rand nicht vorhanden/bzw = 0 sind.--> Abflachung am Rand windowSum = windowSum / ceil(windowWidth); // Halbe Samplewerte an Grenzen miteinberechen aber absolute Grenzen einhalten if (i_start >= 0) { windowSum = windowSum + aScan_ptr[j * aScanLength + (i_start - 1)] / (2 * ceil(windowWidth)); // Linken Nachbarn zu 1/2 mit dazunehmen nSample = nSample + 0.5; } if (i_end < aScanLength - 1) { windowSum = windowSum + aScan_ptr[j * aScanLength + (i_end + 1)] / (2 * ceil(windowWidth)); // Rechten Nachbarn zu 1/2 mit dazunehmen nSample = nSample + 0.5; } AscanBuffer[i] = windowSum; } } // Transfer Data from Buffer to Memory regions for (int i = 0; i < aScanLength; i++) { // printf( " i (%4i) = %6.3f ",i,AscanBuffer[i]); // AscansOut_ptr[i] = *(aScan_ptr+i); // AscanBuffer[i] = aScan_ptr[i]; aScan_ptr[j * aScanLength + i] = AscanBuffer[i]; // Write in A-scans Memory AscansOut_ptr[j * aScanLength + i] = AscanBuffer[i]; // Also write back for Matlab } free(AscanBuffer); } } const size_t *GetDimensions(const Matrix_t &matrix) { return matrix.Dims; } const void *GetPr(const Matrix_t &matrix) { return matrix.Data; } size_t GetNumberOfDimensions(const Matrix_t &matrix) { return matrix.NumberOfDims; } size_t GetNumberOfElements(const Matrix_t &matrix) { return matrix.DataSize; } Matrix_t SAFT_TOFI(std::vector ¶ms) { auto console_sink = std::make_shared(); console_sink->set_level(spdlog::level::info); console_sink->set_pattern(fmt::format("[%Y-%m-%d %T .%f][{}] [%^%l%$] %v", "SAFT")); std::shared_ptr logger(new spdlog::logger("SAFT", {console_sink})); logger->set_level(spdlog::level::info); logger->flush_on(spdlog::level::info); SPDLOG_INFO("Start SAFT!"); size_t AScan_Nx, AScan_Mx, pix_vect_Nx, pix_vect_Mx, receiver_index_Nx, receiver_index_Mx, emitter_index_Nx, emitter_index_Mx, receiver_list_Nx, receiver_list_Mx, emitter_list_Nx, emitter_list_Mx, SAFT_mode_Nx, SAFT_mode_Mx, SAFT_variant_Nx, SAFT_variant_Mx, speed_Nx, speed_Mx, SOSGrid_Xx, SOSGrid_Yx, SOSGrid_Zx, sos_startPoint_Nx, sos_startPoint_Mx, sos_res_Nx, sos_res_Mx, attVolume_Nx, attVolume_Mx, ATTGrid_Xx, ATTGrid_Yx, ATTGrid_Zx, res_Nx, res_Mx, timeint_Nx, timeint_Mx, IMAGE_XYZ_Nx, IMAGE_XYZ_Mx, IMAGE_SUM_Xx, IMAGE_SUM_Yx, IMAGE_SUM_Zx, BlockDim_XYZ_Nx, BlockDim_XYZ_Mx, GPUs_Nx, GPUs_Mx, dbgMode_Nx, dbgMode_Mx; int aScanCount; int aScanLength; float *aScan_ptr; int3 IMAGE_SIZE_XYZ; int3 BlockDim_XYZ; bool SOSMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> SOSGrid_XYZ, SOS_RESOLUTION, sosOffset not neccessary bool ATTMode_3DVolume; // Mode of SOS use: Grid (1) or Block (0) --> ATTGrid_XYZ not neccessary int SAFT_MODE; int *SAFT_VARIANT; // Variances of SAFT int SAFT_VARIANT_Size; // Size of Varainces int3 SOSGrid_XYZ; // Size of SOSGrid int3 ATTGrid_XYZ; // Size of ATTGrid float3 regionOfInterestOffset; // Startpoint float3 sosOffset; // Startpoint SoS float IMAGE_RESOLUTION; // Aufloesung float SOS_RESOLUTION; // Aufloesung float sampleRate; // Samplerate für AScans int selectedNumberGPUs; // Anzahl der genutzten GPUs durch uebergebene Groesse der Ausgewaehlten GPUs float debugMode; float debugModeParameter; if (params.size() != 19) { printf(" \n"); printf(" Inputparameter \n"); printf(" In[n] Meaning [Row N x Col M] Type\n"); printf(" =============================================================================\n"); printf(" 1-prhs[0] AScan-Data [3000?xnAscans] single\n"); printf(" 2-prhs[1] IMAGE_STARTPOINT_S [1x3] single\n"); printf(" 3-prhs[2] receiver_index [1xnAscans] uint16\n"); printf(" 4-prhs[3] emitter_index [1xnAscans] uint16\n"); printf(" 5-prhs[4] receiver_list [3xnReceiver] single\n"); printf(" 6-prhs[5] emitter_list [3xnEmitter] single\n"); printf(" 7-prhs[6] SAFT_mode [1x1] uint32\n"); printf(" 8-prhs[7] SAFT_variant [1x6] uint32\n"); printf(" Standard: [1 1 1 1 0 0] \n"); printf(" -> Ascan Preintegration\n"); printf(" -> Ascan Interpolation\n"); printf(" -> Preprocessing SOS&ATT 3D Volume Interpolation\n"); printf(" -> Reconstruction SOS&ATT 3D Volume Interpolation\n"); printf(" -> not yet\n"); printf(" -> not yet\n"); printf(" 9-prhs[8] SOSVolume [SOS_XxYxZ] single in m/s\n"); printf(" 10-prhs[9] SOS_STARTPOINT_S [1x3] single\n"); printf(" 11-prhs[10] SOS_RESOLUTION_S [1x1] single\n"); printf(" 12-prhs[11] ATTVolume [ATT_XxYxZ] single in dB/cm\n"); printf(" 13-prhs[12] IMAGE_RESOLUTION_S [1x1] single\n"); printf(" 14-prhs[13] TimeInterval_S [1x1] single\n"); printf(" 15-prhs[14] IMAGE_XYZ [1x3] uint32\n"); printf(" 16-prhs[15] IMAGE_SUM [Output_XxYxZ] double\n"); printf(" 17-prhs[16] BlockDim_XYZ (GPU) [1x3] uint32\n"); printf(" 18-prhs[17] GPUs (DeviceNr GPU) [1xn] uint32\n"); printf(" 19-prhs[18] dbgMode,dbgModeParam [1x2] single\n"); printf(" ==============================================================================\n"); printf("\n"); printf(" Outputparameter \n"); printf(" Out[n] Meaning \n"); printf(" ================================================================================================= \n"); printf(" plhs[0] = Output_Voxels = mxCreateNumericArray ( [IMAGE_XYZ] , mxDOUBLE_CLASS, mxREAL); \n"); printf(" plhs[1] = Duration = mxCreateDoubleMatrix ( [nGPUs+1, 1] , mxREAL); \n"); printf(" plhs[2] = Output_Ascans = mxCreateNumericMatrix( [3000?,nAscans], mxSINGLE_CLASS, mxREAL); \n"); printf(" ================================================================================================= \n"); printf("Wrong number of input arguments. Should be 19."); } // assign input arguments... // Bestimme die Eingangswerte const Matrix_t &AScan = params[0]; // AScan-Data const Matrix_t &pix_vect = params[1]; // Image Startpoint (IMAGE_STARTPOINT_S) const Matrix_t &receiver_index = params[2]; // Index Data for Receiver-Position Data const Matrix_t &emitter_index = params[3]; // Index Data for Emitter-Position Data const Matrix_t &receiver_list = params[4]; // Assignment Index to Receiver-Position Data const Matrix_t &emitter_list = params[5]; // Assignment Index to Emitter-Position Data const Matrix_t &SAFT_mode = params[6]; // SOS?, ATT? const Matrix_t &SAFT_variant = params[7]; // Differnt Mode-Parameter for Reconstruction const Matrix_t &speed = params[8]; // Speed of Sound Data (Single, SoS-Grid) const Matrix_t &sos_startPoint = params[9]; // Startpoint of Speed of Sound Grid const Matrix_t &sos_res = params[10]; // SoS Grid Resolution const Matrix_t &attVolume = params[11]; // Attenuation Data (Single, SoS-Grid) const Matrix_t &res = params[12]; // Output Volume Resolution const Matrix_t &timeint = params[13]; // 1/Sample-Rate const Matrix_t &IMAGE_XYZ = params[14]; // Output Volume Size XYZ const Matrix_t &IMAGE_SUM = params[15]; // Volume from previous Call const Matrix_t &BlockDim = params[16]; // Block Dimension to use for GPU const Matrix_t &GPUs = params[17]; // Welche GPUs sollen genutzt werden? const Matrix_t &dbgMode = params[18]; // DebugMode and DebugMode-Parameter //====================================================================== 1.Input Parameter - Check AScan AScan_Nx = GetDimensions(AScan)[0]; // Reihen N ermitteln AScan_Mx = GetDimensions(AScan)[1]; // Spalten M ermitteln aScanCount = AScan_Mx; aScanLength = AScan_Nx; // printf( "mxGetNumberOfDimensions(AScan)=%i\n", mxGetNumberOfDimensions(AScan)); if ((aScanCount > 65535)) // new 2019: increasing the limit of the A-Scan block size. however this is limited by the datatype of unsigned short which is used for a pointer. { printf(" -> AScanBlock size = %i\n", aScanCount); printf("AScanBlock size might be too large (=> 2^16)!!!"); } aScan_ptr = (float *)GetPr(AScan); //====================================================================== 2.Input Parameter - Check IMAGE_STARTPOINT_S / pix_vect pix_vect_Nx = GetDimensions(pix_vect)[0]; // Reihen N ermitteln pix_vect_Mx = GetDimensions(pix_vect)[1]; // Spalten M ermitteln regionOfInterestOffset.x = *((float *)GetPr(pix_vect)); regionOfInterestOffset.y = *((float *)GetPr(pix_vect) + 1); regionOfInterestOffset.z = *((float *)GetPr(pix_vect) + 2); if (!(pix_vect_Nx == 1) || !(pix_vect_Mx == 3)) printf(" -> Dimension of IMAGE_STARTPOINT_S must be [1 x 3]"); if ((pix_vect_Nx > 1)) printf(" -> No Blockmode [%i x 3] allowed for IMAGE_STARTPOINT_S\n", pix_vect_Nx); // if(!(mxIsSingle(pix_vect))) // printf(" -> IMAGE_STARTPOINT_S must be Single"); //====================================================================== 3.Input Parameter - Check Receiver Index receiver_index_Nx = GetDimensions(receiver_index)[0]; // Reihen N ermitteln receiver_index_Mx = GetDimensions(receiver_index)[1]; // Spalten M ermitteln if (!(receiver_index_Nx == 1)) printf(" -> Dimension of receiver_index must be [1 x M]"); if (!(receiver_index_Mx == aScanCount)) { printf(" -> aScanCount(%i)!= M(%i)\n", aScanCount, receiver_index_Mx); printf(" -> Dimension of receiver_index has different size as Ascan-Data\n"); } // Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten unsigned short *receiver_index_ptr; receiver_index_ptr = (unsigned short *)GetPr(receiver_index); //====================================================================== 4.Input Parameter - Check Emitter Index emitter_index_Nx = GetDimensions(emitter_index)[0]; // Reihen N ermitteln emitter_index_Mx = GetDimensions(emitter_index)[1]; // Spalten M ermitteln if (!(emitter_index_Nx == 1)) printf(" -> Dimension of emitter_index must be [1 x M]"); if (!(emitter_index_Mx == aScanCount)) { printf(" -> aScanCount(%i)!= M(%i)\n", aScanCount, emitter_index_Mx); printf(" -> Dimension of emitter_index has different size as Ascan-Data\n"); } // Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten unsigned short *emitter_index_ptr = (unsigned short *)GetPr(emitter_index); //====================================================================== 5.Input Parameter - Check receiver_list receiver_list_Nx = GetDimensions(receiver_list)[0]; // Reihen N ermitteln receiver_list_Mx = GetDimensions(receiver_list)[1]; // Spalten M ermitteln if (!(receiver_list_Nx == 3)) printf(" -> Dimension of receiver_list must be [3 x M]"); // Ausgabe einzelner Geometriedaten der Uebergabewerte mit verschiedenen Varianten float *receiver_list_ptr; receiver_list_ptr = (float *)GetPr(receiver_list); //====================================================================== 6.Input Parameter - Check emitter_list emitter_list_Nx = GetDimensions(emitter_list)[0]; // Reihen N ermitteln emitter_list_Mx = GetDimensions(emitter_list)[1]; // Spalten M ermitteln // emitter_list gibt die maximale Anzahl an Emittern die in diesem Block vorkommen können wieder! if (!(emitter_list_Nx == 3)) printf(" -> Dimension of emitter_list must be [3 x M]"); // Ausgabe einzelner Geometriedaten der übergabewerte mit verschiedenen Varianten float *emitter_list_ptr; emitter_list_ptr = (float *)GetPr(emitter_list); //====================================================================== 7.Input Parameter - Check SAFT_mode SAFT_mode_Nx = GetDimensions(SAFT_mode)[0]; // Reihen N ermitteln SAFT_mode_Mx = GetDimensions(SAFT_mode)[1]; // Spalten M ermitteln SAFT_MODE = *((int *)GetPr(SAFT_mode)); if (!(SAFT_mode_Nx == 1)) printf(" -> Dimension of SAFT_MODE must be [1 x 1]"); switch (SAFT_MODE) { case 0: SOSMode_3DVolume = false; ATTMode_3DVolume = false; // printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); printf(" -> AscanIndexVersion only make sense with SOS or SOS and ATT Volume => exit"); break; case 1: SOSMode_3DVolume = true; ATTMode_3DVolume = false; // printf ( "\e[7;37m + Speed of sound correction - Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); break; case 2: SOSMode_3DVolume = true; ATTMode_3DVolume = true; // printf ( "\e[7;37m + Speed of sound correction + Attenuation correction (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); break; case 3: // SOSMode_3DVolume = true; ATTMode_3DVolume = true; // printf ( "\e[7;37m SAFT_MODE = 3 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); printf(" -> not implemented => exit"); break; case 4: // SOSMode_3DVolume = false; ATTMode_3DVolume = false; // printf ( "\e[7;37m SAFT_MODE = 4 \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); printf(" -> not implemented => exit"); break; default: SOSMode_3DVolume = false; ATTMode_3DVolume = false; // printf ( " -> SAFT_MODE %i is out of range [0..3] => use Standard SAFT\n", SAFT_MODE); // printf ( "\e[7;37m Standard SAFT without correction (-SOS -ATT) (%i,%i) \e[0m", SOSMode_3DVolume, ATTMode_3DVolume); break; } //====================================================================== 8.Input Parameter - Check SAFT_variant SAFT_variant_Nx = GetDimensions(SAFT_variant)[0]; // Reihen N ermitteln SAFT_variant_Mx = GetDimensions(SAFT_variant)[1]; // Spalten M ermitteln SAFT_VARIANT = (int *)GetPr(SAFT_variant); SAFT_VARIANT_Size = SAFT_variant_Mx; // if(!(mxIsUint32(SAFT_variant))) // printf(" -> SAFT_VARIANT must be Uint32"); if (!(SAFT_variant_Nx == 1) || !(SAFT_variant_Mx == 6)) printf(" -> Dimension of SAFT_VARIANT must be [1 x 6]"); //====================================================================== 9.Input Parameter - Check for SOS volume speed_Nx = GetDimensions(speed)[0]; // Reihen N ermitteln speed_Mx = GetDimensions(speed)[1]; // Spalten M ermitteln float Sos = *((float *)GetPr(speed)); float *speed_vec_ptr; speed_vec_ptr = (float *)GetPr(speed); // Pointer für SoSDaten ermitteln // if(!(mxIsSingle(speed))) // printf(" -> SOSVolume must be Single"); if (SOSMode_3DVolume == true) // SOS correction need 3D Volume { if (GetNumberOfDimensions(speed) == 3) { if (!((speed_Nx > 1) && (speed_Mx > 1))) { printf(" -> SOSGrid_XYZ.x and SOSGrid_XYZ.y must be > 1 for SOS Correction with 3D Volume!!!"); } } else if (GetNumberOfDimensions(speed) == 2) { printf("prhs[8] SOSVolume [%ix%i]\n", (int)GetDimensions(speed)[0], (int)GetDimensions(speed)[1]); printf(" -> SOSVolume is not a 3D Volume as expected!"); } SOSGrid_Xx = GetDimensions(speed)[0]; // SOSGrid_X ermitteln SOSGrid_Yx = GetDimensions(speed)[1]; // SOSGrid_Y ermitteln SOSGrid_Zx = GetDimensions(speed)[2]; // SOSGrid_Z ermitteln SOSGrid_XYZ.x = SOSGrid_Xx; SOSGrid_XYZ.y = SOSGrid_Yx; SOSGrid_XYZ.z = SOSGrid_Zx; if ((SOSGrid_XYZ.x > 128) || (SOSGrid_XYZ.y > 128) || (SOSGrid_XYZ.z > 128)) { printf(" -> SOSGrid_XYZ [%i x %i x %i]\n", (int)SOSGrid_Xx, (int)SOSGrid_Yx, (int)SOSGrid_Zx); printf(" Warning -> SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n"); } } //====================================================================== 10.Input Parameter - Check SoS Startpoint sos_startPoint_Nx = GetDimensions(sos_startPoint)[0]; // Reihen N ermitteln sos_startPoint_Mx = GetDimensions(sos_startPoint)[1]; // Spalten M ermitteln sosOffset.x = *((float *)GetPr(sos_startPoint)); sosOffset.y = *((float *)GetPr(sos_startPoint) + 1); sosOffset.z = *((float *)GetPr(sos_startPoint) + 2); if (!(sos_startPoint_Nx == 1) || !(sos_startPoint_Mx == 3)) printf(" -> Dimension of SOS_STARTPOINT_S must be [1 x 3]"); if ((sos_startPoint_Nx > 1)) printf(" -> No Blockmode [%i x 3] allowed for SOS_STARTPOINT_S\n", sos_startPoint_Nx); // if(!(mxIsSingle(sos_startPoint))) // printf(" -> SOS_STARTPOINT_S must be Single"); //====================================================================== 11.Input Parameter - Check SoS_RESOLUTION / sos_res if (SOSMode_3DVolume == true) { sos_res_Nx = GetDimensions(sos_res)[0]; // Reihen N ermitteln sos_res_Mx = GetDimensions(sos_res)[1]; // Spalten M ermitteln SOS_RESOLUTION = *((float *)GetPr(sos_res)); if (!(sos_res_Nx == 1)) printf(" -> Dimension of SOS_RESOLUTION_S must be [1 x 1]"); if ((sos_res_Mx > 1)) printf(" -> No Blockmode allowed for SOS_RESOLUTION_S! [1 x %i]\n", sos_res_Mx); // if(!(mxIsSingle(sos_res))) // printf(" -> SOS_RESOLUTION_S must be Single"); } //====================================================================== 12.Input Parameter - Check for ATTVolume / Attenuation-Data attVolume_Nx = GetDimensions(attVolume)[0]; // Reihen N ermitteln attVolume_Mx = GetDimensions(attVolume)[1]; // Spalten M ermitteln float *att_vec_ptr; att_vec_ptr = (float *)GetPr(attVolume); // Pointer für ATT-Daten ermitteln if (GetNumberOfDimensions(attVolume) == 3) { if (!((attVolume_Nx > 1) && (attVolume_Mx > 1))) { printf(" -> ATTGrid_XYZ.x and ATTGrid_XYZ.y must be > 1 for ATT Correction with 3D Volume!!!"); } } else if (GetNumberOfDimensions(attVolume) == 2) { } if ((SOSMode_3DVolume == true) && (ATTMode_3DVolume == true)) { // 3D Volume muss bei SOS und ATT angegeben sein damit ATT Korrektur durchgefuehrt werden kann ATTGrid_Xx = GetDimensions(attVolume)[0]; // ATTGrid_X ermitteln ATTGrid_Yx = GetDimensions(attVolume)[1]; // ATTGrid_Y ermitteln ATTGrid_Zx = GetDimensions(attVolume)[2]; // ATTGrid_Z ermitteln ATTGrid_XYZ.x = ATTGrid_Xx; ATTGrid_XYZ.y = ATTGrid_Yx; ATTGrid_XYZ.z = ATTGrid_Zx; if ((ATTGrid_XYZ.x > 128) || (ATTGrid_XYZ.y > 128) || (ATTGrid_XYZ.z > 128)) { printf(" -> ATTGrid_XYZ [%i x %i x %i]\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z); printf(" Warning -> ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z > 128!!! --> can be problematic due to memory requirement\n"); } if ((ATTGrid_XYZ.x != SOSGrid_XYZ.x) || (ATTGrid_XYZ.y != SOSGrid_XYZ.y) || (ATTGrid_XYZ.z != SOSGrid_XYZ.z)) { // Restriction: Volume parameter of ATT & SOS must be the same printf(" -> ATTGrid[%i %i %i] != SOSGrid[%i %i %i]!\n", ATTGrid_XYZ.x, ATTGrid_XYZ.y, ATTGrid_XYZ.z, SOSGrid_XYZ.x, SOSGrid_XYZ.y, SOSGrid_XYZ.z); printf(" -> ATTGrid must have the same size as SOSGrid \n"); } } else { printf(" -> ATTMode_3DVolume == false => skip ATTGrid\n"); ATTGrid_Xx = 0; // ATTGrid_X ermitteln ATTGrid_Yx = 0; // ATTGrid_Y ermitteln ATTGrid_Zx = 0; // ATTGrid_Z ermitteln ATTGrid_XYZ.x = ATTGrid_Xx; ATTGrid_XYZ.y = ATTGrid_Yx; ATTGrid_XYZ.z = ATTGrid_Zx; } //====================================================================== 13.Input Parameter - Check IMAGE_RESOLUTION_S / res res_Nx = GetDimensions(res)[0]; // Reihen N ermitteln res_Mx = GetDimensions(res)[1]; // Spalten M ermitteln IMAGE_RESOLUTION = *((float *)GetPr(res)); if (!(res_Nx == 1)) printf(" -> Dimension of IMAGE_RESOLUTION must be [1 x 1]"); if ((res_Mx > 1)) printf(" -> No Blockmode allowed for IMAGE_RESOLUTION! [1 x %i]\n", res_Mx); // if(!(mxIsSingle(res))) // printf(" -> IMAGE_RESOLUTION must be Single"); if (SOSMode_3DVolume == true) { if (IMAGE_RESOLUTION > SOS_RESOLUTION) { printf(" -> IMAGE_RESOLUTION (%f) > SOS_RESOLUTION (%f)\n", IMAGE_RESOLUTION, SOS_RESOLUTION); printf(" -> IMAGE_RESOLUTION must not > SOS_RESOLUTION !!!"); } } //====================================================================== 14.Input Parameter - Check TimeInterval_S / Timeint timeint_Nx = GetDimensions(timeint)[0]; // Reihen N ermitteln timeint_Mx = GetDimensions(timeint)[1]; // Spalten M ermitteln sampleRate = *((float *)GetPr(timeint)); if (!(timeint_Nx == 1)) printf(" -> Dimension of TimeInterval_S must be [1 x 1]"); if ((timeint_Mx > 1)) printf(" -> No Blockmode allowed for TimeInterval_S! [1 x %i]\n", timeint_Mx); //====================================================================== 15.Input Parameter - Check IMAGE_XYZ_UI32 / IMAGE_XYZ IMAGE_XYZ_Nx = GetDimensions(IMAGE_XYZ)[0]; // Reihen N ermitteln IMAGE_XYZ_Mx = GetDimensions(IMAGE_XYZ)[1]; // Spalten M ermitteln IMAGE_SIZE_XYZ.x = *((int *)GetPr(IMAGE_XYZ)); IMAGE_SIZE_XYZ.y = *((int *)GetPr(IMAGE_XYZ) + 1); IMAGE_SIZE_XYZ.z = *((int *)GetPr(IMAGE_XYZ) + 2); if (!(IMAGE_XYZ_Nx == 1) || !(IMAGE_XYZ_Mx == 3)) printf(" -> Dimension of IMAGE_XYZ must be [1 x 3]"); if ((IMAGE_XYZ_Nx > 1)) printf(" -> No Blockmode allowed for IMAGE_XYZ! [%i x 3]\n", IMAGE_XYZ_Nx); // if(!(mxIsUint32(IMAGE_XYZ))) // printf(" -> IMAGE_XYZ must be UINT32"); if ((IMAGE_SIZE_XYZ.x > 8192) || (IMAGE_SIZE_XYZ.y > 8192)) // Aufteilung in BlockDim 512,1,1 passt für 5632x5632. Es würde etwas weiter gehen aber dann muss Y kleiner sein. printf(" -> IMAGE_XYZ must not > [8192 x 8192 x N]!!!"); //====================================================================== 16.Input Parameter - Check Env / IMAGE_SUM IMAGE_SUM_Xx = GetDimensions(IMAGE_SUM)[0]; // Spalten M ermitteln X IMAGE_SUM_Yx = GetDimensions(IMAGE_SUM)[1]; // Reihen N ermitteln Y if (GetNumberOfDimensions(IMAGE_SUM) > 2) IMAGE_SUM_Zx = GetDimensions(IMAGE_SUM)[2]; // Z-Schichten ermitteln Z else if (GetNumberOfDimensions(IMAGE_SUM) == 2) IMAGE_SUM_Zx = 1; // Z-Schichten = 1 else { printf(" -> mxGetNumberOfDimensions of IMAGE_SUM = %i\n", (int)GetNumberOfDimensions(IMAGE_SUM)); printf(" -> Dimension of IMAGE_SUM must be 3: [X x Y x Z]"); } uint64_t IMAGE_SUM_Count = GetNumberOfElements(IMAGE_SUM); float *IMAGE_SUM_vec_ptr = (float *)GetPr(IMAGE_SUM); //====================================================================== 17.Input Parameter - Check BlockDimension for GPU BlockDim_XYZ_Nx = GetDimensions(BlockDim)[0]; // Reihen N ermitteln BlockDim_XYZ_Mx = GetDimensions(BlockDim)[1]; // Spalten M ermitteln BlockDim_XYZ.x = *((int *)GetPr(BlockDim)); BlockDim_XYZ.y = *((int *)GetPr(BlockDim) + 1); BlockDim_XYZ.z = *((int *)GetPr(BlockDim) + 2); if (!(BlockDim_XYZ_Nx == 1) || !(BlockDim_XYZ_Mx == 3)) printf(" -> Dimension of BlockDim_XYZ must be [1 x 3]"); if ((BlockDim_XYZ_Nx > 1)) printf(" -> No Blockmode! [%i x 3]\n", (int)BlockDim_XYZ_Nx); // if(!(mxIsUint32(BlockDim))) // printf(" -> BlockDim_XYZ must be UINT32"); if ((BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z) > 1024) { // BlockSize limited to 1024. Perhaps newer GPUs will support more Threads per Block printf(" -> BlockDim_XYZ.x * BlockDim_XYZ.y * BlockDim_XYZ.z must not > 1024!!!"); // Here Adaption for BlockSize can be done. BlockDim_XYZ.x = 1024; // If Blockdimensions are not specified than standard Blockdimensions will be used. BlockDim_XYZ.x = 1; BlockDim_XYZ.x = 1; printf(" -> Standard Size for BlockDim_XYZ is used [%ix%ix%i]\n", BlockDim_XYZ.x, BlockDim_XYZ.y, BlockDim_XYZ.z); } //====================================================================== 18.Input Parameter - Check GPUs int *enableGPUs_ptr; GPUs_Nx = GetDimensions(GPUs)[0]; // Reihen N ermitteln GPUs_Mx = GetDimensions(GPUs)[1]; // Spalten M ermitteln enableGPUs_ptr = (int *)GetPr(GPUs); selectedNumberGPUs = GPUs_Mx; // Determine Number of GPU-Devices and check if there are so many available int num_devices = 0; // printf( " -> cudaGetDeviceCount: %i\n", num_devices); CUDA_CHECK(cudaGetDeviceCount(&num_devices)); if (selectedNumberGPUs <= num_devices) { } else { printf(" !!! !!! selectedNumberGPUs(%i) > num_devices(%i) !!!! !!!! -> selectedNumberGPUs = num_devices = %i!\n", selectedNumberGPUs, num_devices, num_devices); selectedNumberGPUs = num_devices; // Reduce number of selected to number of GPUs in PC system! } // Check passed GPU-ID-Numbers and amount of GPUs int gpuNr = 0; int gpuNrCheck = 0; for (gpuNr = 0; gpuNr < selectedNumberGPUs; ++gpuNr) { if (enableGPUs_ptr[gpuNr] > (num_devices - 1)) { // Check if more GPUs are selected then available in System printf("\n enableGPUs_ptr[gpuNr=%i] = %i !!!\n", gpuNr, enableGPUs_ptr[gpuNr]); printf(" -> selected number of GPU > available Devices is not allowed!"); } for (gpuNrCheck = 0; gpuNrCheck < gpuNr; ++gpuNrCheck) { if (enableGPUs_ptr[gpuNrCheck] == enableGPUs_ptr[gpuNr]) printf(" -> GPU Device can only be used once!!!"); } } if (!(GPUs_Nx == 1) || !(GPUs_Mx < 10)) printf(" -> Dimension of GPUs must be [1 x <10]"); if ((pix_vect_Nx > 1)) printf(" -> No Blockmode [%i x n] allowed for GPUs\n", GPUs_Nx); //====================================================================== 19.Input Parameter - debugMode, debugModeParameter dbgMode_Nx = GetDimensions(dbgMode)[0]; // Reihen N ermitteln dbgMode_Mx = GetDimensions(dbgMode)[1]; // Spalten M ermitteln debugMode = *((float *)GetPr(dbgMode)); debugModeParameter = *((float *)GetPr(dbgMode) + 1); if ((dbgMode_Nx != 1) || (dbgMode_Mx != 2)) printf(" -> Dimension of debugMode must be [1 x 2]\n"); if (debugMode != 0.0) printf(" -> debugMode = [%f], debugModeParameter = [%f]\n", debugMode, debugModeParameter); // ~~~~ Create 3D-Matrix for the Output-Values // Output-Dimension is {IMAGE_XYZ_X, IMAGE_XYZ_Y, IMAGE_XYZ_Z} const int dims[] = {IMAGE_SIZE_XYZ.x, IMAGE_SIZE_XYZ.y, IMAGE_SIZE_XYZ.z}; int ndim = 3; Matrix_t Output_Voxels; Output_Voxels.NumberOfDims = ndim; Output_Voxels.Dims[0] = dims[0]; Output_Voxels.Dims[1] = dims[1]; Output_Voxels.Dims[2] = dims[2]; Output_Voxels.Data = new float[dims[0] * dims[1] * (dims[2] ? dims[2] : 1)]; double *Output_Voxels_ptr = new double[dims[0] * dims[1] * (dims[2] ? dims[2] : 1)]; ; // ~~~~ Create Pointer to return value from Duration of Kernel // Erstelle Array mit folgender Formatierung // 0: Total Durationtime all GPUs // 1: Total Durationtime GPU 1 // 2: Total Durationtime GPU 2 // n: Total Durationtime GPU n int m = (1 + selectedNumberGPUs), n = 1; double *Duration_ptr = new double[m * n]; // ~~~~ Create Pointer to return Error/Abortvalue of each multithread // int *Abort_ptr = (int*) malloc(num_workingPackages * sizeof(int)); int *Abort_ptr = (int *)malloc(selectedNumberGPUs * sizeof(int)); m = aScanCount; // 1 n = AScan_Nx; // z.B. 3000 float *AscansOut_ptr = new float[m * n]; SPDLOG_INFO("preintegrateAscans!"); // 按照当前参数必走这个分支 //================================================================================================================ Preintegrate Ascans if (SAFT_VARIANT[SAFT_VARIANT_AscanPreintegration] == 1) { // printf( "(SAFT_VARIANT[0] == 1) => perform preintegrateAscans\n\n"); speed_vec_ptr = (float *)GetPr(speed); // printf( " speed_vec_ptr[%3i] = %12.10f\n",0,speed_vec_ptr[0]); if (speed_vec_ptr[0] == 0) { printf("First value in SOS Volume = 0 --> preintegrateAscans can't be performed!!! --> Exit"); } //================================================================================================================ preintegrateAscans(aScan_ptr, AscansOut_ptr, speed_vec_ptr, aScanCount, aScanLength, IMAGE_RESOLUTION, sampleRate, debugMode, debugModeParameter); //================================================================================================================ } else { // printf( "(SAFT_VARIANT[0] == 0) => skip preintegrateAscans\n\n"); // Daten trotzdem in Outputspeicher fuer Matlab transferieren for (int j = 0; j < aScanCount; j++) { // ueber alle A-scans gehen. for (int i = 0; i < aScanLength; i++) { // printf( " i (%4i) = %6.3f ",i, aScan_ptr[j*aScanLength+i]); AscansOut_ptr[j * aScanLength + i] = aScan_ptr[j * aScanLength + i]; // nach Matlab zurueckgeben // printf( " i (%4i) = %6.3f \n",i, AscansOut_ptr[j*aScanLength+i]); } } } //================================================================================================================ Start Reconstruction //================================================================================================================ SPDLOG_INFO("multithreaded_processing!"); multithreaded_processing(aScan_ptr, Output_Voxels_ptr, receiver_index_ptr, emitter_index_ptr, receiver_list_ptr, receiver_list_Mx, emitter_list_ptr, emitter_list_Mx, speed_vec_ptr, SOSGrid_XYZ, sosOffset, SOS_RESOLUTION, att_vec_ptr, AScan_Mx, AScan_Nx, regionOfInterestOffset, IMAGE_SIZE_XYZ, IMAGE_RESOLUTION, sampleRate, BlockDim_XYZ, Duration_ptr, selectedNumberGPUs, enableGPUs_ptr, debugMode, debugModeParameter, SOSMode_3DVolume, ATTMode_3DVolume, SAFT_MODE, SAFT_VARIANT, SAFT_VARIANT_Size, Abort_ptr); // Check if errors occurred bool AbortedThreads = false; for (int i = 0; i < selectedNumberGPUs; i++) { if (Abort_ptr[i] > 0) { printf("!!!!!!!!!!!!!!!!!!! Aborted Thread for GPU[%i] = %i\n", i, Abort_ptr[i]); AbortedThreads = true; } } free(Abort_ptr); if (AbortedThreads) printf(" Aborted Thread occurred -> see output history"); //================================================================================================================ //================================================================================================================ //================================================================================================================ Build Sum of IMAGE_SUM and current reconstructed Volume SPDLOG_INFO("multithreaded_processing finish!"); float *outData = (float *)Output_Voxels.Data; for (uint64_t i = 0; i < IMAGE_SUM_Count; i++) { outData[i] = Output_Voxels_ptr[i] + IMAGE_SUM_vec_ptr[i]; } delete[] AscansOut_ptr; delete[] Duration_ptr; delete[] Output_Voxels_ptr; SPDLOG_INFO("SAFT finish!"); return Output_Voxels; }