Add multi-threading support to SAFT_TOFI

This commit is contained in:
kradchen
2023-06-19 17:06:16 +08:00
parent 06d293fc58
commit c4c22ff6cf
2 changed files with 19 additions and 11 deletions

View File

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
# Project declaration; no LANGUAGES are listed here, CUDA is enabled separately below.
project(SaftTofi)
# NOTE(review): hard-coded absolute path to nvcc. It is set before enable_language(CUDA)
# so that CMake picks up this compiler; machines with CUDA installed elsewhere will
# need to override it.
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
enable_language(CUDA)
# OpenMP is mandatory: configuration fails if it cannot be found.
find_package (OpenMP REQUIRED)
# Shared library built from the C++ and CUDA sources under ./src.
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp )
# Private include paths: the sibling ../SAFT directory, the local sources, and the
# CUDA headers. NOTE(review): /usr/local/cuda/include is another hard-coded absolute path.
target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include )
# Enable separable compilation for the CUDA sources of this target.
set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
@@ -14,6 +14,8 @@ target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
>)
# NOTE(review): CUDA_RUNTIME_LIBRARY is not set in the visible part of this file;
# presumably it is defined elsewhere or passed in by the caller - confirm. If it is
# empty, this call links nothing.
target_link_libraries(SaftTofi PRIVATE ${CUDA_RUNTIME_LIBRARY} )
# Link the OpenMP C++ imported target provided by find_package(OpenMP).
target_link_libraries(SaftTofi PRIVATE OpenMP::OpenMP_CXX )
# _DIR receives the directory portion of CMAKE_CURRENT_LIST_DIR, i.e. its parent directory.
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH)
# Public headers: this library's API header plus SAFTStructs.h from the sibling SAFT directory.
set(SAFT_HEADER ${CMAKE_CURRENT_LIST_DIR}/src/SAFT_TOFI.h ${_DIR}/SAFT/SAFTStructs.h)
# Quoted so the whole list is passed as a single PUBLIC_HEADER property value.
set_target_properties(SaftTofi PROPERTIES PUBLIC_HEADER "${SAFT_HEADER}")

View File

@@ -4,7 +4,11 @@
#include <cstddef>
#include <math.h>
#include <locale.h> // For German printf output format: Float with , instead of .
#include <locale.h>
#include <omp.h>
// <locale.h> is included for the German printf output format: floats with ',' instead of '.'
#include <iostream>
#include <map>
@@ -535,8 +539,7 @@ void preintegrateAscans(
printf( "==> preintegrateAscans - Start\n");
#endif
float* AscanBuffer;
AscanBuffer = (float*)malloc(aScanLength * sizeof(float)); // Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
// Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
// //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer
@@ -653,13 +656,11 @@ void preintegrateAscans(
#ifdef preAscanIntegrationVersion2Ernst
int i, i_start, i_end = 0;
float nSample = 0.0f;
float windowWidth = 0.0f;
float windowWidthHalf = 0.0f;
float windowWidthHalf_minus1 = 0.0f;
float windowSum = 0.0f;
// maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615
// width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen
@@ -684,9 +685,14 @@ void preintegrateAscans(
//printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth));
//printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
#pragma omp parallel for num_threads(32)
for (int j = 0; j<aScanCount; j++){ // über alle A-scans gehen.
float* AscanBuffer = (float*)malloc(aScanLength * sizeof(float));
int i_start, i_end = 0;
float nSample = 0.0f;
float windowWidthHalf_minus1 = 0.0f;
float windowSum = 0.0f;
if ((int)ceil(windowWidth)%2 == 1){ // Uneven / Ungerade
// // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden
@@ -799,7 +805,7 @@ void preintegrateAscans(
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab
}
free(AscanBuffer);
}
#ifdef debug_preAscanIntegration
@@ -832,7 +838,7 @@ void preintegrateAscans(
#endif
#endif
free(AscanBuffer);
// free(AscanBuffer);
#ifdef debug_OutputFunctions
printf( "<== preintegrateAscans - End\n");