Add multi-threading support to SAFT_TOFI
This commit is contained in:
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
|
||||
project(SaftTofi)
|
||||
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
||||
enable_language(CUDA)
|
||||
|
||||
find_package (OpenMP REQUIRED)
|
||||
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp )
|
||||
target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include )
|
||||
set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
@@ -14,6 +14,8 @@ target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
>)
|
||||
|
||||
target_link_libraries(SaftTofi PRIVATE ${CUDA_RUNTIME_LIBRARY} )
|
||||
target_link_libraries(SaftTofi PRIVATE OpenMP::OpenMP_CXX )
|
||||
|
||||
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH)
|
||||
set(SAFT_HEADER ${CMAKE_CURRENT_LIST_DIR}/src/SAFT_TOFI.h ${_DIR}/SAFT/SAFTStructs.h)
|
||||
set_target_properties(SaftTofi PROPERTIES PUBLIC_HEADER "${SAFT_HEADER}")
|
||||
@@ -4,7 +4,11 @@
|
||||
#include <cstddef>
|
||||
#include <math.h>
|
||||
|
||||
#include <locale.h> // For German printf output format: Float with , instead of .
|
||||
#include <locale.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
// For German printf output format: Float with , instead of .
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
@@ -535,8 +539,7 @@ void preintegrateAscans(
|
||||
printf( "==> preintegrateAscans - Start\n");
|
||||
#endif
|
||||
|
||||
float* AscanBuffer;
|
||||
AscanBuffer = (float*)malloc(aScanLength * sizeof(float)); // Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
|
||||
// Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
|
||||
|
||||
|
||||
// //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer
|
||||
@@ -653,13 +656,11 @@ void preintegrateAscans(
|
||||
|
||||
#ifdef preAscanIntegrationVersion2Ernst
|
||||
|
||||
int i, i_start, i_end = 0;
|
||||
float nSample = 0.0f;
|
||||
|
||||
|
||||
float windowWidth = 0.0f;
|
||||
float windowWidthHalf = 0.0f;
|
||||
float windowWidthHalf_minus1 = 0.0f;
|
||||
float windowSum = 0.0f;
|
||||
|
||||
|
||||
// maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615
|
||||
// width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen
|
||||
@@ -684,9 +685,14 @@ void preintegrateAscans(
|
||||
|
||||
//printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth));
|
||||
//printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
|
||||
|
||||
#pragma omp parallel for num_threads(32)
|
||||
for (int j = 0; j<aScanCount; j++){ // über alle A-scans gehen.
|
||||
|
||||
float* AscanBuffer = (float*)malloc(aScanLength * sizeof(float));
|
||||
int i_start, i_end = 0;
|
||||
float nSample = 0.0f;
|
||||
float windowWidthHalf_minus1 = 0.0f;
|
||||
float windowSum = 0.0f;
|
||||
if ((int)ceil(windowWidth)%2 == 1){ // Uneven / Ungerade
|
||||
// // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden
|
||||
|
||||
@@ -799,7 +805,7 @@ void preintegrateAscans(
|
||||
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory
|
||||
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab
|
||||
}
|
||||
|
||||
free(AscanBuffer);
|
||||
}
|
||||
|
||||
#ifdef debug_preAscanIntegration
|
||||
@@ -832,7 +838,7 @@ void preintegrateAscans(
|
||||
#endif
|
||||
#endif
|
||||
|
||||
free(AscanBuffer);
|
||||
// free(AscanBuffer);
|
||||
|
||||
#ifdef debug_OutputFunctions
|
||||
printf( "<== preintegrateAscans - End\n");
|
||||
|
||||
Reference in New Issue
Block a user