Add multi-threads support to SAFT_TOFI
This commit is contained in:
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
|
|||||||
project(SaftTofi)
|
project(SaftTofi)
|
||||||
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
||||||
enable_language(CUDA)
|
enable_language(CUDA)
|
||||||
|
find_package (OpenMP REQUIRED)
|
||||||
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp )
|
add_library(SaftTofi SHARED ./src/SAFT_TOFI.cpp ./src/saft.cu ./src/processAScans.cpp ./src/saft.cpp )
|
||||||
target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include )
|
target_include_directories(SaftTofi PRIVATE ../SAFT ./src /usr/local/cuda/include )
|
||||||
set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
set_target_properties(SaftTofi PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
@@ -14,6 +14,8 @@ target_compile_options(SaftTofi PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
|||||||
>)
|
>)
|
||||||
|
|
||||||
target_link_libraries(SaftTofi PRIVATE ${CUDA_RUNTIME_LIBRARY} )
|
target_link_libraries(SaftTofi PRIVATE ${CUDA_RUNTIME_LIBRARY} )
|
||||||
|
target_link_libraries(SaftTofi PRIVATE OpenMP::OpenMP_CXX )
|
||||||
|
|
||||||
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH)
|
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH)
|
||||||
set(SAFT_HEADER ${CMAKE_CURRENT_LIST_DIR}/src/SAFT_TOFI.h ${_DIR}/SAFT/SAFTStructs.h)
|
set(SAFT_HEADER ${CMAKE_CURRENT_LIST_DIR}/src/SAFT_TOFI.h ${_DIR}/SAFT/SAFTStructs.h)
|
||||||
set_target_properties(SaftTofi PROPERTIES PUBLIC_HEADER "${SAFT_HEADER}")
|
set_target_properties(SaftTofi PROPERTIES PUBLIC_HEADER "${SAFT_HEADER}")
|
||||||
@@ -4,7 +4,11 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include <locale.h> // For German printf output format: Float with , instead of .
|
#include <locale.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
// For German printf output format: Float with , instead of .
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
#include <map>
|
||||||
@@ -535,8 +539,7 @@ void preintegrateAscans(
|
|||||||
printf( "==> preintegrateAscans - Start\n");
|
printf( "==> preintegrateAscans - Start\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float* AscanBuffer;
|
// Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
|
||||||
AscanBuffer = (float*)malloc(aScanLength * sizeof(float)); // Zweiten AScan-Buffer erzeugen mit Länge eines AScans (z.B. 3000)
|
|
||||||
|
|
||||||
|
|
||||||
// //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer
|
// //Debuging: Test: Alle Daten uebertragen von Input-Ascans auf AscansOut_ptr[0..aScanLength] ueber AscanBuffer
|
||||||
@@ -653,13 +656,11 @@ void preintegrateAscans(
|
|||||||
|
|
||||||
#ifdef preAscanIntegrationVersion2Ernst
|
#ifdef preAscanIntegrationVersion2Ernst
|
||||||
|
|
||||||
int i, i_start, i_end = 0;
|
|
||||||
float nSample = 0.0f;
|
|
||||||
|
|
||||||
float windowWidth = 0.0f;
|
float windowWidth = 0.0f;
|
||||||
float windowWidthHalf = 0.0f;
|
float windowWidthHalf = 0.0f;
|
||||||
float windowWidthHalf_minus1 = 0.0f;
|
|
||||||
float windowSum = 0.0f;
|
|
||||||
|
|
||||||
// maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615
|
// maximale Schrittweite ueber einen Voxel = sqr(3)*2*IMAGE_RESOLUTION*fs/c // sqr(3)*2 = 3.464101615
|
||||||
// width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen
|
// width = ( ceil( 1.7*(( resz / speedz)/ (timeintz/INTERP_RATIO)) )); % Breite berechnen
|
||||||
@@ -684,9 +685,14 @@ void preintegrateAscans(
|
|||||||
|
|
||||||
//printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth));
|
//printf( " => windowWidth = %5i (Ganze Breite) \n",(int)ceil(windowWidth));
|
||||||
//printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
|
//printf( " => windowWidthHalf = %5.2f (Halbe Breite) \n",windowWidthHalf);
|
||||||
|
#pragma omp parallel for num_threads(32)
|
||||||
for (int j = 0; j<aScanCount; j++){ // über alle A-scans gehen.
|
for (int j = 0; j<aScanCount; j++){ // über alle A-scans gehen.
|
||||||
|
|
||||||
|
float* AscanBuffer = (float*)malloc(aScanLength * sizeof(float));
|
||||||
|
int i_start, i_end = 0;
|
||||||
|
float nSample = 0.0f;
|
||||||
|
float windowWidthHalf_minus1 = 0.0f;
|
||||||
|
float windowSum = 0.0f;
|
||||||
if ((int)ceil(windowWidth)%2 == 1){ // Uneven / Ungerade
|
if ((int)ceil(windowWidth)%2 == 1){ // Uneven / Ungerade
|
||||||
// // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden
|
// // Bei ungeraden Breiten kann symmetrisch sampl = widthHalf_minus1 = floor((ceil(width)-1)/2) genutzt werden
|
||||||
|
|
||||||
@@ -799,7 +805,7 @@ void preintegrateAscans(
|
|||||||
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory
|
aScan_ptr[j*aScanLength+i] = AscanBuffer[i]; // Write in A-scans Memory
|
||||||
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab
|
AscansOut_ptr[j*aScanLength+i] = AscanBuffer[i]; // Also write back for Matlab
|
||||||
}
|
}
|
||||||
|
free(AscanBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef debug_preAscanIntegration
|
#ifdef debug_preAscanIntegration
|
||||||
@@ -832,7 +838,7 @@ void preintegrateAscans(
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
free(AscanBuffer);
|
// free(AscanBuffer);
|
||||||
|
|
||||||
#ifdef debug_OutputFunctions
|
#ifdef debug_OutputFunctions
|
||||||
printf( "<== preintegrateAscans - End\n");
|
printf( "<== preintegrateAscans - End\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user