feat: add findPeaks Function

fix: problem when valid equals zero.
feat: change the description of sortrows.
2025-10-23 15:49:32 +08:00 · 2025-10-17 09:53:03 +08:00 · 2025-09-16 13:24:48 +08:00 · 2025-06-27 13:21:18 +08:00 · 2025-03-26 13:02:43 +08:00 · 2025-03-18 16:00:10 +08:00
21 changed files with 2095 additions and 1581 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,9 +8,10 @@ set(Aurora_USE_CUDA ON)

 if (Aurora_USE_CUDA)
 set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
-
+set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda CACHE PATH "CUDA toolkit root used by FindCUDA")  # FindCUDA reads CUDA_TOOLKIT_ROOT_DIR, not CUDA_TOOLKIT_ROOT

 enable_language(CUDA)
+find_package(CUDA REQUIRED)  # supplies the CUDA_INCLUDE_DIRS / CUDA_*_LIBRARY variables used below
 find_package(CUDAToolkit REQUIRED)
 add_definitions(-DUSE_CUDA)
 endif(Aurora_USE_CUDA)
@@ -43,12 +44,12 @@ target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
 target_link_libraries(Aurora PUBLIC OpenMP::OpenMP_CXX)
 target_link_libraries(Aurora PUBLIC matio)
 if (Aurora_USE_CUDA)
-target_include_directories(Aurora PRIVATE ./src /usr/local/cuda/include) 
+target_include_directories(Aurora PRIVATE ./src  ${CUDA_INCLUDE_DIRS}) 
 set_target_properties(Aurora PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
    -arch=sm_75 --expt-extended-lambda 
 >)
-target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
+target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} ${CUDA_cufft_LIBRARY})
 target_link_libraries(Aurora PRIVATE ${CUDA_cublas_LIBRARY})
 target_link_libraries(Aurora PRIVATE ${CUDA_cusolver_LIBRARY})
 endif(Aurora_USE_CUDA)
--- a/cmake/AuroraConfig.cmake
+++ b/cmake/AuroraConfig.cmake
@@ -1,10 +1,6 @@
 set(MKL_INTERFACE_FULL intel_lp64)
 find_package(OpenMP REQUIRED)
 find_package(MKL CONFIG REQUIRED)
-if(${USE_CUDA})
-enable_language(CUDA)
-find_package(CUDAToolkit REQUIRED)
-endif()

 set(Aurora_MAJOR_VERSION 1)
 set(Aurora_MINOR_VERSION 0)
@@ -13,17 +9,11 @@ set(Aurora_BUILD_VERSION 0)
 get_filename_component(Aurora_DIR "${CMAKE_CURRENT_LIST_DIR}/" PATH)

 message("Aurora_DIR: ${Aurora_DIR}")
-if(${USE_CUDA})
-file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/*.cpp" "${Aurora_DIR}/src/*.cu")
-set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX ${CUDA_cublas_LIBRARY} ${CUDA_cusolver_LIBRARY})
-else()
-set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX )
-file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/*.cpp" )
-endif()
+file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/[AFSCM]*.cpp" "${Aurora_DIR}/src/Matrix*.cpp" "${Aurora_DIR}/src/*.cu")
 message( ${Aurora_Source})
 set(Aurora_INCLUDE_DIRS "${Aurora_DIR}/src" "${Aurora_DIR}/thirdparty/include" $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)

 set(Aurora_Complie_Options $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS> )
-
+set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX ${CUDA_cublas_LIBRARY} ${CUDA_cusolver_LIBRARY})
 set(Aurora_FOUND TRUE)
 message(Aurora Found)
--- a/src/CudaMatrix.cpp
+++ b/src/CudaMatrix.cpp
@@ -1,8 +1,9 @@
 #include "AuroraDefs.h"
-#include "Function1D.cuh"
 #include <complex>
 #include <utility>
+
 #ifdef USE_CUDA
+#include "Function1D.cuh"
 #include "CudaMatrix.h"

 #include "Function.h"
--- a/src/CudaMatrixPrivate.cu
+++ b/src/CudaMatrixPrivate.cu
@@ -5,129 +5,11 @@
 #include <thrust/functional.h>
 #include <thrust/execution_policy.h>

+
 #include "AuroraDefs.h"
 #include "AuroraThrustIterator.cuh"
 using namespace thrust::placeholders;

-struct PowOp: public thrust::unary_function<float, float>{
-    float exponent;
-    PowOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return powf(x, exponent);
-    }
-};
-
-struct CompareGOp: public thrust::unary_function<float, float>{
-    float exponent;
-    CompareGOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return (exponent<x?1.0:.0);
-    }
-};
-
-struct CompareGEOp: public thrust::unary_function<float, float>{
-    float exponent;
-    CompareGEOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return (exponent<=x?1.0:.0);
-    }
-};
-
-struct CompareEOp: public thrust::unary_function<float, float>{
-    float exponent;
-    CompareEOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return (exponent==x?1.0:.0);
-    }
-};
-
-struct CompareNEOp: public thrust::unary_function<float, float>{
-    float exponent;
-    CompareNEOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return (exponent!=x?1.0:.0);
-    }
-};
-
-struct CompareLOp: public thrust::unary_function<float, float>{
-    float exponent;
-    CompareLOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return (exponent>x?1.0:.0);
-    }
-};
-
-struct CompareLEOp: public thrust::unary_function<float, float>{
-    float exponent;
-    CompareLEOp(float v):exponent(v) {}
-    void setExponent(float v){
-        exponent = v;
-    }
-    
-    __host__ __device__
-    float operator()(const float& x)  {
-        return (exponent>=x?1.0:.0);
-    }
-};
-
-
-struct CompareAGOp{
-    __host__ __device__
-    float operator()(const float& x,const float& y)  {
-        return x>y?1:0;
-    }
-};
-
-struct CompareAGEOp{
-    __host__ __device__
-    float operator()(const float& x,const float& y)  {
-        return x>=y?1:0;
-    }
-};
-
-struct CompareAEOp{
-    __host__ __device__
-    float operator()(const float& x,const float& y)  {
-        return x==y?1:0;
-    }
-};
-
-struct CompareANEOp{
-    __host__ __device__
-    float operator()(const float& x,const float& y)  {
-        return x!=y?1:0;
-    }
-};

 typedef thrust::complex<float> complexf;

@@ -651,29 +533,51 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
        thrust::transform(thrust::device,in1,in1+length,out,op);
        return;
    }
-    thrust::transform(thrust::device,in1,in1+length,out,PowOp(N));
+    auto lambdaPow = [N] __host__ __device__(float x) {
+            return powf(x,N);
+    };
+    thrust::transform(thrust::device,in1,in1+length,out,lambdaPow);
 }

 void unaryCompare(float* in1, const float& in2, float* out, unsigned long length, int type){
+
    switch (type)
    {
        case G:
-            thrust::transform(thrust::device,in1,in1+length,out,CompareGOp(in2));
+            thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
+                {
+                    return in2 < x ? 1.0 : .0;
+                });
            break;
        case GE:
-            thrust::transform(thrust::device,in1,in1+length,out,CompareGEOp(in2));
+            thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
+            {
+                return in2 <= x ? 1.0 : .0;
+            });
            break;
        case E:
-            thrust::transform(thrust::device,in1,in1+length,out,CompareEOp(in2));
+            thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
+            {
+                return in2 == x ? 1.0 : .0;
+            });
            break;
        case NE:
-            thrust::transform(thrust::device,in1,in1+length,out,CompareNEOp(in2));
+            thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
+            {
+                return in2 != x ? 1.0 : .0;
+            });
            break;
        case LE:
-            thrust::transform(thrust::device,in1,in1+length,out,CompareLEOp(in2));
+            thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
+            {
+                return in2 >= x ? 1.0 : .0;
+            });
            break;
        case L:
-            thrust::transform(thrust::device,in1,in1+length,out,CompareLOp(in2));
+            thrust::transform(thrust::device,in1,in1+length,out,[in2]__host__ __device__(const float &x)
+            {
+                return in2 > x ? 1.0 : .0;
+            });
            break;
        default:
            break;
@@ -683,51 +587,89 @@ void unaryCompare(const float& in1, float* in2, float* out, unsigned long length
    switch (type)
    {
        case G:
-            thrust::transform(thrust::device,in2,in2+length,out,CompareLOp(in1));
+            thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
+            {
+                return in1 > x ? 1.0 : .0;
+            });
            break;
        case GE:
-            thrust::transform(thrust::device,in2,in2+length,out,CompareLEOp(in1));
+            thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
+            {
+                return in1 >= x ? 1.0 : .0;
+            });
            break;
        case E:
-            thrust::transform(thrust::device,in2,in2+length,out,CompareEOp(in1));
+            thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
+            {
+                return in1 == x ? 1.0 : .0;
+            });
            break;
        case NE:
-            thrust::transform(thrust::device,in2,in2+length,out,CompareNEOp(in1));
+            thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
+            {
+                return in1 != x ? 1.0 : .0;
+            });
            break;
        case LE:
-            thrust::transform(thrust::device,in2,in2+length,out,CompareGEOp(in1));
+            thrust::transform(thrust::device,in2,in2+length,out, [in1] __host__ __device__ (const float &x)
+            {
+                return in1 <= x ? 1.0 : .0;
+            });
            break;
        case L:
-            thrust::transform(thrust::device,in2,in2+length,out,CompareGOp(in1));
+            thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
+            {
+                return in1 < x ? 1.0 : .0;
+            });
            break;
        default:
            break;
    }
+
 }
 void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type){
    switch (type)
    {
        case G:
-            thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAGOp());
+            thrust::transform(thrust::device,in1,in1+length,in2,out, []__host__ __device__(float x, float y)
+            {
+                return x > y ? 1. : .0;
+            });
            break;
        case GE:
-            thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAGEOp());
+            thrust::transform(thrust::device,in1,in1+length,in2,out,[]__host__ __device__(float x, float y)
+            {
+                return x >= y ? 1. : .0;
+            });
            break;
        case E:
-            thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAEOp());
+            thrust::transform(thrust::device,in1,in1+length,in2,out,[]__host__ __device__(float x, float y)
+            {
+                return x == y ? 1. : .0;
+            });
            break;
        case NE:
-            thrust::transform(thrust::device,in1,in1+length,in2,out,CompareANEOp());
+            thrust::transform(thrust::device,in1,in1+length,in2,out, []__host__ __device__(float x, float y)
+            {
+                return x != y ? 1. : .0;
+            });
            break;
        case LE:
-            thrust::transform(thrust::device,in2,in2+length,in1,out,CompareAGEOp());
+            thrust::transform(thrust::device,in1,in1+length,in2,out,[]__host__ __device__ (float x, float y)
+            {
+                return x <= y ? 1. : .0;
+            });
            break;
        case L:
-            thrust::transform(thrust::device,in2,in2+length,in1,out,CompareAGOp());
+            thrust::transform(thrust::device,in1,in1+length,in2,out, [] __host__ __device__ (float x, float y)
+            {
+                return x < y ? 1. : .0;
+            });
            break;
        default:
            break;
    }
+
 }

 void thrustFill(float* aBegin, float* aEnd, float aValue)
--- a/src/Function1D.cpp
+++ b/src/Function1D.cpp
@@ -35,55 +35,7 @@ namespace {

    uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;

-    inline void convertValue(float aValue ,float* des){
-      float value = aValue;
-      ushort *exponentPtr = (ushort *)&value;
-      exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
-      exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
-      exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
-      exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
-      float signValue = aValue;
-      short *signPtr = (short *)&signValue;
-      uint sign_bit[4] = {
-          (uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
-          (uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
-      float fraction3Value = aValue;
-      ushort *fraction3Ptr = (ushort *)&fraction3Value;
-      fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
-      fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
-      fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
-      fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
-      uint hidden_bit[4] = {
-          sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
-              ((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
-          sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
-              ((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
-          sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
-              ((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
-          sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
-              ((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
-      };
-      int outputPtr[4] = {0};
-      uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
-      outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1))
-                                  : (temp >> std::abs(exponentPtr[0] - 1));
-      temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
-      outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1))
-                                  : (temp >> std::abs(exponentPtr[1] - 1));
-      temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
-      outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1))
-                                  : (temp >> std::abs(exponentPtr[2] - 1));
-      temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
-      outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1))
-                                  : (temp >> std::abs(exponentPtr[3] - 1));
-    des[0] = outputPtr[0];
-    des[1] = outputPtr[1];
-    des[2] = outputPtr[2];
-    des[3] = outputPtr[3];
-
-    }
-
-    inline void convertValue2(short* aValue ,float* des){
+    inline void convertValue(short* aValue ,float* des){
      ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
      exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
      exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
@@ -616,7 +568,9 @@ Matrix Aurora::acosd(const Matrix& aMatrix)
    {
        resultData[i] = resultData[i] * 180 / PI;
    }
-    return Matrix::New(resultData, aMatrix);
+    Matrix result = Matrix::New(resultData, aMatrix);
+    nantoval(result, 0);
+    return result;
 }

 Matrix Aurora::conj(const Matrix& aMatrix)
@@ -1096,14 +1050,14 @@ Matrix Aurora::convertfp16tofloat(short* aData, int aRows, int aColumns)
    #pragma omp parallel for
    for (size_t i = 0; i < quaterSize; i+=8) {
        //循环展开以避免过度的线程调用
-        if (i  < quaterSize)::convertValue2((short*)(input+i*4), output + (i) * 4);
-        if (i+1  < quaterSize)::convertValue2((short*)(input+(i+1)*4), output + (i+1) * 4);
-        if (i+2  < quaterSize)::convertValue2((short*)(input+(i+2)*4), output + (i+2) * 4);
-        if (i+3  < quaterSize)::convertValue2((short*)(input+(i+3)*4), output + (i+3) * 4);
-        if (i+4  < quaterSize)::convertValue2((short*)(input+(i+4)*4), output + (i+4) * 4);
-        if (i+5  < quaterSize)::convertValue2((short*)(input+(i+5)*4), output + (i+5) * 4);
-        if (i+6  < quaterSize)::convertValue2((short*)(input+(i+6)*4), output + (i+6) * 4);
-        if (i+7  < quaterSize)::convertValue2((short*)(input+(i+7)*4), output + (i+7) * 4);
+        if (i  < quaterSize)::convertValue((short*)(input+i*4), output + (i) * 4);
+        if (i+1  < quaterSize)::convertValue((short*)(input+(i+1)*4), output + (i+1) * 4);
+        if (i+2  < quaterSize)::convertValue((short*)(input+(i+2)*4), output + (i+2) * 4);
+        if (i+3  < quaterSize)::convertValue((short*)(input+(i+3)*4), output + (i+3) * 4);
+        if (i+4  < quaterSize)::convertValue((short*)(input+(i+4)*4), output + (i+4) * 4);
+        if (i+5  < quaterSize)::convertValue((short*)(input+(i+5)*4), output + (i+5) * 4);
+        if (i+6  < quaterSize)::convertValue((short*)(input+(i+6)*4), output + (i+6) * 4);
+        if (i+7  < quaterSize)::convertValue((short*)(input+(i+7)*4), output + (i+7) * 4);
    }
    return Matrix::New(output,aRows,aColumns,1);
 }
--- a/src/Function1D.cu
+++ b/src/Function1D.cu
--- a/src/Function1D.cuh
+++ b/src/Function1D.cuh
@@ -63,6 +63,8 @@ namespace Aurora

    CudaMatrix conj(const CudaMatrix& aMatrix);

+    CudaMatrix conj(CudaMatrix&& aMatrix);
+
    float norm(const CudaMatrix& aMatrix, NormMethod aNormMethod);

    CudaMatrix transpose(const CudaMatrix& aMatrix);
@@ -124,5 +126,4 @@ namespace Aurora
    void compareSet(CudaMatrix& aDesAndCompareMatrix,CudaMatrix& aOtherCompareMatrix, float newValue,CompareOp op);
    void compareSet(CudaMatrix& aCompareMatrix,float compareValue, CudaMatrix& aNewValueMatrix,CompareOp op);
 }
-
 #endif //AURORA_CUDA_FUNCTION1D_H
--- a/src/Function2D.cpp
+++ b/src/Function2D.cpp
@@ -1034,3 +1034,93 @@ Matrix Aurora::sub2ind(const Matrix &aVMatrixSize, std::initializer_list<Matrix>
    delete [] strides;
    return Matrix::New(output,returnVectorSize,1,1);
 }
+
+/**
+ * Column-wise peak search on the host.
+ *
+ * Every column of aData is scanned as one signal. A sample that is strictly greater than both
+ * neighbours becomes the current peak candidate. The candidate is evaluated when the next strict
+ * local minimum (valley) is reached, so a peak that is not followed by a valley is never reported.
+ * It is kept when its height is >= aMinPeakHeight and its height minus the higher of the previous
+ * valley (initially the first sample) and the current valley is >= MinPeakProminece. When more than
+ * aNPeaks candidates qualify, the aNPeaks highest are kept and written in ascending index order.
+ *
+ * @param aData            input; dimension 0 is the signal length, dimension 1 the number of signals
+ * @param aNPeaks          number of index slots per signal; nothing is written when it is <= 0
+ * @param aMinPeakHeight   minimum peak height
+ * @param MinPeakProminece minimum prominence as described above
+ * @param outIndex         caller-allocated buffer of (number of signals * aNPeaks) ints; slot k of
+ *                         signal s is outIndex[s * aNPeaks + k], unused slots hold the signal length
+ */
+void Aurora::findPeaksHost(const Matrix & aData,  int aNPeaks, float aMinPeakHeight, float MinPeakProminece,
+        int* outIndex)
+{
+    if (aNPeaks <= 0 || outIndex == nullptr)
+    {
+        return;
+    }
+    const int signalSize = aData.getDimSize(0);
+    const int signalCount = aData.getDimSize(1);
+
+    #pragma omp parallel for
+    for (int signal = 0; signal < signalCount; ++signal)
+    {
+        // A stored peak height is always data[index], so the output slice is the only storage
+        // needed and aNPeaks is not limited by a fixed-size stack buffer.
+        int* slots = outIndex + static_cast<size_t>(signal) * aNPeaks;
+        for (int k = 0; k < aNPeaks; ++k)
+        {
+            slots[k] = signalSize;
+        }
+        if (signalSize < 3)
+        {
+            // No interior sample exists, so there is nothing to scan (and data[0] may not exist).
+            continue;
+        }
+
+        const float* data = aData.getData() + static_cast<size_t>(signal) * signalSize;
+        float newPeak = data[0];
+        float newValley = data[0];
+        int peakIndex = 0;
+        int saved = 0;
+
+        for (int i = 1; i < signalSize - 1; ++i)
+        {
+            if (data[i] > data[i - 1] && data[i] > data[i + 1])
+            {
+                newPeak = data[i];
+                peakIndex = i;
+            }
+            const bool isValley = data[i] < data[i - 1] && data[i] < data[i + 1];
+            if (!isValley)
+            {
+                continue;
+            }
+            const float higherValley = std::max(newValley, data[i]);
+            newValley = data[i];
+            if (!(newPeak >= aMinPeakHeight) || !(newPeak - higherValley >= MinPeakProminece))
+            {
+                continue;
+            }
+            if (saved < aNPeaks)
+            {
+                slots[saved++] = peakIndex;
+                continue;
+            }
+            // All slots are used: push the candidate through them so the lowest peak drops out.
+            for (int k = 0; k < aNPeaks; ++k)
+            {
+                if (data[slots[k]] < newPeak)
+                {
+                    std::swap(slots[k], peakIndex);
+                    newPeak = data[peakIndex];
+                }
+            }
+        }
+        if (saved >= aNPeaks)
+        {
+            // Replacements can break the ascending order; a partly filled slice is already ordered.
+            std::sort(slots, slots + aNPeaks);
+        }
+    }
+}
--- a/src/Function2D.cu
+++ b/src/Function2D.cu
--- a/src/Function2D.cuh
+++ b/src/Function2D.cuh
@@ -64,6 +64,7 @@ namespace Aurora

    CudaMatrix fft(const CudaMatrix &aMatrix, long aFFTSize = -1);
    CudaMatrix ifft(const CudaMatrix &aMatrix, long aFFTSize = -1);
+    CudaMatrix ifft(CudaMatrix && aMatrix);

    CudaMatrix hilbert(const CudaMatrix &aMatrix);

@@ -86,6 +87,19 @@ namespace Aurora
     */
    CudaMatrix ifft_symmetric(const CudaMatrix &aMatrix,long aLength);

+    CudaMatrix valid(const CudaMatrix& aData, const CudaMatrix aValid);
+
+    /**
+     * findPeaks: column-wise peak search and matching
+     * @attention do not allocate memory for aOutIndexs in advance
+     * @param aData input
+     * @param aNpeaks number of peaks
+     * @param aMinPeakHeight minimum height
+     * @param aMinPeakProminence minimum relative height (prominence)
+     * @param aOutIndexs null pointer; device memory is allocated for it inside the function
+     * @return heights of the selected peaks
+    */
+    CudaMatrix findPeaks(const CudaMatrix& aData, int aNpeaks, float aMinPeakHeight, float aMinPeakProminence, int** aOutIndexs);
 }

 #endif // __FUNCTION2D_CUDA_H__
--- a/src/Function2D.h
+++ b/src/Function2D.h
@@ -107,10 +107,10 @@ namespace Aurora
     * 当第一列包含重复的元素时，sortrows 会根据下一列中的值进行排序，并对后续的相等值重复此行为。
     * @attention 目前不支持三维，不支持复数
     * @param aMatrix 目标矩阵
-     * @param indexMatrix 排序后各行的原索引矩阵指针，非必须
+     * @param indexMatrix 排序后各行的原索引矩阵指针，必须要有
     * @return 排序后矩阵
     */
-    Matrix sortrows(const Matrix &aMatrix, Matrix* indexMatrix=nullptr);
+    Matrix sortrows(const Matrix &aMatrix, Matrix* indexMatrix);

    /**
     * 对矩阵求中间值 按列, 目前不支持三维，不支持复数
@@ -178,7 +178,7 @@ namespace Aurora
     * @return
     */
    Matrix sub2ind(const Matrix &aVMatrixSize, std::initializer_list<Matrix> aSliceIdxs);
-    
+    void findPeaksHost(const Matrix & aData,  int aNPeaks, float aMinPeakHeight, float MinPeakProminece,int* outIndex);
 };

 #endif // AURORA_FUNCTION2D_H
--- a/src/Function3D.cpp
+++ b/src/Function3D.cpp
@@ -5,12 +5,10 @@
 #include "Function.h"

 #ifdef USE_CUDA
-#include <cuda_runtime.h>
-#include "CudaMatrixPrivate.cuh"
 #include "CudaMatrix.h"
-#endif
-
-
+#include "CudaMatrixPrivate.cuh"
+#include <cuda_runtime.h>
+#endif // USE_CUDA

 //必须在Eigen之前
 #include "AuroraDefs.h"
@@ -74,7 +72,6 @@ Matrix Aurora::ones(int aRow, int aColumn, int aSlice) {
    return Matrix::New(data,rowSize,colSize,aSlice);
 }

-
 Matrix Aurora::ones(int aSquareRow) {
    return Aurora::ones(aSquareRow, aSquareRow);
 }
@@ -95,7 +92,6 @@ Matrix Aurora::zeros(int aRow, int aColumn, int aSlice) {
    return Matrix::New(data,rowSize,colSize,sliceSize);
 }

-
 Matrix Aurora::zeros(int aSquareRow) {
    return Aurora::zeros(aSquareRow, aSquareRow);
 }
@@ -218,7 +214,7 @@ Matrix Aurora::meshgridInterp3(const Matrix& aX, const Matrix& aY, const Matrix&
    return result;
 }

-#ifdef USE_CUDA
+#if USE_CUDA
 CudaMatrix Aurora::onesCuda(int aRow, int aColumn, int aSlice){
    if (aRow == 0 || aColumn == 0)
    {
@@ -255,6 +251,7 @@ CudaMatrix Aurora::zerosCuda(int aRow, int aColumn, int aSlice) {
    return CudaMatrix::fromRawData(data,rowSize,colSize,sliceSize);
 }

+
 CudaMatrix Aurora::zerosCuda(int aSquareRow) {
    return Aurora::zerosCuda(aSquareRow, aSquareRow);
 }
--- a/src/Function3D.cu
+++ b/src/Function3D.cu
@@ -0,0 +1,97 @@
+#include "Function3D.cuh"
+
+using namespace Aurora;
+
+/**
+ * Samples a 3D float texture at the query points and writes one value per output element.
+ * Threads outside the output extent return immediately. A query point outside
+ * [aStartX, aEndX] x [aStartY, aEndY] x [aStartZ, aEndZ] yields aOutValue; otherwise the point is
+ * converted to texel coordinates ((v - start) / step + 0.5) and fetched with tex3D.
+ */
+__global__ void interp3Kernel(cudaTextureObject_t aTexObj, float* aOutputData, float aStartX, float aDx, float aEndX, float aStartY, float aDy
+                              , float aEndY, float aStartZ, float aDz, float aEndZ, float* aNewX, float* aNewY, float* aNewZ
+                              , int aOutputRowSize, int aOutputColumnSize, int aOutputSliceSize, float aOutValue)
+{
+    int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
+    int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
+    int zIndex = blockIdx.z * blockDim.z + threadIdx.z;
+
+    if(xIndex > aOutputRowSize - 1 ||  yIndex > aOutputColumnSize - 1 || zIndex > aOutputSliceSize - 1)
+    {
+        return;
+    }
+    size_t index = zIndex * aOutputRowSize * aOutputColumnSize + yIndex * aOutputRowSize + xIndex;
+    float x = aNewX[index];
+    float y = aNewY[index];
+    float z = aNewZ[index];
+    if(x > aEndX || x < aStartX || y > aEndY || y < aStartY || z > aEndZ || z < aStartZ)
+    {
+        aOutputData[index] = aOutValue;
+    }
+    else
+    {
+        aOutputData[index] = tex3D<float>(aTexObj, (x - aStartX) / aDx + 0.5, (y - aStartY) / aDy + 0.5, (z - aStartZ) / aDz + 0.5);
+    }
+}
+
+/**
+ * Interpolates aValue at the query points (aNewX, aNewY, aNewZ) with a linearly filtered 3D texture.
+ * aValue is copied into a cudaArray bound to a texture object; both are released before returning.
+ * The result has the dimensions of aNewX, and query points outside the start/end ranges receive aOutValue.
+ * NOTE(review): the texture width is taken from dimension 1 of aValue - confirm this matches the
+ * memory layout of CudaMatrix. CUDA API return codes are not checked.
+ */
+CudaMatrix Aurora::interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
+                           float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
+                           const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue)
+{
+    cudaTextureObject_t texObj;
+    size_t dimX = aValue.getDimSize(1);
+    size_t dimY = aValue.getDimSize(0);
+    size_t dimZ = aValue.getDimSize(2);
+    cudaExtent extent = make_cudaExtent(dimX, dimY, dimZ);
+
+    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
+
+    cudaArray* cuArray;
+    cudaMalloc3DArray(&cuArray, &channelDesc, extent);
+
+    cudaMemcpy3DParms copyParams = {0};
+    copyParams.srcPtr = make_cudaPitchedPtr(aValue.getData(), dimX * sizeof(float), dimX, dimY);
+    copyParams.dstArray = cuArray;
+    copyParams.extent = extent;
+    copyParams.kind = cudaMemcpyDeviceToDevice;
+    cudaMemcpy3D(&copyParams);
+
+    cudaResourceDesc resDesc;
+    memset(&resDesc, 0, sizeof(resDesc));
+    resDesc.resType = cudaResourceTypeArray;
+    resDesc.res.array.array = cuArray;
+
+    cudaTextureDesc texDesc;
+    memset(&texDesc, 0, sizeof(texDesc));
+    texDesc.filterMode = cudaFilterModeLinear;
+    texDesc.addressMode[0] = cudaAddressModeClamp;
+    texDesc.addressMode[1] = cudaAddressModeClamp;
+    texDesc.addressMode[2] = cudaAddressModeClamp;
+    texDesc.readMode = cudaReadModeElementType;
+    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
+
+    int row = aNewX.getDimSize(0);
+    int column = aNewX.getDimSize(1);
+    int slice = aNewX.getDimSize(2);
+    dim3 blockDim(4,4,4);
+    dim3 gridDim(row / 4 + 1, column / 4 + 1, slice / 4 + 1);
+    float *data = nullptr;
+    cudaMalloc((void **)&data, sizeof(float) * row * column * slice);
+    CudaMatrix result = Aurora::CudaMatrix::fromRawData(data, row, column, slice);
+    interp3Kernel<<<gridDim, blockDim>>>(texObj, data, aStartX, aDx, aEndX, aStartY, aDy, aEndY, aStartZ, aDz, aEndZ,
+                                         aNewX.getData(), aNewY.getData(), aNewZ.getData(), row, column, slice, aOutValue);
+
+    cudaDeviceSynchronize();
+    // The kernel has finished, so the texture object and its backing array can be released;
+    // without this every call leaks both.
+    cudaDestroyTextureObject(texObj);
+    cudaFreeArray(cuArray);
+    return result;
+}
--- a/src/Function3D.cuh
+++ b/src/Function3D.cuh
@@ -0,0 +1,13 @@
+#ifndef __FUNCTION3D_CUDA__
+#define __FUNCTION3D_CUDA__
+#include "CudaMatrix.h"
+#include "AuroraDefs.h"
+
+namespace Aurora
+{
+    CudaMatrix interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
+                       float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
+                       const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue);
+}
+
+#endif // __FUNCTION3D_CUDA__
--- a/src/Function3D.h
+++ b/src/Function3D.h
@@ -4,8 +4,10 @@

 #include "Matrix.h"
 #include "Function1D.h"
-#include "CudaMatrix.h"

+#if USE_CUDA
+#include "CudaMatrix.h"
+#endif
 namespace Aurora {

    /**
@@ -17,8 +19,6 @@ namespace Aurora {
     */
    Matrix ones(int aRow, int aColumn, int aSlice = 0);

-    CudaMatrix onesCuda(int aRow, int aColumn, int aSlice = 0);
-
    /**
     * 创建全部为1的方阵
     * @param aSquareRow
@@ -26,8 +26,6 @@ namespace Aurora {
     */
    Matrix ones(int aSquareRow);

-    CudaMatrix onesCuda(int aSquareRow);
-
    /**
     * 创建全部为0的数组，矩阵
     * @param aRow 行数，必须大于0
@@ -37,25 +35,30 @@ namespace Aurora {
     */
    Matrix zeros(int aRow, int aColumn, int aSlice = 0);

-    CudaMatrix zerosCuda(int aRow, int aColumn, int aSlice = 0);
-
    /**
     * 创建全部为0的方阵
     * @param aSquareRow
     * @return 全部为0的方阵
     */
    Matrix zeros(int aSquareRow);
-    CudaMatrix zerosCuda(int aSquareRow);
    Matrix interp3(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod);
    Matrix meshgridInterp3(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod, float aExtrapval);
    Matrix interpn(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod);

    Matrix size(const Matrix &aMatrix);
-    CudaMatrix size(const CudaMatrix &aMatrix);

    int size(const Matrix &aMatrix,int dims);
-    int size(const CudaMatrix &aMatrix,int dims);

+
+    #if USE_CUDA
+    CudaMatrix onesCuda(int aRow, int aColumn, int aSlice = 0);
+    CudaMatrix onesCuda(int aSquareRow);
+
+    CudaMatrix zerosCuda(int aRow, int aColumn, int aSlice = 0);
+    CudaMatrix zerosCuda(int aSquareRow);
+    CudaMatrix size(const CudaMatrix &aMatrix);
+    int size(const CudaMatrix &aMatrix,int dims);
+    #endif
 };


--- a/src/Matrix.cpp
+++ b/src/Matrix.cpp
@@ -1,6 +1,4 @@
 #include "Matrix.h"
-
-
 #include <cmath>
 #include <complex>
 #include <cstddef>
--- a/src/Matrix.h
+++ b/src/Matrix.h
@@ -10,7 +10,10 @@
 namespace Aurora {

    const int $ = -1;
+
+    #if USE_CUDA
    class CudaMatrix;
+    #endif
    
    class Matrix {
    public:
@@ -285,9 +288,10 @@ namespace Aurora {
        }

        void forceReshape(int rows, int columns, int slices);
-
+        
+        #if USE_CUDA
        CudaMatrix toDeviceMatrix() const;
-
+        #endif

    private:
        ValueType mValueType = Normal;
--- a/src/main.cxx
+++ b/src/main.cxx
@@ -7,15 +7,19 @@
 #include <complex>

 #include "Matrix.h"
-#include "CudaMatrix.h"
 #include "Function.h"
 #include "Function1D.h"
 #include "Function2D.h"
 #include "Function3D.h"
 #include "MatlabReader.h"

+#if USE_CUDA
+#include "CudaMatrix.h"
+#endif //USE_CUDA
+
 int main()
 {
+    #if USE_CUDA
    auto A = Aurora::zeros(1000,1,1);
    auto B = Aurora::zeros(1000,1,1);
    for (size_t i = 0; i < 1000; i++)
@@ -115,5 +119,6 @@ int main()
            }
        }
    }
+    #endif //USE_CUDA
    return 0;
 }
--- a/test/CudaMatrix_Test.cpp
+++ b/test/CudaMatrix_Test.cpp
@@ -2558,7 +2558,55 @@ TEST_F(CudaMatrix_Test, MatrixCompare){
    }
    {
        auto R= (9!=B);
-        auto dhR = (9!=dB).toHostMatrix();
+        auto dhR = (dB!=9).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            EXPECT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+        {
+        auto R= (9<B);
+        auto dhR = (dB>9).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            EXPECT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    {
+        auto R= (9>B);
+        auto dhR = (dB<9).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            EXPECT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    {
+        auto R= (9<=B);
+        auto dhR = (dB>=9).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            EXPECT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    {
+        auto R= (9>=B);
+        auto dhR = (dB<=9).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            EXPECT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    {
+        auto R= (9==B);
+        auto dhR = (dB == 9).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            EXPECT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    {
+        auto R= (9!=B);
+        auto dhR = (dB!=9).toHostMatrix();
        for (size_t i = 0; i < 1000; i++)
        {
            EXPECT_FLOAT_EQ(R[i],dhR[i]);
--- a/test/Function2D_Cuda_Test.cpp
+++ b/test/Function2D_Cuda_Test.cpp
@@ -5,6 +5,7 @@
 #include "Function.h"
 #include "Matrix.h"
 #include "TestUtility.h"
+#include "MatlabReader.h"

 #include "Function2D.h"
 #include "Function2D.cuh"
@@ -18,11 +19,15 @@ protected:
    static void TearDownTestCase(){
    }
    public:
+    Aurora::Matrix mSignal;
+    Aurora::CudaMatrix dmSignal;
    Aurora::Matrix B;
    Aurora::CudaMatrix dB;

    void SetUp(){
-
+        MatlabReader m("/home/krad/TestData/peaks.mat");  // NOTE(review): absolute path under one user's home; the fixture only loads on that machine
+        mSignal = m.read("AScan_env_norm");  // host copy of the signal used by the findPeaks test
+        dmSignal = mSignal.toDeviceMatrix();  // device counterpart of mSignal
    }
    void TearDown(){
    }
@@ -997,3 +1002,17 @@ TEST_F(Function2D_Cuda_Test, hilbert) {
        EXPECT_NEAR(ret1[i], ret2.getValue(i), 0.01);
    }
 }
+
+TEST_F(Function2D_Cuda_Test, findPeaks) {
+    // Smoke test (no value checks yet): run the device findPeaks on the fixture signal and print the first 10 indices.
+    ASSERT_GT(mSignal.getDimSize(1), 0) << "peaks.mat fixture not loaded; the index buffer would be empty";
+    int* indexs = new int[mSignal.getDimSize(1)*10]();  // zero-filled so slots findPeaks leaves unwritten print as 0 instead of garbage
+    auto ret1 = Aurora::findPeaks(dmSignal,10, 0.2, 0.05,indexs);  // presumably 10 = max peaks per column, matching the buffer size - TODO confirm
+    auto reH = ret1.toHostMatrix();  // result is converted to a host matrix but not inspected yet
+    for(unsigned int i=0; i<10; ++i)
+    {
+        printf("%d,",indexs[i]);
+    }
+    delete [] indexs;
+}
+
--- a/test/Function2D_Test.cpp
+++ b/test/Function2D_Test.cpp
@@ -2,6 +2,8 @@
 #include <vector>
 #include "TestUtility.h"

+#include "MatlabReader.h"
+
 #include "Matrix.h"
 #include "Function.h"
 #include "Function1D.h"
@@ -16,7 +18,11 @@ protected:
    }
    static void TearDownTestCase(){
    }
+    public:
+    Aurora::Matrix mSignal;
    void SetUp(){
+        MatlabReader m("/home/krad/TestData/peaks.mat");  // NOTE(review): absolute path under one user's home; the fixture only loads on that machine
+        mSignal = m.read("AScan_env_norm");  // signal used by the findPeaks test
    }
    void TearDown(){
    }
@@ -573,3 +579,15 @@ TEST_F(Function2D_Test, sub2ind) {

 }

+TEST_F(Function2D_Test, findPeaks) {
+    // Smoke test (no value checks yet): run the host findPeaks on the fixture signal and print the first 10 indices.
+    ASSERT_GT(mSignal.getDimSize(1), 0) << "peaks.mat fixture not loaded; the index buffer would be empty";
+    int* indexs = new int[mSignal.getDimSize(1)*10]();  // zero-filled so slots findPeaksHost leaves unwritten print as 0 instead of garbage
+    Aurora::findPeaksHost(mSignal,10, 0.2, 0.05,indexs);  // presumably 10 = max peaks per column, matching the buffer size - TODO confirm
+    for(unsigned int i=0; i<10; ++i)
+    {
+        printf("%d,",indexs[i]);
+    }
+    delete [] indexs;
+}
+
Author	SHA1	Message	Date
kradchen	6b16f6e01a	feat: add findPeaks Function	2025-10-23 15:49:32 +08:00
sunwen	52da7bcd35	Fix：While valid equals zero problem.	2025-10-17 09:53:03 +08:00
sunwen	0bbb02839a	feat: change description by sortrows.	2025-09-16 13:24:48 +08:00
sunwen	e36ca5c82f	feat: Add interp3 in Function3D.	2025-06-27 13:21:18 +08:00
kradchen	9dd7d97237	feat: memory Improve for ifft & conj	2025-03-26 13:02:43 +08:00
kradchen	3ea6c84087	feat: replace CudaMatrix compare function ( deprecated in new cuda version ) with lambda	2025-03-18 16:00:10 +08:00
sunwen	4ba0d23d54	fix: To modify the acos function to return 0 when the result is NaN.	2025-03-11 15:51:05 +08:00
sunwen	04e0c4b38d	faet: Add valid cuda function.	2024-12-24 10:44:06 +08:00
kradchen	5407c3ccb6	feat: make cuda version build by USE_CUDA args	2024-12-18 17:55:14 +08:00
kradchen	c5a64dccc0	feat: make thust support new version more simple	2024-12-18 17:53:22 +08:00
kradchen	4e155617af	feat: Clean redudent Package from config.cmake	2024-12-18 13:26:33 +08:00
kradchen	ea188e5ad4	feat: Add new CUDA find logic to CMAKE file.	2024-12-18 13:16:53 +08:00
kradchen	f3ec70661c	feat: Add some new cuda support to Aurora	2024-12-18 11:40:11 +08:00
kradchen	81f9a97e85	feat: Add matwriter reference code to package cmake	2024-12-18 11:12:05 +08:00