Compare commits
1 Commits
dev
...
CPUPerform
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
087b02af88 |
@@ -8,10 +8,9 @@ set(Aurora_USE_CUDA ON)
|
|||||||
|
|
||||||
if (Aurora_USE_CUDA)
|
if (Aurora_USE_CUDA)
|
||||||
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
||||||
set(CUDA_TOOLKIT_ROOT /usr/local/cuda)
|
|
||||||
|
|
||||||
enable_language(CUDA)
|
enable_language(CUDA)
|
||||||
find_package(CUDA)
|
|
||||||
find_package(CUDAToolkit REQUIRED)
|
find_package(CUDAToolkit REQUIRED)
|
||||||
add_definitions(-DUSE_CUDA)
|
add_definitions(-DUSE_CUDA)
|
||||||
endif(Aurora_USE_CUDA)
|
endif(Aurora_USE_CUDA)
|
||||||
@@ -44,12 +43,12 @@ target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
|
|||||||
target_link_libraries(Aurora PUBLIC OpenMP::OpenMP_CXX)
|
target_link_libraries(Aurora PUBLIC OpenMP::OpenMP_CXX)
|
||||||
target_link_libraries(Aurora PUBLIC matio)
|
target_link_libraries(Aurora PUBLIC matio)
|
||||||
if (Aurora_USE_CUDA)
|
if (Aurora_USE_CUDA)
|
||||||
target_include_directories(Aurora PRIVATE ./src ${CUDA_INCLUDE_DIRS})
|
target_include_directories(Aurora PRIVATE ./src /usr/local/cuda/include)
|
||||||
set_target_properties(Aurora PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
set_target_properties(Aurora PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||||
-arch=sm_75 --expt-extended-lambda
|
-arch=sm_75 --expt-extended-lambda
|
||||||
>)
|
>)
|
||||||
target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} ${CUDA_cufft_LIBRARY})
|
target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||||
target_link_libraries(Aurora PRIVATE ${CUDA_cublas_LIBRARY})
|
target_link_libraries(Aurora PRIVATE ${CUDA_cublas_LIBRARY})
|
||||||
target_link_libraries(Aurora PRIVATE ${CUDA_cusolver_LIBRARY})
|
target_link_libraries(Aurora PRIVATE ${CUDA_cusolver_LIBRARY})
|
||||||
endif(Aurora_USE_CUDA)
|
endif(Aurora_USE_CUDA)
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
set(MKL_INTERFACE_FULL intel_lp64)
|
set(MKL_INTERFACE_FULL intel_lp64)
|
||||||
find_package(OpenMP REQUIRED)
|
find_package(OpenMP REQUIRED)
|
||||||
find_package(MKL CONFIG REQUIRED)
|
find_package(MKL CONFIG REQUIRED)
|
||||||
|
if(${USE_CUDA})
|
||||||
|
enable_language(CUDA)
|
||||||
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(Aurora_MAJOR_VERSION 1)
|
set(Aurora_MAJOR_VERSION 1)
|
||||||
set(Aurora_MINOR_VERSION 0)
|
set(Aurora_MINOR_VERSION 0)
|
||||||
@@ -9,11 +13,17 @@ set(Aurora_BUILD_VERSION 0)
|
|||||||
get_filename_component(Aurora_DIR "${CMAKE_CURRENT_LIST_DIR}/" PATH)
|
get_filename_component(Aurora_DIR "${CMAKE_CURRENT_LIST_DIR}/" PATH)
|
||||||
|
|
||||||
message("Aurora_DIR: ${Aurora_DIR}")
|
message("Aurora_DIR: ${Aurora_DIR}")
|
||||||
file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/[AFSCM]*.cpp" "${Aurora_DIR}/src/Matrix*.cpp" "${Aurora_DIR}/src/*.cu")
|
if(${USE_CUDA})
|
||||||
|
file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/*.cpp" "${Aurora_DIR}/src/*.cu")
|
||||||
|
set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX ${CUDA_cublas_LIBRARY} ${CUDA_cusolver_LIBRARY})
|
||||||
|
else()
|
||||||
|
set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX )
|
||||||
|
file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/*.cpp" )
|
||||||
|
endif()
|
||||||
message( ${Aurora_Source})
|
message( ${Aurora_Source})
|
||||||
set(Aurora_INCLUDE_DIRS "${Aurora_DIR}/src" "${Aurora_DIR}/thirdparty/include" $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
|
set(Aurora_INCLUDE_DIRS "${Aurora_DIR}/src" "${Aurora_DIR}/thirdparty/include" $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||||
|
|
||||||
set(Aurora_Complie_Options $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS> )
|
set(Aurora_Complie_Options $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS> )
|
||||||
set(Aurora_Libraries $<LINK_ONLY:MKL::MKL> OpenMP::OpenMP_CXX ${CUDA_cublas_LIBRARY} ${CUDA_cusolver_LIBRARY})
|
|
||||||
set(Aurora_FOUND TRUE)
|
set(Aurora_FOUND TRUE)
|
||||||
message(Aurora Found)
|
message(Aurora Found)
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
#include "AuroraDefs.h"
|
#include "AuroraDefs.h"
|
||||||
|
#include "Function1D.cuh"
|
||||||
#include <complex>
|
#include <complex>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
#include "Function1D.cuh"
|
|
||||||
#include "CudaMatrix.h"
|
#include "CudaMatrix.h"
|
||||||
|
|
||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
|
|||||||
@@ -5,11 +5,129 @@
|
|||||||
#include <thrust/functional.h>
|
#include <thrust/functional.h>
|
||||||
#include <thrust/execution_policy.h>
|
#include <thrust/execution_policy.h>
|
||||||
|
|
||||||
|
|
||||||
#include "AuroraDefs.h"
|
#include "AuroraDefs.h"
|
||||||
#include "AuroraThrustIterator.cuh"
|
#include "AuroraThrustIterator.cuh"
|
||||||
using namespace thrust::placeholders;
|
using namespace thrust::placeholders;
|
||||||
|
|
||||||
|
struct PowOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
PowOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return powf(x, exponent);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareGOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
CompareGOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return (exponent<x?1.0:.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareGEOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
CompareGEOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return (exponent<=x?1.0:.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareEOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
CompareEOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return (exponent==x?1.0:.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareNEOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
CompareNEOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return (exponent!=x?1.0:.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareLOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
CompareLOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return (exponent>x?1.0:.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareLEOp: public thrust::unary_function<float, float>{
|
||||||
|
float exponent;
|
||||||
|
CompareLEOp(float v):exponent(v) {}
|
||||||
|
void setExponent(float v){
|
||||||
|
exponent = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x) {
|
||||||
|
return (exponent>=x?1.0:.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct CompareAGOp{
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x,const float& y) {
|
||||||
|
return x>y?1:0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareAGEOp{
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x,const float& y) {
|
||||||
|
return x>=y?1:0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareAEOp{
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x,const float& y) {
|
||||||
|
return x==y?1:0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompareANEOp{
|
||||||
|
__host__ __device__
|
||||||
|
float operator()(const float& x,const float& y) {
|
||||||
|
return x!=y?1:0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
typedef thrust::complex<float> complexf;
|
typedef thrust::complex<float> complexf;
|
||||||
|
|
||||||
@@ -533,51 +651,29 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
|
|||||||
thrust::transform(thrust::device,in1,in1+length,out,op);
|
thrust::transform(thrust::device,in1,in1+length,out,op);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto lambdaPow = [N] __host__ __device__(float x) {
|
thrust::transform(thrust::device,in1,in1+length,out,PowOp(N));
|
||||||
return powf(x,N);
|
|
||||||
};
|
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,lambdaPow);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void unaryCompare(float* in1, const float& in2, float* out, unsigned long length, int type){
|
void unaryCompare(float* in1, const float& in2, float* out, unsigned long length, int type){
|
||||||
|
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case G:
|
case G:
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in1,in1+length,out,CompareGOp(in2));
|
||||||
{
|
|
||||||
return in2 < x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case GE:
|
case GE:
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in1,in1+length,out,CompareGEOp(in2));
|
||||||
{
|
|
||||||
return in2 <= x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case E:
|
case E:
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in1,in1+length,out,CompareEOp(in2));
|
||||||
{
|
|
||||||
return in2 == x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case NE:
|
case NE:
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in1,in1+length,out,CompareNEOp(in2));
|
||||||
{
|
|
||||||
return in2 != x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case LE:
|
case LE:
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,[in2] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in1,in1+length,out,CompareLEOp(in2));
|
||||||
{
|
|
||||||
return in2 >= x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case L:
|
case L:
|
||||||
thrust::transform(thrust::device,in1,in1+length,out,[in2]__host__ __device__(const float &x)
|
thrust::transform(thrust::device,in1,in1+length,out,CompareLOp(in2));
|
||||||
{
|
|
||||||
return in2 > x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@@ -587,89 +683,51 @@ void unaryCompare(const float& in1, float* in2, float* out, unsigned long length
|
|||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case G:
|
case G:
|
||||||
thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in2,in2+length,out,CompareLOp(in1));
|
||||||
{
|
|
||||||
return in1 > x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case GE:
|
case GE:
|
||||||
thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in2,in2+length,out,CompareLEOp(in1));
|
||||||
{
|
|
||||||
return in1 >= x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case E:
|
case E:
|
||||||
thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in2,in2+length,out,CompareEOp(in1));
|
||||||
{
|
|
||||||
return in1 == x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case NE:
|
case NE:
|
||||||
thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in2,in2+length,out,CompareNEOp(in1));
|
||||||
{
|
|
||||||
return in1 != x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case LE:
|
case LE:
|
||||||
thrust::transform(thrust::device,in2,in2+length,out, [in1] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in2,in2+length,out,CompareGEOp(in1));
|
||||||
{
|
|
||||||
return in1 <= x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case L:
|
case L:
|
||||||
thrust::transform(thrust::device,in2,in2+length,out,[in1] __host__ __device__ (const float &x)
|
thrust::transform(thrust::device,in2,in2+length,out,CompareGOp(in1));
|
||||||
{
|
|
||||||
return in1 < x ? 1.0 : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type){
|
void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type){
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case G:
|
case G:
|
||||||
thrust::transform(thrust::device,in1,in1+length,in2,out, []__host__ __device__(float x, float y)
|
thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAGOp());
|
||||||
{
|
|
||||||
return x > y ? 1. : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case GE:
|
case GE:
|
||||||
thrust::transform(thrust::device,in1,in1+length,in2,out,[]__host__ __device__(float x, float y)
|
thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAGEOp());
|
||||||
{
|
|
||||||
return x >= y ? 1. : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case E:
|
case E:
|
||||||
thrust::transform(thrust::device,in1,in1+length,in2,out,[]__host__ __device__(float x, float y)
|
thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAEOp());
|
||||||
{
|
|
||||||
return x == y ? 1. : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case NE:
|
case NE:
|
||||||
thrust::transform(thrust::device,in1,in1+length,in2,out, []__host__ __device__(float x, float y)
|
thrust::transform(thrust::device,in1,in1+length,in2,out,CompareANEOp());
|
||||||
{
|
|
||||||
return x != y ? 1. : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case LE:
|
case LE:
|
||||||
thrust::transform(thrust::device,in1,in1+length,in2,out,[]__host__ __device__ (float x, float y)
|
thrust::transform(thrust::device,in2,in2+length,in1,out,CompareAGEOp());
|
||||||
{
|
|
||||||
return x <= y ? 1. : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
case L:
|
case L:
|
||||||
thrust::transform(thrust::device,in1,in1+length,in2,out, [] __host__ __device__ (float x, float y)
|
thrust::transform(thrust::device,in2,in2+length,in1,out,CompareAGOp());
|
||||||
{
|
|
||||||
return x < y ? 1. : .0;
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void thrustFill(float* aBegin, float* aEnd, float aValue)
|
void thrustFill(float* aBegin, float* aEnd, float aValue)
|
||||||
|
|||||||
@@ -35,7 +35,55 @@ namespace {
|
|||||||
|
|
||||||
uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
|
uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
|
||||||
|
|
||||||
inline void convertValue(short* aValue ,float* des){
|
inline void convertValue(float aValue ,float* des){
|
||||||
|
float value = aValue;
|
||||||
|
ushort *exponentPtr = (ushort *)&value;
|
||||||
|
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
|
||||||
|
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
|
||||||
|
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
|
||||||
|
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
|
||||||
|
float signValue = aValue;
|
||||||
|
short *signPtr = (short *)&signValue;
|
||||||
|
uint sign_bit[4] = {
|
||||||
|
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
|
||||||
|
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
|
||||||
|
float fraction3Value = aValue;
|
||||||
|
ushort *fraction3Ptr = (ushort *)&fraction3Value;
|
||||||
|
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
|
||||||
|
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
|
||||||
|
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
|
||||||
|
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
|
||||||
|
uint hidden_bit[4] = {
|
||||||
|
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
|
||||||
|
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||||
|
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
|
||||||
|
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||||
|
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
|
||||||
|
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||||
|
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
|
||||||
|
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||||
|
};
|
||||||
|
int outputPtr[4] = {0};
|
||||||
|
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
|
||||||
|
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1))
|
||||||
|
: (temp >> std::abs(exponentPtr[0] - 1));
|
||||||
|
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
|
||||||
|
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1))
|
||||||
|
: (temp >> std::abs(exponentPtr[1] - 1));
|
||||||
|
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
|
||||||
|
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1))
|
||||||
|
: (temp >> std::abs(exponentPtr[2] - 1));
|
||||||
|
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
|
||||||
|
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1))
|
||||||
|
: (temp >> std::abs(exponentPtr[3] - 1));
|
||||||
|
des[0] = outputPtr[0];
|
||||||
|
des[1] = outputPtr[1];
|
||||||
|
des[2] = outputPtr[2];
|
||||||
|
des[3] = outputPtr[3];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void convertValue2(short* aValue ,float* des){
|
||||||
ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
|
ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
|
||||||
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
|
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
|
||||||
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
|
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
|
||||||
@@ -568,9 +616,7 @@ Matrix Aurora::acosd(const Matrix& aMatrix)
|
|||||||
{
|
{
|
||||||
resultData[i] = resultData[i] * 180 / PI;
|
resultData[i] = resultData[i] * 180 / PI;
|
||||||
}
|
}
|
||||||
Matrix result = Matrix::New(resultData, aMatrix);
|
return Matrix::New(resultData, aMatrix);
|
||||||
nantoval(result, 0);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Matrix Aurora::conj(const Matrix& aMatrix)
|
Matrix Aurora::conj(const Matrix& aMatrix)
|
||||||
@@ -1050,14 +1096,14 @@ Matrix Aurora::convertfp16tofloat(short* aData, int aRows, int aColumns)
|
|||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (size_t i = 0; i < quaterSize; i+=8) {
|
for (size_t i = 0; i < quaterSize; i+=8) {
|
||||||
//循环展开以避免过度的线程调用
|
//循环展开以避免过度的线程调用
|
||||||
if (i < quaterSize)::convertValue((short*)(input+i*4), output + (i) * 4);
|
if (i < quaterSize)::convertValue2((short*)(input+i*4), output + (i) * 4);
|
||||||
if (i+1 < quaterSize)::convertValue((short*)(input+(i+1)*4), output + (i+1) * 4);
|
if (i+1 < quaterSize)::convertValue2((short*)(input+(i+1)*4), output + (i+1) * 4);
|
||||||
if (i+2 < quaterSize)::convertValue((short*)(input+(i+2)*4), output + (i+2) * 4);
|
if (i+2 < quaterSize)::convertValue2((short*)(input+(i+2)*4), output + (i+2) * 4);
|
||||||
if (i+3 < quaterSize)::convertValue((short*)(input+(i+3)*4), output + (i+3) * 4);
|
if (i+3 < quaterSize)::convertValue2((short*)(input+(i+3)*4), output + (i+3) * 4);
|
||||||
if (i+4 < quaterSize)::convertValue((short*)(input+(i+4)*4), output + (i+4) * 4);
|
if (i+4 < quaterSize)::convertValue2((short*)(input+(i+4)*4), output + (i+4) * 4);
|
||||||
if (i+5 < quaterSize)::convertValue((short*)(input+(i+5)*4), output + (i+5) * 4);
|
if (i+5 < quaterSize)::convertValue2((short*)(input+(i+5)*4), output + (i+5) * 4);
|
||||||
if (i+6 < quaterSize)::convertValue((short*)(input+(i+6)*4), output + (i+6) * 4);
|
if (i+6 < quaterSize)::convertValue2((short*)(input+(i+6)*4), output + (i+6) * 4);
|
||||||
if (i+7 < quaterSize)::convertValue((short*)(input+(i+7)*4), output + (i+7) * 4);
|
if (i+7 < quaterSize)::convertValue2((short*)(input+(i+7)*4), output + (i+7) * 4);
|
||||||
}
|
}
|
||||||
return Matrix::New(output,aRows,aColumns,1);
|
return Matrix::New(output,aRows,aColumns,1);
|
||||||
}
|
}
|
||||||
|
|||||||
1090
src/Function1D.cu
1090
src/Function1D.cu
File diff suppressed because it is too large
Load Diff
@@ -63,8 +63,6 @@ namespace Aurora
|
|||||||
|
|
||||||
CudaMatrix conj(const CudaMatrix& aMatrix);
|
CudaMatrix conj(const CudaMatrix& aMatrix);
|
||||||
|
|
||||||
CudaMatrix conj(CudaMatrix&& aMatrix);
|
|
||||||
|
|
||||||
float norm(const CudaMatrix& aMatrix, NormMethod aNormMethod);
|
float norm(const CudaMatrix& aMatrix, NormMethod aNormMethod);
|
||||||
|
|
||||||
CudaMatrix transpose(const CudaMatrix& aMatrix);
|
CudaMatrix transpose(const CudaMatrix& aMatrix);
|
||||||
@@ -126,4 +124,5 @@ namespace Aurora
|
|||||||
void compareSet(CudaMatrix& aDesAndCompareMatrix,CudaMatrix& aOtherCompareMatrix, float newValue,CompareOp op);
|
void compareSet(CudaMatrix& aDesAndCompareMatrix,CudaMatrix& aOtherCompareMatrix, float newValue,CompareOp op);
|
||||||
void compareSet(CudaMatrix& aCompareMatrix,float compareValue, CudaMatrix& aNewValueMatrix,CompareOp op);
|
void compareSet(CudaMatrix& aCompareMatrix,float compareValue, CudaMatrix& aNewValueMatrix,CompareOp op);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //AURORA_CUDA_FUNCTION1D_H
|
#endif //AURORA_CUDA_FUNCTION1D_H
|
||||||
@@ -1034,87 +1034,3 @@ Matrix Aurora::sub2ind(const Matrix &aVMatrixSize, std::initializer_list<Matrix>
|
|||||||
delete [] strides;
|
delete [] strides;
|
||||||
return Matrix::New(output,returnVectorSize,1,1);
|
return Matrix::New(output,returnVectorSize,1,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Aurora::findPeaksHost(const Matrix & aData, int aNPeaks, float aMinPeakHeight, float MinPeakProminece,
|
|
||||||
int* outIndex)
|
|
||||||
{
|
|
||||||
int signalSize = aData.getDimSize(0);
|
|
||||||
int signalCount = aData.getDimSize(1);
|
|
||||||
|
|
||||||
#pragma omp parallel for
|
|
||||||
for (size_t threadIndex = 0; threadIndex < signalCount; threadIndex++)
|
|
||||||
{
|
|
||||||
float* dataPointer = aData.getData() + threadIndex*signalSize ;
|
|
||||||
float newPeak = dataPointer[0];
|
|
||||||
float newValley = dataPointer[0];
|
|
||||||
int peakIndex = 0;
|
|
||||||
float higherValley;
|
|
||||||
int indexs[32];
|
|
||||||
float values[32];
|
|
||||||
for (size_t i = 0; i < aNPeaks; i++)
|
|
||||||
{
|
|
||||||
indexs[i] = signalSize;
|
|
||||||
values[i] = 0;
|
|
||||||
}
|
|
||||||
int save_index=0;
|
|
||||||
|
|
||||||
for (int i = 1; i < signalSize - 1; ++i)
|
|
||||||
{
|
|
||||||
// find peaks
|
|
||||||
if (dataPointer[i] > dataPointer[i - 1] && dataPointer[i] > dataPointer[i + 1])
|
|
||||||
{
|
|
||||||
newPeak = dataPointer[i];
|
|
||||||
peakIndex = i;
|
|
||||||
};
|
|
||||||
// find valley
|
|
||||||
if (dataPointer[i] < dataPointer[i - 1] && dataPointer[i] < dataPointer[i + 1])
|
|
||||||
{
|
|
||||||
higherValley = std::max(newValley, dataPointer[i]);
|
|
||||||
newValley = dataPointer[i];
|
|
||||||
if (newPeak >= aMinPeakHeight)
|
|
||||||
{
|
|
||||||
float prominece = newPeak - higherValley;
|
|
||||||
if (prominece >= MinPeakProminece)
|
|
||||||
{
|
|
||||||
if (save_index < aNPeaks)
|
|
||||||
{
|
|
||||||
values[save_index] = newPeak;
|
|
||||||
indexs[save_index] = peakIndex;
|
|
||||||
save_index++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (size_t j = 0; j < aNPeaks; j++)
|
|
||||||
{
|
|
||||||
if (values[j] < newPeak)
|
|
||||||
{
|
|
||||||
std::swap(values[j], newPeak);
|
|
||||||
std::swap(indexs[j], peakIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (save_index>=aNPeaks)
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < aNPeaks; i++)
|
|
||||||
{
|
|
||||||
for (size_t j = i+1; j < aNPeaks; j++)
|
|
||||||
{
|
|
||||||
if (indexs[i]>indexs[j])
|
|
||||||
{
|
|
||||||
std::swap(values[j], values[i]);
|
|
||||||
std::swap(indexs[j], indexs[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < aNPeaks; i++)
|
|
||||||
{
|
|
||||||
// aOutPeaks[threadIndex*aNPeaks+i] = values[i];
|
|
||||||
outIndex[threadIndex*aNPeaks+i] = indexs[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
1304
src/Function2D.cu
1304
src/Function2D.cu
File diff suppressed because it is too large
Load Diff
@@ -64,7 +64,6 @@ namespace Aurora
|
|||||||
|
|
||||||
CudaMatrix fft(const CudaMatrix &aMatrix, long aFFTSize = -1);
|
CudaMatrix fft(const CudaMatrix &aMatrix, long aFFTSize = -1);
|
||||||
CudaMatrix ifft(const CudaMatrix &aMatrix, long aFFTSize = -1);
|
CudaMatrix ifft(const CudaMatrix &aMatrix, long aFFTSize = -1);
|
||||||
CudaMatrix ifft(CudaMatrix && aMatrix);
|
|
||||||
|
|
||||||
CudaMatrix hilbert(const CudaMatrix &aMatrix);
|
CudaMatrix hilbert(const CudaMatrix &aMatrix);
|
||||||
|
|
||||||
@@ -87,19 +86,6 @@ namespace Aurora
|
|||||||
*/
|
*/
|
||||||
CudaMatrix ifft_symmetric(const CudaMatrix &aMatrix,long aLength);
|
CudaMatrix ifft_symmetric(const CudaMatrix &aMatrix,long aLength);
|
||||||
|
|
||||||
CudaMatrix valid(const CudaMatrix& aData, const CudaMatrix aValid);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* findPeaks 按列进行峰查找和匹配
|
|
||||||
* @attention 不要给aOutIndexs提前分配内存
|
|
||||||
* @param aData 输入
|
|
||||||
* @param aNpeaks 峰数量
|
|
||||||
* @param aMinPeakHeight 最小高度
|
|
||||||
* @param aMinPeakProminence 最小相对高度
|
|
||||||
* @param aOutIndexs 空指针,会在函数内分配device内存
|
|
||||||
* @return 筛选出的峰高度
|
|
||||||
*/
|
|
||||||
CudaMatrix findPeaks(const CudaMatrix& aData, int aNpeaks, float aMinPeakHeight, float aMinPeakProminence, int** aOutIndexs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __FUNCTION2D_CUDA_H__
|
#endif // __FUNCTION2D_CUDA_H__
|
||||||
@@ -107,10 +107,10 @@ namespace Aurora
|
|||||||
* 当第一列包含重复的元素时,sortrows 会根据下一列中的值进行排序,并对后续的相等值重复此行为。
|
* 当第一列包含重复的元素时,sortrows 会根据下一列中的值进行排序,并对后续的相等值重复此行为。
|
||||||
* @attention 目前不支持三维,不支持复数
|
* @attention 目前不支持三维,不支持复数
|
||||||
* @param aMatrix 目标矩阵
|
* @param aMatrix 目标矩阵
|
||||||
* @param indexMatrix 排序后各行的原索引矩阵指针,必须要有
|
* @param indexMatrix 排序后各行的原索引矩阵指针,非必须
|
||||||
* @return 排序后矩阵
|
* @return 排序后矩阵
|
||||||
*/
|
*/
|
||||||
Matrix sortrows(const Matrix &aMatrix, Matrix* indexMatrix);
|
Matrix sortrows(const Matrix &aMatrix, Matrix* indexMatrix=nullptr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 对矩阵求中间值 按列, 目前不支持三维,不支持复数
|
* 对矩阵求中间值 按列, 目前不支持三维,不支持复数
|
||||||
@@ -178,7 +178,7 @@ namespace Aurora
|
|||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Matrix sub2ind(const Matrix &aVMatrixSize, std::initializer_list<Matrix> aSliceIdxs);
|
Matrix sub2ind(const Matrix &aVMatrixSize, std::initializer_list<Matrix> aSliceIdxs);
|
||||||
void findPeaksHost(const Matrix & aData, int aNPeaks, float aMinPeakHeight, float MinPeakProminece,int* outIndex);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // AURORA_FUNCTION2D_H
|
#endif // AURORA_FUNCTION2D_H
|
||||||
|
|||||||
@@ -5,10 +5,12 @@
|
|||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
|
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
#include "CudaMatrix.h"
|
|
||||||
#include "CudaMatrixPrivate.cuh"
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#endif // USE_CUDA
|
#include "CudaMatrixPrivate.cuh"
|
||||||
|
#include "CudaMatrix.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//必须在Eigen之前
|
//必须在Eigen之前
|
||||||
#include "AuroraDefs.h"
|
#include "AuroraDefs.h"
|
||||||
@@ -72,6 +74,7 @@ Matrix Aurora::ones(int aRow, int aColumn, int aSlice) {
|
|||||||
return Matrix::New(data,rowSize,colSize,aSlice);
|
return Matrix::New(data,rowSize,colSize,aSlice);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Matrix Aurora::ones(int aSquareRow) {
|
Matrix Aurora::ones(int aSquareRow) {
|
||||||
return Aurora::ones(aSquareRow, aSquareRow);
|
return Aurora::ones(aSquareRow, aSquareRow);
|
||||||
}
|
}
|
||||||
@@ -92,6 +95,7 @@ Matrix Aurora::zeros(int aRow, int aColumn, int aSlice) {
|
|||||||
return Matrix::New(data,rowSize,colSize,sliceSize);
|
return Matrix::New(data,rowSize,colSize,sliceSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Matrix Aurora::zeros(int aSquareRow) {
|
Matrix Aurora::zeros(int aSquareRow) {
|
||||||
return Aurora::zeros(aSquareRow, aSquareRow);
|
return Aurora::zeros(aSquareRow, aSquareRow);
|
||||||
}
|
}
|
||||||
@@ -214,7 +218,7 @@ Matrix Aurora::meshgridInterp3(const Matrix& aX, const Matrix& aY, const Matrix&
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if USE_CUDA
|
#ifdef USE_CUDA
|
||||||
CudaMatrix Aurora::onesCuda(int aRow, int aColumn, int aSlice){
|
CudaMatrix Aurora::onesCuda(int aRow, int aColumn, int aSlice){
|
||||||
if (aRow == 0 || aColumn == 0)
|
if (aRow == 0 || aColumn == 0)
|
||||||
{
|
{
|
||||||
@@ -251,7 +255,6 @@ CudaMatrix Aurora::zerosCuda(int aRow, int aColumn, int aSlice) {
|
|||||||
return CudaMatrix::fromRawData(data,rowSize,colSize,sliceSize);
|
return CudaMatrix::fromRawData(data,rowSize,colSize,sliceSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CudaMatrix Aurora::zerosCuda(int aSquareRow) {
|
CudaMatrix Aurora::zerosCuda(int aSquareRow) {
|
||||||
return Aurora::zerosCuda(aSquareRow, aSquareRow);
|
return Aurora::zerosCuda(aSquareRow, aSquareRow);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,82 +0,0 @@
|
|||||||
#include "Function3D.cuh"
|
|
||||||
|
|
||||||
using namespace Aurora;
|
|
||||||
|
|
||||||
__global__ void interp3Kernel(cudaTextureObject_t aTexObj, float* aOutputData, float aStartX, float aDx, float aEndX, float aStartY, float aDy
|
|
||||||
, float aEndY, float aStartZ, float aDz, float aEndZ, float* aNewX, float* aNewY, float* aNewZ
|
|
||||||
, int aOutputRowSize, int aOutputColumnSize, int aOutputSliceSize, float aOutValue)
|
|
||||||
{
|
|
||||||
int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
int zIndex = blockIdx.z * blockDim.z + threadIdx.z;
|
|
||||||
|
|
||||||
if(xIndex > aOutputRowSize - 1 || yIndex > aOutputColumnSize - 1 || zIndex > aOutputSliceSize - 1)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
size_t index = zIndex * aOutputRowSize * aOutputColumnSize + yIndex * aOutputRowSize + xIndex;
|
|
||||||
float x = aNewX[index];
|
|
||||||
float y = aNewY[index];
|
|
||||||
float z = aNewZ[index];
|
|
||||||
if(x > aEndX || x < aStartX || y > aEndY || y < aStartY || z > aEndZ || z < aStartZ)
|
|
||||||
{
|
|
||||||
aOutputData[index] = aOutValue;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
aOutputData[index] = tex3D<float>(aTexObj, (x - aStartX) / aDx + 0.5, (y - aStartY) / aDy + 0.5, (z - aStartZ) / aDz + 0.5);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
CudaMatrix Aurora::interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
|
|
||||||
float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
|
|
||||||
const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue)
|
|
||||||
{
|
|
||||||
cudaTextureObject_t texObj;
|
|
||||||
size_t dimX = aValue.getDimSize(1);
|
|
||||||
size_t dimY = aValue.getDimSize(0);
|
|
||||||
size_t dimZ = aValue.getDimSize(2);
|
|
||||||
cudaExtent extent = make_cudaExtent(dimX, dimY, dimZ);
|
|
||||||
|
|
||||||
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
|
|
||||||
|
|
||||||
cudaArray* cuArray;
|
|
||||||
cudaMalloc3DArray(&cuArray, &channelDesc, extent);
|
|
||||||
|
|
||||||
|
|
||||||
cudaMemcpy3DParms copyParams = {0};
|
|
||||||
copyParams.srcPtr = make_cudaPitchedPtr(aValue.getData(), dimX * sizeof(float), dimX, dimY);
|
|
||||||
copyParams.dstArray = cuArray;
|
|
||||||
copyParams.extent = extent;
|
|
||||||
copyParams.kind = cudaMemcpyDeviceToDevice;
|
|
||||||
cudaMemcpy3D(©Params);
|
|
||||||
|
|
||||||
cudaResourceDesc resDesc;
|
|
||||||
memset(&resDesc, 0, sizeof(resDesc));
|
|
||||||
resDesc.resType = cudaResourceTypeArray;
|
|
||||||
resDesc.res.array.array = cuArray;
|
|
||||||
|
|
||||||
cudaTextureDesc texDesc;
|
|
||||||
memset(&texDesc, 0, sizeof(texDesc));
|
|
||||||
texDesc.filterMode = cudaFilterModeLinear;
|
|
||||||
texDesc.addressMode[0] = cudaAddressModeClamp;
|
|
||||||
texDesc.addressMode[1] = cudaAddressModeClamp;
|
|
||||||
texDesc.addressMode[2] = cudaAddressModeClamp;
|
|
||||||
texDesc.readMode = cudaReadModeElementType;
|
|
||||||
cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
|
|
||||||
|
|
||||||
int row = aNewX.getDimSize(0);
|
|
||||||
int column = aNewX.getDimSize(1);
|
|
||||||
int slice = aNewX.getDimSize(2);
|
|
||||||
dim3 blockDim(4,4,4);
|
|
||||||
dim3 gridDim(row / 4 + 1, column / 4 + 1, slice / 4 + 1);
|
|
||||||
float *data = nullptr;
|
|
||||||
cudaMalloc((void **)&data, sizeof(float) * row * column * slice);
|
|
||||||
CudaMatrix result = Aurora::CudaMatrix::fromRawData(data, row, column, slice);
|
|
||||||
interp3Kernel<<<gridDim, blockDim>>>(texObj, data, aStartX, aDx, aEndX, aStartY, aDy, aEndY, aStartZ, aDz, aEndZ,
|
|
||||||
aNewX.getData(), aNewY.getData(), aNewZ.getData(), row, column, slice, aOutValue);
|
|
||||||
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
#ifndef __FUNCTION3D_CUDA__
|
|
||||||
#define __FUNCTION3D_CUDA__
|
|
||||||
#include "CudaMatrix.h"
|
|
||||||
#include "AuroraDefs.h"
|
|
||||||
|
|
||||||
namespace Aurora
|
|
||||||
{
|
|
||||||
CudaMatrix interp3(float aStartX, float aDx, float aEndX, float aStartY, float aDy, float aEndY,
|
|
||||||
float aStartZ, float aDz, float aEndZ, const CudaMatrix& aValue,
|
|
||||||
const CudaMatrix& aNewX, const CudaMatrix& aNewY, const CudaMatrix& aNewZ, float aOutValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // __FUNCTION3D_CUDA_H__
|
|
||||||
@@ -4,10 +4,8 @@
|
|||||||
|
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
#include "Function1D.h"
|
#include "Function1D.h"
|
||||||
|
|
||||||
#if USE_CUDA
|
|
||||||
#include "CudaMatrix.h"
|
#include "CudaMatrix.h"
|
||||||
#endif
|
|
||||||
namespace Aurora {
|
namespace Aurora {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -19,6 +17,8 @@ namespace Aurora {
|
|||||||
*/
|
*/
|
||||||
Matrix ones(int aRow, int aColumn, int aSlice = 0);
|
Matrix ones(int aRow, int aColumn, int aSlice = 0);
|
||||||
|
|
||||||
|
CudaMatrix onesCuda(int aRow, int aColumn, int aSlice = 0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 创建全部为1的方阵
|
* 创建全部为1的方阵
|
||||||
* @param aSquareRow
|
* @param aSquareRow
|
||||||
@@ -26,6 +26,8 @@ namespace Aurora {
|
|||||||
*/
|
*/
|
||||||
Matrix ones(int aSquareRow);
|
Matrix ones(int aSquareRow);
|
||||||
|
|
||||||
|
CudaMatrix onesCuda(int aSquareRow);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 创建全部为0的数组,矩阵
|
* 创建全部为0的数组,矩阵
|
||||||
* @param aRow 行数,必须大于0
|
* @param aRow 行数,必须大于0
|
||||||
@@ -35,30 +37,25 @@ namespace Aurora {
|
|||||||
*/
|
*/
|
||||||
Matrix zeros(int aRow, int aColumn, int aSlice = 0);
|
Matrix zeros(int aRow, int aColumn, int aSlice = 0);
|
||||||
|
|
||||||
|
CudaMatrix zerosCuda(int aRow, int aColumn, int aSlice = 0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 创建全部为0的方阵
|
* 创建全部为0的方阵
|
||||||
* @param aSquareRow
|
* @param aSquareRow
|
||||||
* @return 全部为0的方阵
|
* @return 全部为0的方阵
|
||||||
*/
|
*/
|
||||||
Matrix zeros(int aSquareRow);
|
Matrix zeros(int aSquareRow);
|
||||||
|
CudaMatrix zerosCuda(int aSquareRow);
|
||||||
Matrix interp3(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod);
|
Matrix interp3(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod);
|
||||||
Matrix meshgridInterp3(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod, float aExtrapval);
|
Matrix meshgridInterp3(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod, float aExtrapval);
|
||||||
Matrix interpn(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod);
|
Matrix interpn(const Matrix& aX, const Matrix& aY, const Matrix& aZ, const Matrix& aV, const Matrix& aX1, const Matrix& aY1, const Matrix& aZ1,InterpnMethod aMethod);
|
||||||
|
|
||||||
Matrix size(const Matrix &aMatrix);
|
Matrix size(const Matrix &aMatrix);
|
||||||
|
CudaMatrix size(const CudaMatrix &aMatrix);
|
||||||
|
|
||||||
int size(const Matrix &aMatrix,int dims);
|
int size(const Matrix &aMatrix,int dims);
|
||||||
|
|
||||||
|
|
||||||
#if USE_CUDA
|
|
||||||
CudaMatrix onesCuda(int aRow, int aColumn, int aSlice = 0);
|
|
||||||
CudaMatrix onesCuda(int aSquareRow);
|
|
||||||
|
|
||||||
CudaMatrix zerosCuda(int aRow, int aColumn, int aSlice = 0);
|
|
||||||
CudaMatrix zerosCuda(int aSquareRow);
|
|
||||||
CudaMatrix size(const CudaMatrix &aMatrix);
|
|
||||||
int size(const CudaMatrix &aMatrix,int dims);
|
int size(const CudaMatrix &aMatrix,int dims);
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
|
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <complex>
|
#include <complex>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|||||||
@@ -10,10 +10,7 @@
|
|||||||
namespace Aurora {
|
namespace Aurora {
|
||||||
|
|
||||||
const int $ = -1;
|
const int $ = -1;
|
||||||
|
|
||||||
#if USE_CUDA
|
|
||||||
class CudaMatrix;
|
class CudaMatrix;
|
||||||
#endif
|
|
||||||
|
|
||||||
class Matrix {
|
class Matrix {
|
||||||
public:
|
public:
|
||||||
@@ -289,9 +286,8 @@ namespace Aurora {
|
|||||||
|
|
||||||
void forceReshape(int rows, int columns, int slices);
|
void forceReshape(int rows, int columns, int slices);
|
||||||
|
|
||||||
#if USE_CUDA
|
|
||||||
CudaMatrix toDeviceMatrix() const;
|
CudaMatrix toDeviceMatrix() const;
|
||||||
#endif
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ValueType mValueType = Normal;
|
ValueType mValueType = Normal;
|
||||||
|
|||||||
@@ -7,19 +7,15 @@
|
|||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
|
#include "CudaMatrix.h"
|
||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
#include "Function1D.h"
|
#include "Function1D.h"
|
||||||
#include "Function2D.h"
|
#include "Function2D.h"
|
||||||
#include "Function3D.h"
|
#include "Function3D.h"
|
||||||
#include "MatlabReader.h"
|
#include "MatlabReader.h"
|
||||||
|
|
||||||
#if USE_CUDA
|
|
||||||
#include "CudaMatrix.h"
|
|
||||||
#endif //USE_CUDA
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
#if USE_CUDA
|
|
||||||
auto A = Aurora::zeros(1000,1,1);
|
auto A = Aurora::zeros(1000,1,1);
|
||||||
auto B = Aurora::zeros(1000,1,1);
|
auto B = Aurora::zeros(1000,1,1);
|
||||||
for (size_t i = 0; i < 1000; i++)
|
for (size_t i = 0; i < 1000; i++)
|
||||||
@@ -119,6 +115,5 @@ int main()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif //USE_CUDA
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2558,55 +2558,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto R= (9!=B);
|
auto R= (9!=B);
|
||||||
auto dhR = (dB!=9).toHostMatrix();
|
auto dhR = (9!=dB).toHostMatrix();
|
||||||
for (size_t i = 0; i < 1000; i++)
|
|
||||||
{
|
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
auto R= (9<B);
|
|
||||||
auto dhR = (dB>9).toHostMatrix();
|
|
||||||
for (size_t i = 0; i < 1000; i++)
|
|
||||||
{
|
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
auto R= (9>B);
|
|
||||||
auto dhR = (dB<9).toHostMatrix();
|
|
||||||
for (size_t i = 0; i < 1000; i++)
|
|
||||||
{
|
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
auto R= (9<=B);
|
|
||||||
auto dhR = (dB>=9).toHostMatrix();
|
|
||||||
for (size_t i = 0; i < 1000; i++)
|
|
||||||
{
|
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
auto R= (9>=B);
|
|
||||||
auto dhR = (dB<=9).toHostMatrix();
|
|
||||||
for (size_t i = 0; i < 1000; i++)
|
|
||||||
{
|
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
auto R= (9==B);
|
|
||||||
auto dhR = (dB == 9).toHostMatrix();
|
|
||||||
for (size_t i = 0; i < 1000; i++)
|
|
||||||
{
|
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
auto R= (9!=B);
|
|
||||||
auto dhR = (dB!=9).toHostMatrix();
|
|
||||||
for (size_t i = 0; i < 1000; i++)
|
for (size_t i = 0; i < 1000; i++)
|
||||||
{
|
{
|
||||||
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
EXPECT_FLOAT_EQ(R[i],dhR[i]);
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
#include "TestUtility.h"
|
#include "TestUtility.h"
|
||||||
#include "MatlabReader.h"
|
|
||||||
|
|
||||||
#include "Function2D.h"
|
#include "Function2D.h"
|
||||||
#include "Function2D.cuh"
|
#include "Function2D.cuh"
|
||||||
@@ -19,15 +18,11 @@ protected:
|
|||||||
static void TearDownTestCase(){
|
static void TearDownTestCase(){
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
Aurora::Matrix mSignal;
|
|
||||||
Aurora::CudaMatrix dmSignal;
|
|
||||||
Aurora::Matrix B;
|
Aurora::Matrix B;
|
||||||
Aurora::CudaMatrix dB;
|
Aurora::CudaMatrix dB;
|
||||||
|
|
||||||
void SetUp(){
|
void SetUp(){
|
||||||
MatlabReader m("/home/krad/TestData/peaks.mat");
|
|
||||||
mSignal = m.read("AScan_env_norm");
|
|
||||||
dmSignal = mSignal.toDeviceMatrix();
|
|
||||||
}
|
}
|
||||||
void TearDown(){
|
void TearDown(){
|
||||||
}
|
}
|
||||||
@@ -1002,17 +997,3 @@ TEST_F(Function2D_Cuda_Test, hilbert) {
|
|||||||
EXPECT_NEAR(ret1[i], ret2.getValue(i), 0.01);
|
EXPECT_NEAR(ret1[i], ret2.getValue(i), 0.01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(Function2D_Cuda_Test, findPeaks) {
|
|
||||||
|
|
||||||
int* indexs = new int[mSignal.getDimSize(1)*10];
|
|
||||||
auto ret1 = Aurora::findPeaks(dmSignal,10, 0.2, 0.05,indexs);
|
|
||||||
|
|
||||||
auto reH = ret1.toHostMatrix();
|
|
||||||
for(unsigned int i=0; i<10; ++i)
|
|
||||||
{
|
|
||||||
printf("%d,",indexs[i]);
|
|
||||||
}
|
|
||||||
delete [] indexs;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,6 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "TestUtility.h"
|
#include "TestUtility.h"
|
||||||
|
|
||||||
#include "MatlabReader.h"
|
|
||||||
|
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
#include "Function1D.h"
|
#include "Function1D.h"
|
||||||
@@ -18,11 +16,7 @@ protected:
|
|||||||
}
|
}
|
||||||
static void TearDownTestCase(){
|
static void TearDownTestCase(){
|
||||||
}
|
}
|
||||||
public:
|
|
||||||
Aurora::Matrix mSignal;
|
|
||||||
void SetUp(){
|
void SetUp(){
|
||||||
MatlabReader m("/home/krad/TestData/peaks.mat");
|
|
||||||
mSignal = m.read("AScan_env_norm");
|
|
||||||
}
|
}
|
||||||
void TearDown(){
|
void TearDown(){
|
||||||
}
|
}
|
||||||
@@ -579,15 +573,3 @@ TEST_F(Function2D_Test, sub2ind) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(Function2D_Test, findPeaks) {
|
|
||||||
|
|
||||||
int* indexs = new int[mSignal.getDimSize(1)*10];
|
|
||||||
Aurora::findPeaksHost(mSignal,10, 0.2, 0.05,indexs);
|
|
||||||
|
|
||||||
for(unsigned int i=0; i<10; ++i)
|
|
||||||
{
|
|
||||||
printf("%d,",indexs[i]);
|
|
||||||
}
|
|
||||||
delete [] indexs;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user