CudaMatrix Operator logic patch1
This commit is contained in:
@@ -24,12 +24,17 @@ endif()
|
|||||||
|
|
||||||
set(MKL_INTERFACE_FULL intel_lp64)
|
set(MKL_INTERFACE_FULL intel_lp64)
|
||||||
find_package(MKL CONFIG REQUIRED)
|
find_package(MKL CONFIG REQUIRED)
|
||||||
include_directories(./ ./src /usr/local/include/eigen3 ./thirdparty/include)
|
include_directories(
|
||||||
|
./ ./src /usr/local/include/eigen3 ./thirdparty/include)
|
||||||
file(GLOB_RECURSE cpp_files ./src/*.cpp)
|
file(GLOB_RECURSE cpp_files ./src/*.cpp)
|
||||||
file(GLOB_RECURSE cxx_files ./src/*.cxx)
|
file(GLOB_RECURSE cxx_files ./src/*.cxx)
|
||||||
add_executable(Aurora ${cpp_files} ${cxx_files} )
|
|
||||||
|
|
||||||
|
if (Aurora_USE_CUDA)
|
||||||
|
file(GLOB_RECURSE cu_files ./src/*.cu)
|
||||||
|
file(GLOB_RECURSE cuh_files ./src/*.cuh)
|
||||||
|
endif(Aurora_USE_CUDA)
|
||||||
|
|
||||||
|
add_executable(Aurora ${cpp_files} ${cxx_files} ${cu_files} ${cuh_files})
|
||||||
target_compile_options(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
|
target_compile_options(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
|
||||||
target_include_directories(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
|
target_include_directories(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||||
target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
|
target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include "CudaMatrixPrivate.cuh"
|
||||||
|
|
||||||
using namespace Aurora;
|
using namespace Aurora;
|
||||||
|
|
||||||
@@ -239,4 +240,14 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif // USE_CUDA
|
|
||||||
|
/**
 * Element-wise addition of two device matrices.
 *
 * @param aMatrix right-hand operand; must have the same data size and value
 *                type as this matrix.
 * @return a new CudaMatrix holding the sum, or a default-constructed
 *         CudaMatrix when the operands are incompatible or the device
 *         allocation for the result fails.
 */
CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) return CudaMatrix();
    // The buffer below is sized from this matrix's value type, so the other
    // operand must store the same number of floats per element.
    if (this->getValueType() != aMatrix.getValueType()) return CudaMatrix();

    // Number of floats actually stored: data size scaled by the value type
    // (NOTE(review): assumes getValueType() is the per-element float count,
    // as the allocation size implies — confirm against the enum definition).
    const unsigned long long floatCount = getDataSize() * getValueType();

    float* data = nullptr;
    if (cudaMalloc((void**)&data, sizeof(float) * floatCount) != cudaSuccess) {
        // Same "empty matrix" failure convention as the size mismatch above.
        return CudaMatrix();
    }

    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    // Add every allocated float, not just getDataSize() of them, so no part
    // of the result buffer is left uninitialized.
    unaryAdd(this->getData(), aMatrix.getData(), out.getData(), floatCount);
    return out;
}
|
||||||
|
#endif // USE_CUDA
|
||||||
|
|||||||
71
src/CudaMatrixPrivate.cu
Normal file
71
src/CudaMatrixPrivate.cu
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
#include <CudaMatrixPrivate.cuh>
|
||||||
|
#include <math.h>
|
||||||
|
#include <thrust/transform.h>
|
||||||
|
#include <thrust/functional.h>
|
||||||
|
#include <thrust/execution_policy.h>
|
||||||
|
using namespace thrust::placeholders;
|
||||||
|
|
||||||
|
/**
 * Unary functor that raises its argument to a fixed power with powf().
 * Callable from host and device code, e.g. as the operation passed to
 * thrust::transform.
 */
struct PowOperator{
    // Power applied to every input value.
    float exponent;

    __host__ __device__
    PowOperator(float v):exponent(v) {}

    // Replaces the exponent used by subsequent calls.
    __host__ __device__
    void setExponent(float v){
        exponent = v;
    }

    // Returns x raised to `exponent`. Const-qualified so the functor can be
    // invoked through const copies/references as well as mutable ones.
    __host__ __device__
    float operator()(const float& x) const {
        return powf(x, exponent);
    }
};
|
||||||
|
|
||||||
|
// Element-wise sum of two arrays: out[i] = in1[i] + in2[i] for i in [0, length).
// Runs under the thrust::device execution policy.
void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::plus<float>());
}
|
||||||
|
|
||||||
|
// Adds the scalar in2 to every element: out[i] = in1[i] + in2 for i in [0, length).
// Runs under the thrust::device execution policy.
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
{
    // Placeholder expression must add (the previous `in2*_1` scaled instead).
    thrust::transform(thrust::device, in1, in1 + length, out, _1 + in2);
}
|
||||||
|
|
||||||
|
// Element-wise product of two arrays: out[i] = in1[i] * in2[i] for i in [0, length).
// Runs under the thrust::device execution policy.
void unaryMul(float* in1, float* in2, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::multiplies<float>());
}
|
||||||
|
|
||||||
|
// Element-wise negation: out[i] = -in1[i] for i in [0, length).
// Runs under the thrust::device execution policy.
void unaryNeg(float* in1, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, out, thrust::negate<float>());
}
|
||||||
|
|
||||||
|
// Element-wise difference of two arrays: out[i] = in1[i] - in2[i] for i in [0, length).
// Runs under the thrust::device execution policy.
void unarySub(float* in1, float* in2, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::minus<float>());
}
|
||||||
|
|
||||||
|
// Element-wise quotient of two arrays: out[i] = in1[i] / in2[i] for i in [0, length).
// Runs under the thrust::device execution policy.
void unaryDiv(float* in1, float* in2, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::divides<float>());
}
|
||||||
|
|
||||||
|
// Element-wise power: out[i] = in1[i] ^ N for i in [0, length).
// Exponents 0, 1 and 2 take cheaper paths (fill, copy, square); every other
// exponent goes through PowOperator (powf). All paths run under the
// thrust::device execution policy, like the other helpers in this file.
void unaryPow(float* in1, float N,float* out, unsigned long length){
    if (N == 0.0f)
    {
        // x^0 is 1 for every x, so the result is all ones (not zeros).
        thrust::fill(thrust::device, out, out + length, 1.0f);
        return;
    }
    if (N == 1.0f)
    {
        // x^1 == x: plain copy. The explicit policy keeps Thrust from
        // treating the raw pointers as host iterators.
        thrust::copy(thrust::device, in1, in1 + length, out);
        return;
    }
    if (N == 2.0f){
        thrust::transform(thrust::device, in1, in1 + length, out, thrust::square<float>());
        return;
    }
    // General case: powf cannot be applied to a placeholder expression, so
    // use the functor that wraps it.
    thrust::transform(thrust::device, in1, in1 + length, out, PowOperator(N));
}
|
||||||
|
|
||||||
15
src/CudaMatrixPrivate.cuh
Normal file
15
src/CudaMatrixPrivate.cuh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
|
||||||
|
// Declarations of the element-wise float array helpers implemented in
// CudaMatrixPrivate.cu. Every function processes `length` elements starting
// at the given pointers and writes its result to `out`.
//
// The include guard avoids identifiers with double underscores (reserved for
// the implementation) and is named after this file so it cannot collide with
// a guard belonging to another CudaMatrix header.
#ifndef AURORA_CUDAMATRIXPRIVATE_CUH
#define AURORA_CUDAMATRIXPRIVATE_CUH

// Binary add of two arrays, element by element.
void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
// Overload taking a single scalar as the second operand.
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
// Binary multiply of two arrays, element by element.
void unaryMul(float* in1, float* in2, float* out, unsigned long length);
// Negation of every element of in1.
void unaryNeg(float* in1, float* out, unsigned long length);
// Raises every element of in1 to the power N.
void unaryPow(float* in1, float N,float* out, unsigned long length);

// Binary subtract (in1 - in2), element by element.
void unarySub(float* in1, float* in2, float* out, unsigned long length);
// Binary divide (in1 / in2), element by element.
void unaryDiv(float* in1, float* in2, float* out, unsigned long length);

#endif // AURORA_CUDAMATRIXPRIVATE_CUH
|
||||||
20
src/main.cxx
20
src/main.cxx
@@ -7,6 +7,7 @@
|
|||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
|
#include "CudaMatrix.h"
|
||||||
#include "Function.h"
|
#include "Function.h"
|
||||||
#include "Function1D.h"
|
#include "Function1D.h"
|
||||||
#include "Function2D.h"
|
#include "Function2D.h"
|
||||||
@@ -15,5 +16,24 @@
|
|||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
auto A = Aurora::zeros(1000,1,1);
|
||||||
|
auto B = Aurora::zeros(1000,1,1);
|
||||||
|
for (size_t i = 0; i < 1000; i++)
|
||||||
|
{
|
||||||
|
A[i] = -1;
|
||||||
|
B[i] = i;
|
||||||
|
}
|
||||||
|
auto C = A+B;
|
||||||
|
auto dA = A.toDeviceMatrix();
|
||||||
|
auto dB = B.toDeviceMatrix();
|
||||||
|
auto dC = (dA+dB).toHostMatrix();
|
||||||
|
for (size_t i = 0; i < 1000; i++)
|
||||||
|
{
|
||||||
|
if (C[i]!=dC[i]){
|
||||||
|
printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dC[i]);
|
||||||
|
return 9;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user