Add max and min to function2D

This commit is contained in:
kradchen
2023-12-04 14:53:46 +08:00
parent 3438121b0c
commit 09d9e52eb0
7 changed files with 1134 additions and 67 deletions

View File

@@ -8,5 +8,13 @@
#include "mkl.h" #include "mkl.h"
#define PI 3.141592653589793238462 #define PI 3.141592653589793238462
namespace Aurora
{
// Selects how the 2D reduction functions (such as max/min) traverse a matrix.
enum FunctionDirection
{
// Reduce every column to a single value (one result per column).
Column,
// Reduce every row to a single value (one result per row).
Row,
// Reduce the whole matrix to a single value.
All
};
}
#endif //AURORA_AURORADEFS_H #endif //AURORA_AURORADEFS_H

View File

@@ -204,6 +204,34 @@ Matrix Aurora::min(const Matrix &aMatrix, FunctionDirection direction) {
return min(aMatrix,direction,a,b); return min(aMatrix,direction,a,b);
} }
/**
 * Element-wise minimum of a vector against every column or every row of a matrix.
 *
 * @param aVec Either a column vector whose length equals the matrix's first
 *             dimension, or a row vector whose length equals the second one.
 * @param aMat The matrix the vector is compared against.
 * @return A matrix created via Matrix::New(output, aMat) holding the minima,
 *         or an empty Matrix (after logging to stderr) when the shapes do not fit.
 */
Matrix vxmMin(const Matrix &aVec, const Matrix &aMat)
{
    const bool isColumnVector =
        aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    if (isColumnVector) {
        // Column vector: pair it with each contiguous column of the matrix.
        float* result = Aurora::malloc(aMat.getDataSize());
        for (int col = 0; col < aMat.getDimSize(1); ++col) {
            const auto columnStart = aMat.getDimSize(0) * col;
            vsFminI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + columnStart, 1,
                    result + columnStart, 1);
        }
        return Matrix::New(result, aMat);
    }

    const bool isRowVector =
        aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);
    if (isRowVector) {
        // Row vector: pair it with each matrix row; a row's elements are
        // getDimSize(0) apart, so that value is used as the stride on both
        // the matrix input and the output.
        float* result = Aurora::malloc(aMat.getDataSize());
        for (int row = 0; row < aMat.getDimSize(0); ++row) {
            vsFminI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + row, aMat.getDimSize(0),
                    result + row, aMat.getDimSize(0));
        }
        return Matrix::New(result, aMat);
    }

    // Neither layout matched: report and hand back an empty matrix.
    std::cerr
        << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
        << std::endl;
    return Matrix();
}
Matrix Aurora::min(const Matrix &aMatrix, const Matrix &aOther) { Matrix Aurora::min(const Matrix &aMatrix, const Matrix &aOther) {
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) { if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
std::cerr std::cerr
@@ -231,30 +259,22 @@ Matrix Aurora::min(const Matrix &aMatrix, const Matrix &aOther) {
vsFminI(matrix.getDataSize(),matrix.getData(),1,&scalar,0,output,1); vsFminI(matrix.getDataSize(),matrix.getData(),1,&scalar,0,output,1);
return Matrix::New(output,matrix); return Matrix::New(output,matrix);
} }
else if (aMatrix.getDimSize(1) == 1 || aOther.getDimSize(0) == 1) { else if (aMatrix.isVector()) {
if (aMatrix.getDimSize(1) == 1){ return ::vxmMin(aMatrix,aOther);
float* output = malloc(aOther.getDataSize());
for (int i = 0; i < aOther.getDimSize(1); ++i) {
vsFminI(aMatrix.getDataSize(), aMatrix.getData(), 1, aOther.getData() + aOther.getDimSize(0) * i, 1,
output + aOther.getDimSize(0) * i, 1);
} }
return Matrix::New(output,aOther); else if (aOther.isVector()) {
return ::vxmMin(aOther,aMatrix);
} }
else{
float* output = malloc(aMatrix.getDataSize());
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
vsFminI(aOther.getDataSize(), aOther.getData(), 1, aMatrix.getData() + i, aMatrix.getDimSize(0),
output + i, aOther.getDimSize(0));
}
return Matrix::New(output,aMatrix);
}
}
else{
std::cerr std::cerr
<< "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]" << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
<< std::endl; << std::endl;
return Matrix(); return Matrix();
} }
Matrix Aurora::min(const Matrix &aMatrix, const float aValue){
float *output = malloc(1);
output[0] = aValue;
return min(aMatrix,Matrix::New(output, 1,1,1));
} }
Matrix Aurora::max(const Matrix &aMatrix, FunctionDirection direction) { Matrix Aurora::max(const Matrix &aMatrix, FunctionDirection direction) {
@@ -314,6 +334,34 @@ Matrix Aurora::max(const Matrix &aMatrix, FunctionDirection direction, long& row
} }
} }
/**
 * Element-wise maximum of a vector against every column or every row of a matrix.
 *
 * @param aVec Either a column vector whose length equals the matrix's first
 *             dimension, or a row vector whose length equals the second one.
 * @param aMat The matrix the vector is compared against.
 * @return A matrix created via Matrix::New(output, aMat) holding the maxima,
 *         or an empty Matrix (after logging to stderr) when the shapes do not fit.
 */
Matrix vxmMax(const Matrix &aVec, const Matrix &aMat)
{
    const bool isColumnVector =
        aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    if (isColumnVector) {
        // Column vector: pair it with each contiguous column of the matrix.
        float* result = Aurora::malloc(aMat.getDataSize());
        for (int col = 0; col < aMat.getDimSize(1); ++col) {
            const auto columnStart = aMat.getDimSize(0) * col;
            vsFmaxI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + columnStart, 1,
                    result + columnStart, 1);
        }
        return Matrix::New(result, aMat);
    }

    const bool isRowVector =
        aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);
    if (isRowVector) {
        // Row vector: pair it with each matrix row; a row's elements are
        // getDimSize(0) apart, so that value is used as the stride on both
        // the matrix input and the output.
        float* result = Aurora::malloc(aMat.getDataSize());
        for (int row = 0; row < aMat.getDimSize(0); ++row) {
            vsFmaxI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + row, aMat.getDimSize(0),
                    result + row, aMat.getDimSize(0));
        }
        return Matrix::New(result, aMat);
    }

    // Neither layout matched: report and hand back an empty matrix.
    std::cerr
        << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
        << std::endl;
    return Matrix();
}
Matrix Aurora::max(const Matrix &aMatrix, const Matrix &aOther) { Matrix Aurora::max(const Matrix &aMatrix, const Matrix &aOther) {
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) { if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
std::cerr std::cerr
@@ -341,31 +389,17 @@ Matrix Aurora::max(const Matrix &aMatrix, const Matrix &aOther) {
vsFmaxI(matrix.getDataSize(),matrix.getData(),1,&scalar,0,output,1); vsFmaxI(matrix.getDataSize(),matrix.getData(),1,&scalar,0,output,1);
return Matrix::New(output,matrix); return Matrix::New(output,matrix);
} }
else if (aMatrix.getDimSize(1) == 1 || aOther.getDimSize(0) == 1) { else if (aMatrix.isVector()) {
if (aMatrix.getDimSize(1) == 1){ return ::vxmMax(aMatrix,aOther);
float* output = malloc(aOther.getDataSize());
for (int i = 0; i < aOther.getDimSize(1); ++i) {
vsFmaxI(aMatrix.getDataSize(), aMatrix.getData(), 1, aOther.getData() + aOther.getDimSize(0) * i, 1,
output + aOther.getDimSize(0) * i, 1);
} }
return Matrix::New(output,aOther); else if (aOther.isVector()) {
return ::vxmMax(aOther,aMatrix);
} }
else{
float* output = malloc(aMatrix.getDataSize());
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
vsFmaxI(aOther.getDataSize(), aOther.getData(), 1, aMatrix.getData() + i, aMatrix.getDimSize(0),
output + i, aOther.getDimSize(0));
}
return Matrix::New(output,aMatrix);
}
}
else{
std::cerr std::cerr
<< "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]" << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
<< std::endl; << std::endl;
return Matrix(); return Matrix();
} }
}
Matrix Aurora::max(const Matrix &aMatrix, const float aValue){ Matrix Aurora::max(const Matrix &aMatrix, const float aValue){
float *output = malloc(1); float *output = malloc(1);

478
src/Function2D.cu Normal file
View File

@@ -0,0 +1,478 @@
#include <Function2D.cuh>
#include <cfloat>
#include <cstddef>
#include <iostream>
#include <cmath>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>
#include <thrust/device_ptr.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/copy.h>
#include <thrust/functional.h>
#include <thrust/complex.h>
#include <cuda_runtime.h>
using namespace Aurora;
/**
 * Computes the maximum of one column per thread block.
 *
 * Block b reads the aColSize consecutive floats starting at
 * aInputData[b * aColSize] and writes their maximum to aOutput[b].
 *
 * @param aInputData Device pointer to the input values.
 * @param aOutput    Device pointer receiving one value per block.
 * @param aColSize   Number of elements in each column.
 *
 * blockDim.x must not exceed 256 (the size of the shared buffer); any block
 * size up to that limit is handled, including non-powers of two.
 */
__global__ void maxColKernel(float* aInputData, float* aOutput, unsigned int aColSize)
{
    __shared__ float shared_data[256];
    const unsigned int tid = threadIdx.x;
    // 64-bit offset so large matrices cannot overflow the index arithmetic.
    const size_t columnStart = static_cast<size_t>(blockIdx.x) * aColSize;

    // Each thread folds its strided share of the column into a register;
    // threads beyond the column length contribute the neutral value.
    float best = (tid < aColSize) ? aInputData[columnStart + tid] : -FLT_MAX;
    for (size_t row = static_cast<size_t>(tid) + blockDim.x; row < aColSize; row += blockDim.x) {
        best = fmaxf(best, aInputData[columnStart + row]);
    }
    shared_data[tid] = best;
    __syncthreads();

    // Tree reduction over the per-thread partials. The active range is halved
    // (rounding up) each step, so writers (index < active - half) never touch
    // a slot another thread is reading (index >= half), and no partial is
    // dropped when the block size is not a power of two.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (tid + half < active) {
            shared_data[tid] = fmaxf(shared_data[tid], shared_data[tid + half]);
        }
        __syncthreads();
        active = half;
    }

    // Thread 0 publishes the block's result.
    if (tid == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
/**
 * Computes the maximum of one row per thread block.
 *
 * Block b reads the aRowSize elements aInputData[b + k * aColSize]
 * (k = 0 .. aRowSize-1) and writes their maximum to aOutput[b].
 *
 * @param aInputData Device pointer to the input values.
 * @param aOutput    Device pointer receiving one value per block.
 * @param aColSize   Distance, in elements, between consecutive entries of a row.
 * @param aRowSize   Number of elements in each row.
 *
 * blockDim.x must not exceed 512 (the size of the shared buffer); any block
 * size up to that limit is handled, including non-powers of two.
 */
__global__ void maxRowKernel(float* aInputData, float* aOutput,unsigned int aColSize, unsigned int aRowSize)
{
    __shared__ float shared_data[512];
    const unsigned int tid = threadIdx.x;
    // 64-bit stride/offset so large matrices cannot overflow the index arithmetic.
    const size_t stride = aColSize;
    const size_t rowStart = blockIdx.x;

    // Each thread folds its strided share of the row into a register;
    // threads beyond the row length contribute the neutral value.
    float best = (tid < aRowSize) ? aInputData[tid * stride + rowStart] : -FLT_MAX;
    for (size_t col = static_cast<size_t>(tid) + blockDim.x; col < aRowSize; col += blockDim.x) {
        best = fmaxf(best, aInputData[col * stride + rowStart]);
    }
    shared_data[tid] = best;
    __syncthreads();

    // Tree reduction over the per-thread partials. The active range is halved
    // (rounding up) each step, so writers (index < active - half) never touch
    // a slot another thread is reading (index >= half), and no partial is
    // dropped when the block size is not a power of two.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (tid + half < active) {
            shared_data[tid] = fmaxf(shared_data[tid], shared_data[tid + half]);
        }
        __syncthreads();
        active = half;
    }

    // Thread 0 publishes the block's result.
    if (tid == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
/**
 * Convenience overload of max() for callers that do not need the position
 * of the maximum; forwards to the overload taking index outputs.
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, FunctionDirection direction) {
    // Placeholders for the index outputs, discarded on return.
    long ignoredRow;
    long ignoredCol;
    return max(aMatrix, direction, ignoredRow, ignoredCol);
}
/**
 * Maximum of a real 2D device matrix along the requested direction.
 *
 * @param aMatrix   Input matrix; 3D, complex or empty inputs are rejected.
 * @param direction Column: one maximum per column, returned as 1 x getDimSize(1).
 *                  Row: one maximum per row, returned as getDimSize(0) x 1.
 *                  All: a single 1 x 1 value. Vectors are always reduced as All.
 * @param rowIdx    Written only on the All path: row of the maximum.
 * @param colIdx    Written only on the All path: column of the maximum.
 * @return The reduced matrix, or an empty CudaMatrix (after logging to stderr)
 *         for unsupported input.
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx)
{
    if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
        std::cerr<< (aMatrix.getDimSize(2) > 1 ? "max() not support 3D data!" : "max() not support complex value type!")
                 << std::endl;
        return CudaMatrix();
    }
    // An empty matrix would otherwise dereference the end iterator, divide by a
    // zero dimension, or launch a kernel with an empty grid.
    if (aMatrix.getDataSize() == 0) {
        std::cerr << "max() not support empty data!" << std::endl;
        return CudaMatrix();
    }
    // For a vector, row-wise and column-wise reduction coincide with a full reduction.
    if (aMatrix.isVector()){
        direction = All;
    }
    switch (direction)
    {
        case All: {
            thrust::device_ptr<float> first = thrust::device_pointer_cast(aMatrix.getData());
            auto best = thrust::max_element(thrust::device, first, first + aMatrix.getDataSize());
            // Keep the flat index in a long: an int overflows beyond 2^31 elements.
            const long index = static_cast<long>(best - first);
            const long rows = static_cast<long>(aMatrix.getDimSize(0));
            rowIdx = index % rows;
            colIdx = index / rows;
            float* data = nullptr;
            cudaMalloc((void**)&data, sizeof(float));
            auto ret = Aurora::CudaMatrix::fromRawData(data,1,1,1);
            ret.setValue(0, *best);
            return ret;
        }
        case Row:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int rowCount = aMatrix.getDimSize(1);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(0));
            unsigned int threads = 256;
            if (rowCount<512){
                // Roughly half the row length, rounded up to a power of two so the
                // kernel's halving reduction covers every per-thread partial.
                threads = 1;
                while (threads < static_cast<unsigned int>(rowCount/2+1)) {
                    threads <<= 1;
                }
            }
            else if (aMatrix.getDimSize(1)/aMatrix.getDimSize(0)>4){
                threads = 512;
            }
            maxRowKernel<<<aMatrix.getDimSize(0),threads>>>(matData,retData,aMatrix.getDimSize(0),rowCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,aMatrix.getDimSize(0),1);
            return ret;
        }
        case Column:
        default:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int colCount = aMatrix.getDimSize(0);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(1));
            // One block per column; 256 threads matches the kernel's shared buffer.
            maxColKernel<<<aMatrix.getDimSize(1),256>>>(matData,retData,colCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,1,aMatrix.getDimSize(1));
            return ret;
        }
    }
}
/**
 * Element-wise maximum of a vector against every column or every row of a
 * device matrix.
 *
 * @param aVec Either a column vector whose length equals aMat.getDimSize(0),
 *             or a row vector whose length equals aMat.getDimSize(1).
 * @param aMat The matrix the vector is compared against.
 * @return A matrix with aMat's dimensions and value type holding the maxima,
 *         or an empty CudaMatrix (after logging to stderr) when the shapes
 *         do not fit.
 */
CudaMatrix vxmMax(const CudaMatrix &aVec, const CudaMatrix &aMat) {
    const bool isColVec = aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    const bool isRowVec = aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);
    if (!isColVec && !isRowVec) {
        std::cerr
            << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
            << std::endl;
        return CudaMatrix();
    }

    const size_t size = aMat.getDataSize();
    const size_t rows = aMat.getDimSize(0);
    const float* vec = aVec.getData();
    const float* mat = aMat.getData();
    float* data = nullptr;
    cudaMalloc((void**)&data, sizeof(float) * size);

    // A single pass over the flat element index k replaces one launch per
    // column (and, for row vectors, one device-to-host read per column).
    thrust::counting_iterator<size_t> first(0);
    if (isColVec) {
        // Element k sits in row k % rows, which selects the vector entry.
        thrust::transform(thrust::device, first, first + size, data,
                          [=] __host__ __device__(size_t k) {
                              return fmaxf(vec[k % rows], mat[k]);
                          });
    }
    else {
        // Element k sits in column k / rows, which selects the vector entry.
        thrust::transform(thrust::device, first, first + size, data,
                          [=] __host__ __device__(size_t k) {
                              return fmaxf(mat[k], vec[k / rows]);
                          });
    }
    return Aurora::CudaMatrix::fromRawData(data, aMat.getDimSize(0),
        aMat.getDimSize(1), aMat.getDimSize(2), aMat.getValueType());
}
/**
 * Element-wise maximum of two real 2D device matrices.
 *
 * Supported operand combinations, tried in this order: identical shapes,
 * one operand holding a single element, or one operand being a vector that
 * matches the other's rows or columns.
 *
 * @return The element-wise maxima, or an empty CudaMatrix (after logging to
 *         stderr) for 3D/complex operands or incompatible shapes.
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, const CudaMatrix &aOther) {
    // Validate both operands, left one first.
    const CudaMatrix* operands[] = {&aMatrix, &aOther};
    for (const CudaMatrix* operand : operands) {
        const bool is3D = operand->getDimSize(2) > 1;
        if (is3D || operand->isComplex()) {
            std::cerr
                << (is3D ? "max() not support 3D data!" : "max() not support complex value type!")
                << std::endl;
            return CudaMatrix();
        }
    }

    // Identical shapes: plain element-by-element comparison.
    if (aMatrix.compareShape(aOther)) {
        size_t elementCount = aMatrix.getDataSize() * aMatrix.getValueType();
        float* result = nullptr;
        cudaMalloc((void**)&result, sizeof(float) * elementCount);
        thrust::transform(thrust::device, aMatrix.getData(),
                          aMatrix.getData() + aMatrix.getDataSize(), aOther.getData(),
                          result,
                          [=] __host__ __device__ (const float& lhs, const float& rhs) {
                              return fmaxf(lhs, rhs);
                          });
        return Aurora::CudaMatrix::fromRawData(result, aMatrix.getDimSize(0),
            aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    }

    // One operand is a single value: delegate to the matrix/scalar overload.
    const bool leftIsScalar = aMatrix.getDataSize() == 1;
    if (leftIsScalar || aOther.getDataSize() == 1) {
        float scalar = leftIsScalar ? aMatrix.getValue(0) : aOther.getValue(0);
        auto matrix = leftIsScalar ? aOther : aMatrix;
        return max(matrix, scalar);
    }

    // One operand is a vector: compare it against the other one's rows/columns.
    if (aMatrix.isVector()) {
        return ::vxmMax(aMatrix, aOther);
    }
    if (aOther.isVector()) {
        return ::vxmMax(aOther, aMatrix);
    }

    std::cerr
        << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
        << std::endl;
    return CudaMatrix();
}
/**
 * Element-wise maximum of a device matrix and a scalar.
 *
 * @param aMatrix Input matrix.
 * @param aValue  Value every element is compared against.
 * @return A matrix with aMatrix's dimensions and value type where each
 *         element is fmaxf(element, aValue).
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, const float aValue){
    size_t elementCount = aMatrix.getDataSize() * aMatrix.getValueType();
    float* result = nullptr;
    cudaMalloc((void**)&result, sizeof(float) * elementCount);

    thrust::transform(thrust::device,
                      aMatrix.getData(), aMatrix.getData() + aMatrix.getDataSize(),
                      result,
                      [=] __host__ __device__ (const float& element) {
                          return fmaxf(element, aValue);
                      });

    return Aurora::CudaMatrix::fromRawData(result, aMatrix.getDimSize(0),
        aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
}
/**
 * Computes the minimum of one column per thread block.
 *
 * Block b reads the aColSize consecutive floats starting at
 * aInputData[b * aColSize] and writes their minimum to aOutput[b].
 *
 * @param aInputData Device pointer to the input values.
 * @param aOutput    Device pointer receiving one value per block.
 * @param aColSize   Number of elements in each column.
 *
 * blockDim.x must not exceed 256 (the size of the shared buffer); any block
 * size up to that limit is handled, including non-powers of two.
 */
__global__ void minColKernel(float* aInputData, float* aOutput, unsigned int aColSize)
{
    __shared__ float shared_data[256];
    const unsigned int tid = threadIdx.x;
    // 64-bit offset so large matrices cannot overflow the index arithmetic.
    const size_t columnStart = static_cast<size_t>(blockIdx.x) * aColSize;

    // Each thread folds its strided share of the column into a register;
    // threads beyond the column length contribute the neutral value.
    float best = (tid < aColSize) ? aInputData[columnStart + tid] : FLT_MAX;
    for (size_t row = static_cast<size_t>(tid) + blockDim.x; row < aColSize; row += blockDim.x) {
        best = fminf(best, aInputData[columnStart + row]);
    }
    shared_data[tid] = best;
    __syncthreads();

    // Tree reduction over the per-thread partials. The active range is halved
    // (rounding up) each step, so writers (index < active - half) never touch
    // a slot another thread is reading (index >= half), and no partial is
    // dropped when the block size is not a power of two.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (tid + half < active) {
            shared_data[tid] = fminf(shared_data[tid], shared_data[tid + half]);
        }
        __syncthreads();
        active = half;
    }

    // Thread 0 publishes the block's minimum.
    if (tid == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
/**
 * Computes the minimum of one row per thread block.
 *
 * Block b reads the aRowSize elements aInputData[b + k * aColSize]
 * (k = 0 .. aRowSize-1) and writes their minimum to aOutput[b].
 *
 * @param aInputData Device pointer to the input values.
 * @param aOutput    Device pointer receiving one value per block.
 * @param aColSize   Distance, in elements, between consecutive entries of a row.
 * @param aRowSize   Number of elements in each row.
 *
 * blockDim.x must not exceed 512 (the size of the shared buffer); any block
 * size up to that limit is handled, including non-powers of two.
 */
__global__ void minRowKernel(float* aInputData, float* aOutput,unsigned int aColSize, unsigned int aRowSize)
{
    __shared__ float shared_data[512];
    const unsigned int tid = threadIdx.x;
    // 64-bit stride/offset so large matrices cannot overflow the index arithmetic.
    const size_t stride = aColSize;
    const size_t rowStart = blockIdx.x;

    // Each thread folds its strided share of the row into a register;
    // threads beyond the row length contribute the neutral value.
    float best = (tid < aRowSize) ? aInputData[tid * stride + rowStart] : FLT_MAX;
    for (size_t col = static_cast<size_t>(tid) + blockDim.x; col < aRowSize; col += blockDim.x) {
        best = fminf(best, aInputData[col * stride + rowStart]);
    }
    shared_data[tid] = best;
    __syncthreads();

    // Tree reduction over the per-thread partials. The active range is halved
    // (rounding up) each step, so writers (index < active - half) never touch
    // a slot another thread is reading (index >= half), and no partial is
    // dropped when the block size is not a power of two.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (tid + half < active) {
            shared_data[tid] = fminf(shared_data[tid], shared_data[tid + half]);
        }
        __syncthreads();
        active = half;
    }

    // Thread 0 publishes the block's minimum.
    if (tid == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
/**
 * Convenience overload of min() for callers that do not need the position
 * of the minimum; forwards to the overload taking index outputs.
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, FunctionDirection direction) {
    // Placeholders for the index outputs, discarded on return.
    long ignoredRow;
    long ignoredCol;
    return min(aMatrix, direction, ignoredRow, ignoredCol);
}
/**
 * Minimum of a real 2D device matrix along the requested direction.
 *
 * @param aMatrix   Input matrix; 3D, complex or empty inputs are rejected.
 * @param direction Column: one minimum per column, returned as 1 x getDimSize(1).
 *                  Row: one minimum per row, returned as getDimSize(0) x 1.
 *                  All: a single 1 x 1 value. Vectors are always reduced as All.
 * @param rowIdx    Written only on the All path: row of the minimum.
 * @param colIdx    Written only on the All path: column of the minimum.
 * @return The reduced matrix, or an empty CudaMatrix (after logging to stderr)
 *         for unsupported input.
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx)
{
    if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
        std::cerr<< (aMatrix.getDimSize(2) > 1 ? "min() not support 3D data!" : "min() not support complex value type!")
                 << std::endl;
        return CudaMatrix();
    }
    // An empty matrix would otherwise dereference the end iterator, divide by a
    // zero dimension, or launch a kernel with an empty grid.
    if (aMatrix.getDataSize() == 0) {
        std::cerr << "min() not support empty data!" << std::endl;
        return CudaMatrix();
    }
    // For a vector, row-wise and column-wise reduction coincide with a full reduction.
    if (aMatrix.isVector()){
        direction = All;
    }
    switch (direction)
    {
        case All: {
            thrust::device_ptr<float> first = thrust::device_pointer_cast(aMatrix.getData());
            auto best = thrust::min_element(thrust::device, first, first + aMatrix.getDataSize());
            // Keep the flat index in a long: an int overflows beyond 2^31 elements.
            const long index = static_cast<long>(best - first);
            const long rows = static_cast<long>(aMatrix.getDimSize(0));
            rowIdx = index % rows;
            colIdx = index / rows;
            float* data = nullptr;
            cudaMalloc((void**)&data, sizeof(float));
            auto ret = Aurora::CudaMatrix::fromRawData(data,1,1,1);
            ret.setValue(0, *best);
            return ret;
        }
        case Row:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int rowCount = aMatrix.getDimSize(1);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(0));
            unsigned int threads = 256;
            if (rowCount<512){
                // Roughly half the row length, rounded up to a power of two so the
                // kernel's halving reduction covers every per-thread partial.
                threads = 1;
                while (threads < static_cast<unsigned int>(rowCount/2+1)) {
                    threads <<= 1;
                }
            }
            else if (aMatrix.getDimSize(1)/aMatrix.getDimSize(0)>4){
                threads = 512;
            }
            minRowKernel<<<aMatrix.getDimSize(0),threads>>>(matData,retData,aMatrix.getDimSize(0),rowCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,aMatrix.getDimSize(0),1);
            return ret;
        }
        case Column:
        default:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int colCount = aMatrix.getDimSize(0);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(1));
            // One block per column; 256 threads matches the kernel's shared buffer.
            minColKernel<<<aMatrix.getDimSize(1),256>>>(matData,retData,colCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,1,aMatrix.getDimSize(1));
            return ret;
        }
    }
}
/**
 * Element-wise minimum of a vector against every column or every row of a
 * device matrix.
 *
 * @param aVec Either a column vector whose length equals aMat.getDimSize(0),
 *             or a row vector whose length equals aMat.getDimSize(1).
 * @param aMat The matrix the vector is compared against.
 * @return A matrix with aMat's dimensions and value type holding the minima,
 *         or an empty CudaMatrix (after logging to stderr) when the shapes
 *         do not fit.
 */
CudaMatrix vxmMin(const CudaMatrix &aVec, const CudaMatrix &aMat) {
    const bool isColVec = aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    const bool isRowVec = aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);
    if (!isColVec && !isRowVec) {
        std::cerr
            << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
            << std::endl;
        return CudaMatrix();
    }

    const size_t size = aMat.getDataSize();
    const size_t rows = aMat.getDimSize(0);
    const float* vec = aVec.getData();
    const float* mat = aMat.getData();
    float* data = nullptr;
    cudaMalloc((void**)&data, sizeof(float) * size);

    // A single pass over the flat element index k replaces one launch per
    // column (and, for row vectors, one device-to-host read per column).
    thrust::counting_iterator<size_t> first(0);
    if (isColVec) {
        // Element k sits in row k % rows, which selects the vector entry.
        thrust::transform(thrust::device, first, first + size, data,
                          [=] __host__ __device__(size_t k) {
                              return fminf(vec[k % rows], mat[k]);
                          });
    }
    else {
        // Element k sits in column k / rows, which selects the vector entry.
        thrust::transform(thrust::device, first, first + size, data,
                          [=] __host__ __device__(size_t k) {
                              return fminf(mat[k], vec[k / rows]);
                          });
    }
    return Aurora::CudaMatrix::fromRawData(data, aMat.getDimSize(0),
        aMat.getDimSize(1), aMat.getDimSize(2), aMat.getValueType());
}
/**
 * Element-wise minimum of two real 2D device matrices.
 *
 * Supported operand combinations, tried in this order: identical shapes,
 * one operand holding a single element, or one operand being a vector that
 * matches the other's rows or columns.
 *
 * @return The element-wise minima, or an empty CudaMatrix (after logging to
 *         stderr) for 3D/complex operands or incompatible shapes.
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, const CudaMatrix &aOther) {
    // Validate both operands, left one first.
    const CudaMatrix* operands[] = {&aMatrix, &aOther};
    for (const CudaMatrix* operand : operands) {
        const bool is3D = operand->getDimSize(2) > 1;
        if (is3D || operand->isComplex()) {
            std::cerr
                << (is3D ? "min() not support 3D data!" : "min() not support complex value type!")
                << std::endl;
            return CudaMatrix();
        }
    }

    // Identical shapes: plain element-by-element comparison.
    if (aMatrix.compareShape(aOther)) {
        size_t elementCount = aMatrix.getDataSize() * aMatrix.getValueType();
        float* result = nullptr;
        cudaMalloc((void**)&result, sizeof(float) * elementCount);
        thrust::transform(thrust::device, aMatrix.getData(),
                          aMatrix.getData() + aMatrix.getDataSize(), aOther.getData(),
                          result,
                          [=] __host__ __device__ (const float& lhs, const float& rhs) {
                              return fminf(lhs, rhs);
                          });
        return Aurora::CudaMatrix::fromRawData(result, aMatrix.getDimSize(0),
            aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    }

    // One operand is a single value: delegate to the matrix/scalar overload.
    const bool leftIsScalar = aMatrix.getDataSize() == 1;
    if (leftIsScalar || aOther.getDataSize() == 1) {
        float scalar = leftIsScalar ? aMatrix.getValue(0) : aOther.getValue(0);
        auto matrix = leftIsScalar ? aOther : aMatrix;
        return min(matrix, scalar);
    }

    // One operand is a vector: compare it against the other one's rows/columns.
    if (aMatrix.isVector()) {
        return ::vxmMin(aMatrix, aOther);
    }
    if (aOther.isVector()) {
        return ::vxmMin(aOther, aMatrix);
    }

    std::cerr
        << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
        << std::endl;
    return CudaMatrix();
}
/**
 * Element-wise minimum of a device matrix and a scalar.
 *
 * @param aMatrix Input matrix.
 * @param aValue  Value every element is compared against.
 * @return A matrix with aMatrix's dimensions and value type where each
 *         element is fminf(element, aValue).
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, const float aValue){
    size_t elementCount = aMatrix.getDataSize() * aMatrix.getValueType();
    float* result = nullptr;
    cudaMalloc((void**)&result, sizeof(float) * elementCount);

    thrust::transform(thrust::device,
                      aMatrix.getData(), aMatrix.getData() + aMatrix.getDataSize(),
                      result,
                      [=] __host__ __device__ (const float& element) {
                          return fminf(element, aValue);
                      });

    return Aurora::CudaMatrix::fromRawData(result, aMatrix.getDimSize(0),
        aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
}

19
src/Function2D.cuh Normal file
View File

@@ -0,0 +1,19 @@
#ifndef __FUNCTION2D_CUDA__
#define __FUNCTION2D_CUDA__
#include "CudaMatrix.h"
#include "AuroraDefs.h"
// Device (CudaMatrix) counterparts of the 2D max/min functions.
namespace Aurora
{
// Maximum along `direction` (default: one value per column).
CudaMatrix max(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
// Maximum along `direction`; rowIdx/colIdx are output parameters for the
// position of the maximum.
CudaMatrix max(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx);
// Element-wise maximum of the matrix and a scalar value.
CudaMatrix max(const CudaMatrix &aMatrix, const float aValue);
// Element-wise maximum of two matrices.
CudaMatrix max(const CudaMatrix &aMatrix, const CudaMatrix &aOther);
// Minimum along `direction` (default: one value per column).
CudaMatrix min(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
// Minimum along `direction`; rowIdx/colIdx are output parameters for the
// position of the minimum.
CudaMatrix min(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx);
// Element-wise minimum of the matrix and a scalar value.
CudaMatrix min(const CudaMatrix &aMatrix, const float aValue);
// Element-wise minimum of two matrices.
CudaMatrix min(const CudaMatrix &aMatrix, const CudaMatrix &aOther);
}
#endif // __FUNCTION2D_CUDA__

View File

@@ -6,15 +6,11 @@
#include "Matrix.h" #include "Matrix.h"
#include "Function1D.h" #include "Function1D.h"
#include "AuroraDefs.h"
namespace Aurora namespace Aurora
{ {
enum FunctionDirection
{
Column,
Row,
All
};
float immse(const Matrix &aImageA, const Matrix &aImageB); float immse(const Matrix &aImageA, const Matrix &aImageB);
Matrix inv(const Matrix &aMatrix); Matrix inv(const Matrix &aMatrix);
Matrix inv(Matrix &&aMatrix); Matrix inv(Matrix &&aMatrix);
@@ -51,6 +47,8 @@ namespace Aurora
*/ */
Matrix min(const Matrix &aMatrix, const Matrix &aOther); Matrix min(const Matrix &aMatrix, const Matrix &aOther);
Matrix min(const Matrix &aMatrix, const float aValue);
/** /**
* 比较两个矩阵,求对应位置的最大值,不支持三维 * 比较两个矩阵,求对应位置的最大值,不支持三维
* @attention 矩阵形状不一样时如A为[MxN],则B应为标量或[1xN]的行向量 * @attention 矩阵形状不一样时如A为[MxN],则B应为标量或[1xN]的行向量

View File

@@ -0,0 +1,461 @@
#include <gtest/gtest.h>
#include <chrono>
#include "CudaMatrix.h"
#include "Function.h"
#include "Matrix.h"
#include "TestUtility.h"
#include "Function2D.h"
#include "Function2D.cuh"
class Function2D_Cuda_Test:public ::testing::Test
{
protected:
static void SetUpFunction2DCudaTester(){
}
static void TearDownTestCase(){
}
public:
Aurora::Matrix B;
Aurora::CudaMatrix dB;
void SetUp(){
}
void TearDown(){
}
};
// Checks that every Aurora::min overload on CudaMatrix yields the same shape
// and values as the host Matrix implementation. Each scenario also prints the
// elapsed time of the host call ("Test1") and of the device call ("Test2").
TEST_F(Function2D_Cuda_Test, min)
{
// Scenario 1: 4096 x 41472 matrix, reduction along Column and then along Row.
{
float *dataB = Aurora::random(4096*41472);
B = Aurora::Matrix::fromRawData(dataB, 4096, 41472);
dB = B.toDeviceMatrix();
long r,c;
// Column direction: time the host call, then the device call.
auto start_time_ = std::chrono::high_resolution_clock::now();
auto ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
auto ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
// Host and device results must agree in all three dimensions and in every element.
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
// Row direction on the same data.
start_time_ = std::chrono::high_resolution_clock::now();
ret1 = Aurora::min(B, Aurora::FunctionDirection::Row,r,c);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
ret2 = Aurora::min(dB, Aurora::FunctionDirection::Row,r,c);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
}
// Scenario 2: 3157 x 111 matrix along Column, then the same data reshaped to
// 111 x 3157 and reduced along Column again.
{
float *dataB = Aurora::random(3157*111);
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
dB = B.toDeviceMatrix();
long r,c;
auto start_time_ = std::chrono::high_resolution_clock::now();
auto ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
auto ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
// Reshape the host matrix to 111 x 3157 and refresh the device copy.
B.forceReshape( 111,3157, 1);
dB = B.toDeviceMatrix();
start_time_ = std::chrono::high_resolution_clock::now();
ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
}
// Scenario 3: matrix against a scalar (500.5f). r and c are unused here.
{
float *dataB = Aurora::random(3157*111);
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
dB = B.toDeviceMatrix();
long r,c;
auto start_time_ = std::chrono::high_resolution_clock::now();
auto ret1 = Aurora::min(B, 500.5f);
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
auto ret2 = Aurora::min(dB, 500.5f);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
}
// Scenario 4: two matrices of identical shape (3157 x 111). r and c are unused here.
{
float *dataB = Aurora::random(3157*111);
float *dataA = Aurora::random(3157*111);
auto A = Aurora::Matrix::fromRawData(dataA, 3157, 111);
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
auto dA = A.toDeviceMatrix();
dB = B.toDeviceMatrix();
long r,c;
auto start_time_ = std::chrono::high_resolution_clock::now();
auto ret1 = Aurora::min(B, A);
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
auto ret2 = Aurora::min(dB, dA);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
}
// Scenario 5: matrix (3157 x 111) against a column vector (3157 x 1), with the
// device call made in both argument orders. r and c are unused here.
{
float *dataB = Aurora::random(3157*111);
float *dataA = Aurora::random(3157);
auto A = Aurora::Matrix::fromRawData(dataA, 3157, 1);
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
auto dA = A.toDeviceMatrix();
dB = B.toDeviceMatrix();
long r,c;
auto start_time_ = std::chrono::high_resolution_clock::now();
auto ret1 = Aurora::min(B, A);
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
auto ret2 = Aurora::min(dB, dA);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
// Swapped argument order on the device; compared against the same host result.
start_time_ = std::chrono::high_resolution_clock::now();
ret2 = Aurora::min(dA, dB);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
}
// Scenario 6: matrix (3157 x 111) against a row vector (1 x 111). r and c are unused here.
{
float *dataB = Aurora::random(3157*111);
float *dataA = Aurora::random(111);
auto A = Aurora::Matrix::fromRawData(dataA, 1, 111);
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
auto dA = A.toDeviceMatrix();
dB = B.toDeviceMatrix();
long r,c;
auto start_time_ = std::chrono::high_resolution_clock::now();
auto ret1 = Aurora::min(B, A);
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
start_time_ = std::chrono::high_resolution_clock::now();
auto ret2 = Aurora::min(dB, dA);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
for (size_t i = 0; i < ret1.getDataSize(); i++)
{
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
}
}
}
/**
 * Cross-checks Aurora::max on a host matrix against the same call on the
 * matrix returned by toDeviceMatrix().
 *
 * Every scenario follows the same recipe: fill the operands with
 * Aurora::random, run the host overload ("Test1") and the device overload
 * ("Test2") while printing the elapsed wall time of each call, then require
 * identical dimensions and element-wise equal values (ASSERT_FLOAT_EQ).
 * All ASSERT_* checks are fatal, so the first mismatch ends the test.
 *
 * B and dB are not declared here; they are declared outside this block and
 * are reassigned by every scenario.
 */
TEST_F(Function2D_Cuda_Test, max)
{
    using Clock = std::chrono::high_resolution_clock;

    // Prints "<label><elapsed> ms" for the time elapsed since `since`,
    // truncated to whole milliseconds. The label carries the full prefix so
    // the console output is unchanged.
    const auto report = [](const char *label, Clock::time_point since) {
        const auto elapsed =
            std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() - since);
        std::cout << label << elapsed.count() << " ms" << std::endl;
    };

    // Scenario 1: 4096 x 41472 input, FunctionDirection::Column then ::Row.
    {
        float *dataB = Aurora::random(4096*41472);
        B = Aurora::Matrix::fromRawData(dataB, 4096, 41472);
        dB = B.toDeviceMatrix();
        // Out-arguments required by this overload; their values are not checked here.
        long r,c;

        auto tic = Clock::now();
        auto ret1 = Aurora::max(B, Aurora::FunctionDirection::Column,r,c);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        auto ret2 = Aurora::max(dB, Aurora::FunctionDirection::Column,r,c);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }

        tic = Clock::now();
        ret1 = Aurora::max(B, Aurora::FunctionDirection::Row,r,c);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        ret2 = Aurora::max(dB, Aurora::FunctionDirection::Row,r,c);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }
    }

    // Scenario 2: sizes that are not powers of two (3157 x 111), then the
    // same data reshaped to 111 x 3157; FunctionDirection::Column both times.
    {
        float *dataB = Aurora::random(3157*111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        dB = B.toDeviceMatrix();
        // Out-arguments required by this overload; their values are not checked here.
        long r,c;

        auto tic = Clock::now();
        auto ret1 = Aurora::max(B, Aurora::FunctionDirection::Column,r,c);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        auto ret2 = Aurora::max(dB, Aurora::FunctionDirection::Column,r,c);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }

        // Reshape the host matrix and refresh the device copy so both sides
        // see the new dimensions.
        B.forceReshape( 111,3157, 1);
        dB = B.toDeviceMatrix();

        tic = Clock::now();
        ret1 = Aurora::max(B, Aurora::FunctionDirection::Column,r,c);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        ret2 = Aurora::max(dB, Aurora::FunctionDirection::Column,r,c);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }
    }

    // Scenario 3: matrix against a scalar.
    {
        float *dataB = Aurora::random(3157*111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto ret1 = Aurora::max(B, 500.5f);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        auto ret2 = Aurora::max(dB, 500.5f);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }
    }

    // Scenario 4: two matrices with the same dimensions (3157 x 111).
    {
        float *dataB = Aurora::random(3157*111);
        float *dataA = Aurora::random(3157*111);
        auto A = Aurora::Matrix::fromRawData(dataA, 3157, 111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        auto dA = A.toDeviceMatrix();
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto ret1 = Aurora::max(B, A);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        auto ret2 = Aurora::max(dB, dA);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }
    }

    // Scenario 5: 3157 x 111 matrix against a 3157 x 1 vector, with the
    // device call made in both argument orders.
    {
        float *dataB = Aurora::random(3157*111);
        float *dataA = Aurora::random(3157);
        auto A = Aurora::Matrix::fromRawData(dataA, 3157, 1);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        auto dA = A.toDeviceMatrix();
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto ret1 = Aurora::max(B, A);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        auto ret2 = Aurora::max(dB, dA);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }

        // Swapped operands on the device side must still match the host
        // result computed above as max(B, A).
        tic = Clock::now();
        ret2 = Aurora::max(dA, dB);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }
    }

    // Scenario 6: 3157 x 111 matrix against a 1 x 111 vector.
    {
        float *dataB = Aurora::random(3157*111);
        float *dataA = Aurora::random(111);
        auto A = Aurora::Matrix::fromRawData(dataA, 1, 111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        auto dA = A.toDeviceMatrix();
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto ret1 = Aurora::max(B, A);
        report("Test1 duration: ", tic);
        tic = Clock::now();
        auto ret2 = Aurora::max(dB, dA);
        report("Test2 duration: ", tic);
        ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
        ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
        ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
        for (size_t i = 0; i < ret1.getDataSize(); ++i)
        {
            ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
        }
    }
}

View File

@@ -73,13 +73,16 @@ TEST_F(Function2D_Test, std){
} }
TEST_F(Function2D_Test, min) { TEST_F(Function2D_Test, min) {
float *dataA = new float[3]{1, 2, 3}; float *dataA = new float[3]{1, 2,-9};
float *dataB = new float[9]{2, 3, 3, 2, 2, -1, 3, 3, 3}; float *dataB = new float[9]{2, 3, 3, 2, 2, -1, 3, 3, 3};
float *dataC = new float[1]{1.5}; float *dataC = new float[1]{1.5};
float *dataE = new float[12]{2, 3, 3, 2, 2, -1, 3, 3, 3,1,-5,7};
auto A = Aurora::Matrix::fromRawData(dataA, 3, 1); auto A = Aurora::Matrix::fromRawData(dataA, 3, 1);
auto B = Aurora::Matrix::fromRawData(dataB, 3, 3); auto B = Aurora::Matrix::fromRawData(dataB, 3, 3);
auto C = Aurora::Matrix::fromRawData(dataC, 1); auto C = Aurora::Matrix::fromRawData(dataC, 1);
auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3); auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3);
auto E = Aurora::Matrix::copyFromRawData(dataE, 4, 3);
Aurora::Matrix ret = Aurora::min(B); Aurora::Matrix ret = Aurora::min(B);
EXPECT_EQ(1, ret.getDimSize(0)); EXPECT_EQ(1, ret.getDimSize(0));
EXPECT_EQ(3, ret.getDimSize(1)); EXPECT_EQ(3, ret.getDimSize(1));
@@ -99,32 +102,64 @@ TEST_F(Function2D_Test, min) {
long r,c; long r,c;
ret = Aurora::min(A, Aurora::Column,r,c); ret = Aurora::min(A, Aurora::Column,r,c);
EXPECT_FLOAT_EQ(1, ret.getDataSize()); EXPECT_FLOAT_EQ(1, ret.getDataSize());
EXPECT_FLOAT_EQ(1, ret.getData()[0]); EXPECT_FLOAT_EQ(-9, ret.getData()[0]);
EXPECT_EQ(0, r); EXPECT_EQ(2, r);
EXPECT_EQ(0, c); EXPECT_EQ(0, c);
ret = Aurora::min(D); ret = Aurora::min(D);
EXPECT_EQ(1, ret.getDimSize(0)); EXPECT_EQ(1, ret.getDimSize(0));
EXPECT_EQ(1, ret.getDimSize(1)); EXPECT_EQ(1, ret.getDimSize(1));
EXPECT_FLOAT_EQ(1, ret.getData()[0]); EXPECT_FLOAT_EQ(-9, ret.getData()[0]);
ret = Aurora::min(A, C); ret = Aurora::min(A, C);
EXPECT_FLOAT_EQ(3, ret.getDataSize()); EXPECT_FLOAT_EQ(3, ret.getDataSize());
EXPECT_FLOAT_EQ(1, ret.getData()[0]); EXPECT_FLOAT_EQ(1, ret.getData()[0]);
EXPECT_FLOAT_EQ(1.5, ret.getData()[1]); EXPECT_FLOAT_EQ(1.5, ret.getData()[1]);
EXPECT_FLOAT_EQ(1.5, ret.getData()[2]); EXPECT_FLOAT_EQ(-9, ret.getData()[2]);
// mat x row-vec
ret = Aurora::min(B,D); ret = Aurora::min(B,D);
EXPECT_FLOAT_EQ(9, ret.getDataSize()); EXPECT_FLOAT_EQ(9, ret.getDataSize());
EXPECT_FLOAT_EQ(1, ret.getData()[0]); EXPECT_FLOAT_EQ(1, ret.getData()[0]);
EXPECT_FLOAT_EQ(1, ret.getData()[1]); EXPECT_FLOAT_EQ(1, ret.getData()[1]);
EXPECT_FLOAT_EQ(1, ret.getData()[2]); EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
EXPECT_FLOAT_EQ(-9, ret.getData()[8]);
// row-vec x mat
ret = Aurora::min(D,E);
EXPECT_FLOAT_EQ(12, ret.getDataSize());
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
EXPECT_FLOAT_EQ(1, ret.getData()[1]);
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
EXPECT_FLOAT_EQ(-9, ret.getData()[8]);
D.forceReshape(3,1,1);
ret = Aurora::min(D,B);
EXPECT_FLOAT_EQ(9, ret.getDataSize());
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
EXPECT_FLOAT_EQ(2, ret.getData()[1]);
EXPECT_FLOAT_EQ(-9, ret.getData()[5]);
EXPECT_FLOAT_EQ(-9, ret.getData()[8]);
D.forceReshape(3,1,1);
//col-vec x mat
ret = Aurora::min(D,B);
EXPECT_FLOAT_EQ(9, ret.getDataSize());
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
EXPECT_FLOAT_EQ(2, ret.getData()[1]);
EXPECT_FLOAT_EQ(-9, ret.getData()[5]);
//mat x col-vec
ret = Aurora::min(E,D);
EXPECT_FLOAT_EQ(0, ret.getDataSize());
} }
TEST_F(Function2D_Test, max) { TEST_F(Function2D_Test, max)
float *dataA = new float[3]{1, 2, 3}; {
float *dataC = new float[3]{1, 2, 4}; float *dataA = new float[3]{1, 2,-9};
float *dataB = new float[9]{2, 3, 3, 2, 2, 1, 3, 3, 3}; float *dataB = new float[9]{2, 3, 3, 2, 2, -1, 3, 3, 3};
float *dataC = new float[1]{1.5};
float *dataE = new float[12]{2, 3, 3, 2, 2, -1, 3, 3, 3,1,-5,7};
auto A = Aurora::Matrix::fromRawData(dataA, 3, 1); auto A = Aurora::Matrix::fromRawData(dataA, 3, 1);
auto B = Aurora::Matrix::fromRawData(dataB, 3, 3); auto B = Aurora::Matrix::fromRawData(dataB, 3, 3);
auto C = Aurora::Matrix::fromRawData(dataC, 3, 1); auto C = Aurora::Matrix::fromRawData(dataC, 1);
auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3);
auto E = Aurora::Matrix::copyFromRawData(dataE, 4, 3);
Aurora::Matrix ret = Aurora::max(B); Aurora::Matrix ret = Aurora::max(B);
EXPECT_EQ(1, ret.getDimSize(0)); EXPECT_EQ(1, ret.getDimSize(0));
@@ -145,17 +180,51 @@ TEST_F(Function2D_Test, max) {
long r,c; long r,c;
ret = Aurora::max(A, Aurora::Column,r,c); ret = Aurora::max(A, Aurora::Column,r,c);
EXPECT_FLOAT_EQ(1, ret.getDataSize()); EXPECT_FLOAT_EQ(1, ret.getDataSize());
EXPECT_FLOAT_EQ(3, ret.getData()[0]); EXPECT_FLOAT_EQ(2, ret.getData()[0]);
EXPECT_EQ(2, r); EXPECT_EQ(1, r);
EXPECT_EQ(0, c); EXPECT_EQ(0, c);
auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3);
ret = Aurora::max(D); ret = Aurora::max(D);
EXPECT_EQ(1, ret.getDimSize(0)); EXPECT_EQ(1, ret.getDimSize(0));
EXPECT_EQ(1, ret.getDimSize(1)); EXPECT_EQ(1, ret.getDimSize(1));
EXPECT_FLOAT_EQ(3, ret.getData()[0]); EXPECT_FLOAT_EQ(2, ret.getData()[0]);
ret = Aurora::max(A, C); ret = Aurora::max(A, C);
EXPECT_FLOAT_EQ(4, ret.getData()[2]); EXPECT_FLOAT_EQ(3, ret.getDataSize());
EXPECT_FLOAT_EQ(1.5, ret.getData()[0]);
EXPECT_FLOAT_EQ(2, ret.getData()[1]);
EXPECT_FLOAT_EQ(1.5, ret.getData()[2]);
// mat x row-vec
ret = Aurora::max(B,D);
EXPECT_FLOAT_EQ(9, ret.getDataSize());
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
EXPECT_FLOAT_EQ(2, ret.getData()[5]);
EXPECT_FLOAT_EQ(3, ret.getData()[8]);
// row-vec x mat
ret = Aurora::max(D,E);
EXPECT_FLOAT_EQ(12, ret.getDataSize());
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
EXPECT_FLOAT_EQ(2, ret.getData()[5]);
EXPECT_FLOAT_EQ(3, ret.getData()[8]);
D.forceReshape(3,1,1);
ret = Aurora::max(D,B);
EXPECT_FLOAT_EQ(9, ret.getDataSize());
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
EXPECT_FLOAT_EQ(3, ret.getData()[8]);
D.forceReshape(3,1,1);
//col-vec x mat
ret = Aurora::max(D,B);
EXPECT_FLOAT_EQ(9, ret.getDataSize());
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
//mat x col-vec
ret = Aurora::max(E,D);
EXPECT_FLOAT_EQ(0, ret.getDataSize());
} }
TEST_F(Function2D_Test, sum) { TEST_F(Function2D_Test, sum) {