Add max and min to function2D
This commit is contained in:
@@ -8,5 +8,13 @@
|
||||
#include "mkl.h"
|
||||
|
||||
#define PI 3.141592653589793238462
|
||||
|
||||
namespace Aurora
{
    /**
     * Selects the dimension along which reductions such as max()/min() operate.
     */
    enum FunctionDirection
    {
        Column = 0, ///< reduce each column to one value
        Row = 1,    ///< reduce each row to one value
        All = 2     ///< reduce the whole matrix to one value
    };
}
|
||||
#endif //AURORA_AURORADEFS_H
|
||||
|
||||
@@ -204,6 +204,34 @@ Matrix Aurora::min(const Matrix &aMatrix, FunctionDirection direction) {
|
||||
return min(aMatrix,direction,a,b);
|
||||
}
|
||||
|
||||
/**
 * Element-wise minimum of a vector and a matrix, with the vector applied to
 * every column (column vector) or every row (row vector) of the matrix.
 *
 * @param aVec column vector whose length equals aMat's dim 0, or row vector
 *             whose length equals aMat's dim 1
 * @param aMat matrix the vector is compared against
 * @return a matrix shaped like aMat, or an empty Matrix (after printing an
 *         error to std::cerr) when the shapes do not match
 */
Matrix vxmMin(const Matrix &aVec, const Matrix &aMat)
{
    const bool columnVector = aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    const bool rowVector = aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);

    if (!columnVector && !rowVector) {
        std::cerr
            << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
            << std::endl;
        return Matrix();
    }

    float* result = Aurora::malloc(aMat.getDataSize());
    if (columnVector) {
        // Single column: take the minimum against each column of the matrix in turn.
        for (int col = 0; col < aMat.getDimSize(1); ++col) {
            vsFminI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + aMat.getDimSize(0) * col, 1,
                    result + aMat.getDimSize(0) * col, 1);
        }
    } else {
        // Single row: take the minimum against each row of the matrix in turn;
        // consecutive elements of a row are dim 0 apart in memory.
        for (int row = 0; row < aMat.getDimSize(0); ++row) {
            vsFminI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + row, aMat.getDimSize(0),
                    result + row, aMat.getDimSize(0));
        }
    }
    return Matrix::New(result, aMat);
}
|
||||
|
||||
Matrix Aurora::min(const Matrix &aMatrix, const Matrix &aOther) {
|
||||
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
|
||||
std::cerr
|
||||
@@ -231,30 +259,22 @@ Matrix Aurora::min(const Matrix &aMatrix, const Matrix &aOther) {
|
||||
vsFminI(matrix.getDataSize(),matrix.getData(),1,&scalar,0,output,1);
|
||||
return Matrix::New(output,matrix);
|
||||
}
|
||||
else if (aMatrix.getDimSize(1) == 1 || aOther.getDimSize(0) == 1) {
|
||||
if (aMatrix.getDimSize(1) == 1){
|
||||
float* output = malloc(aOther.getDataSize());
|
||||
for (int i = 0; i < aOther.getDimSize(1); ++i) {
|
||||
vsFminI(aMatrix.getDataSize(), aMatrix.getData(), 1, aOther.getData() + aOther.getDimSize(0) * i, 1,
|
||||
output + aOther.getDimSize(0) * i, 1);
|
||||
else if (aMatrix.isVector()) {
|
||||
return ::vxmMin(aMatrix,aOther);
|
||||
}
|
||||
return Matrix::New(output,aOther);
|
||||
else if (aOther.isVector()) {
|
||||
return ::vxmMin(aOther,aMatrix);
|
||||
}
|
||||
else{
|
||||
float* output = malloc(aMatrix.getDataSize());
|
||||
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
|
||||
vsFminI(aOther.getDataSize(), aOther.getData(), 1, aMatrix.getData() + i, aMatrix.getDimSize(0),
|
||||
output + i, aOther.getDimSize(0));
|
||||
}
|
||||
return Matrix::New(output,aMatrix);
|
||||
}
|
||||
}
|
||||
else{
|
||||
std::cerr
|
||||
<< "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
|
||||
<< std::endl;
|
||||
return Matrix();
|
||||
}
|
||||
|
||||
/**
 * Element-wise minimum of a matrix and a scalar.
 *
 * The scalar is wrapped in a 1x1x1 matrix and the work is delegated to the
 * matrix/matrix overload.
 */
Matrix Aurora::min(const Matrix &aMatrix, const float aValue){
    float *scalarStorage = Aurora::malloc(1);
    *scalarStorage = aValue;
    return min(aMatrix, Matrix::New(scalarStorage, 1, 1, 1));
}
|
||||
|
||||
Matrix Aurora::max(const Matrix &aMatrix, FunctionDirection direction) {
|
||||
@@ -314,6 +334,34 @@ Matrix Aurora::max(const Matrix &aMatrix, FunctionDirection direction, long& row
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Element-wise maximum of a vector and a matrix, with the vector applied to
 * every column (column vector) or every row (row vector) of the matrix.
 *
 * @param aVec column vector whose length equals aMat's dim 0, or row vector
 *             whose length equals aMat's dim 1
 * @param aMat matrix the vector is compared against
 * @return a matrix shaped like aMat, or an empty Matrix (after printing an
 *         error to std::cerr) when the shapes do not match
 */
Matrix vxmMax(const Matrix &aVec, const Matrix &aMat)
{
    const bool columnVector = aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    const bool rowVector = aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);

    if (!columnVector && !rowVector) {
        std::cerr
            << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
            << std::endl;
        return Matrix();
    }

    float* result = Aurora::malloc(aMat.getDataSize());
    if (columnVector) {
        // Single column: take the maximum against each column of the matrix in turn.
        for (int col = 0; col < aMat.getDimSize(1); ++col) {
            vsFmaxI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + aMat.getDimSize(0) * col, 1,
                    result + aMat.getDimSize(0) * col, 1);
        }
    } else {
        // Single row: take the maximum against each row of the matrix in turn;
        // consecutive elements of a row are dim 0 apart in memory.
        for (int row = 0; row < aMat.getDimSize(0); ++row) {
            vsFmaxI(aVec.getDataSize(), aVec.getData(), 1,
                    aMat.getData() + row, aMat.getDimSize(0),
                    result + row, aMat.getDimSize(0));
        }
    }
    return Matrix::New(result, aMat);
}
|
||||
|
||||
Matrix Aurora::max(const Matrix &aMatrix, const Matrix &aOther) {
|
||||
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
|
||||
std::cerr
|
||||
@@ -341,31 +389,17 @@ Matrix Aurora::max(const Matrix &aMatrix, const Matrix &aOther) {
|
||||
vsFmaxI(matrix.getDataSize(),matrix.getData(),1,&scalar,0,output,1);
|
||||
return Matrix::New(output,matrix);
|
||||
}
|
||||
else if (aMatrix.getDimSize(1) == 1 || aOther.getDimSize(0) == 1) {
|
||||
if (aMatrix.getDimSize(1) == 1){
|
||||
float* output = malloc(aOther.getDataSize());
|
||||
for (int i = 0; i < aOther.getDimSize(1); ++i) {
|
||||
vsFmaxI(aMatrix.getDataSize(), aMatrix.getData(), 1, aOther.getData() + aOther.getDimSize(0) * i, 1,
|
||||
output + aOther.getDimSize(0) * i, 1);
|
||||
else if (aMatrix.isVector()) {
|
||||
return ::vxmMax(aMatrix,aOther);
|
||||
}
|
||||
return Matrix::New(output,aOther);
|
||||
else if (aOther.isVector()) {
|
||||
return ::vxmMax(aOther,aMatrix);
|
||||
}
|
||||
else{
|
||||
float* output = malloc(aMatrix.getDataSize());
|
||||
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
|
||||
vsFmaxI(aOther.getDataSize(), aOther.getData(), 1, aMatrix.getData() + i, aMatrix.getDimSize(0),
|
||||
output + i, aOther.getDimSize(0));
|
||||
}
|
||||
return Matrix::New(output,aMatrix);
|
||||
}
|
||||
}
|
||||
else{
|
||||
std::cerr
|
||||
<< "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
|
||||
<< "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
|
||||
<< std::endl;
|
||||
return Matrix();
|
||||
}
|
||||
}
|
||||
|
||||
Matrix Aurora::max(const Matrix &aMatrix, const float aValue){
|
||||
float *output = malloc(1);
|
||||
|
||||
478
src/Function2D.cu
Normal file
478
src/Function2D.cu
Normal file
@@ -0,0 +1,478 @@
|
||||
#include <Function2D.cuh>
|
||||
#include <cfloat>
|
||||
#include <cstddef>
|
||||
#include <iostream>
|
||||
|
||||
#include <cmath>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/transform.h>
|
||||
#include <thrust/reduce.h>
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/functional.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <cuda_runtime.h>
|
||||
using namespace Aurora;
|
||||
|
||||
|
||||
/**
 * Writes the maximum of each column to aOutput, one block per column.
 *
 * Block b reduces the aColSize consecutive floats starting at
 * aInputData[b * aColSize] and stores the result in aOutput[b].
 * blockDim.x must not exceed 256, the size of the shared scratch array.
 *
 * @param aInputData input values, columns stored back to back
 * @param aOutput    one result slot per block
 * @param aColSize   number of elements in each column
 */
__global__ void maxColKernel(float* aInputData, float* aOutput, unsigned int aColSize)
{
    // 64-bit base offset so large matrices do not overflow 32-bit index math.
    const size_t columnBase = static_cast<size_t>(blockIdx.x) * aColSize;
    __shared__ float shared_data[256];

    // Each thread folds every blockDim.x-th element of the column into a
    // private partial result; threads past the end of the column start from
    // the identity value.
    float best = (threadIdx.x < aColSize) ? aInputData[columnBase + threadIdx.x] : -FLT_MAX;
    for (unsigned int i = threadIdx.x + blockDim.x; i < aColSize; i += blockDim.x) {
        best = fmaxf(best, aInputData[columnBase + i]);
    }
    shared_data[threadIdx.x] = best;
    __syncthreads();

    // Reduce the per-thread partials. Each step folds the upper half of the
    // active range onto the lower half; rounding the half up keeps the
    // reduction complete for block sizes that are not a power of two, and the
    // slots read in a step are never written in that same step.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x + half < active) {
            shared_data[threadIdx.x] = fmaxf(shared_data[threadIdx.x], shared_data[threadIdx.x + half]);
        }
        __syncthreads();
        active = half;
    }

    // The first thread publishes the block's result to global memory.
    if (threadIdx.x == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
|
||||
|
||||
/**
 * Writes the maximum of each row to aOutput, one block per row.
 *
 * Element j of the row handled by block b is read from
 * aInputData[j * aColSize + b]; the result is stored in aOutput[b].
 * blockDim.x must not exceed 512, the size of the shared scratch array.
 *
 * @param aInputData input values
 * @param aOutput    one result slot per block
 * @param aColSize   distance (in elements) between consecutive elements of a row
 * @param aRowSize   number of elements in each row
 */
__global__ void maxRowKernel(float* aInputData, float* aOutput,unsigned int aColSize, unsigned int aRowSize)
{
    // 64-bit stride so large matrices do not overflow 32-bit index math.
    const size_t stride = aColSize;
    __shared__ float shared_data[512];

    // Each thread folds every blockDim.x-th element of the row into a private
    // partial result; threads past the end of the row start from the identity
    // value.
    float best = (threadIdx.x < aRowSize) ? aInputData[threadIdx.x * stride + blockIdx.x] : -FLT_MAX;
    for (unsigned int j = threadIdx.x + blockDim.x; j < aRowSize; j += blockDim.x) {
        best = fmaxf(best, aInputData[j * stride + blockIdx.x]);
    }
    shared_data[threadIdx.x] = best;
    __syncthreads();

    // Reduce the per-thread partials. Each step folds the upper half of the
    // active range onto the lower half; rounding the half up keeps the
    // reduction complete for block sizes that are not a power of two, and the
    // slots read in a step are never written in that same step.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x + half < active) {
            shared_data[threadIdx.x] = fmaxf(shared_data[threadIdx.x], shared_data[threadIdx.x + half]);
        }
        __syncthreads();
        active = half;
    }

    // The first thread publishes the block's result to global memory.
    if (threadIdx.x == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
|
||||
|
||||
/**
 * Maximum along `direction`, for callers that do not need the position of
 * the maximum. Forwards to the overload with index outputs and discards them.
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, FunctionDirection direction) {
    long ignoredRow, ignoredCol;
    return max(aMatrix, direction, ignoredRow, ignoredCol);
}
|
||||
|
||||
/**
 * Maximum of a real 2D device matrix along the requested direction.
 *
 * @param aMatrix   input matrix; 3D, complex or empty input is rejected with a
 *                  message on std::cerr and an empty CudaMatrix
 * @param direction Column yields a 1 x dim1 result, Row a dim0 x 1 result and
 *                  All a 1x1 result; vectors are always reduced as All
 * @param rowIdx    row of the maximum; written only in the All case
 * @param colIdx    column of the maximum; written only in the All case
 * @return device matrix holding the maxima
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx)
{
    if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
        std::cerr<< (aMatrix.getDimSize(2) > 1 ? "max() not support 3D data!" : "max() not support complex value type!")
                 << std::endl;
        return CudaMatrix();
    }
    // An empty matrix has no maximum; bail out before the divisions by dim 0
    // and the zero-block kernel launches below.
    if (aMatrix.getDataSize() == 0) {
        std::cerr << "max() not support empty data!" << std::endl;
        return CudaMatrix();
    }
    // For a vector, row-wise and column-wise reduction are the same thing.
    if (aMatrix.isVector()){
        direction = All;
    }
    switch (direction)
    {
        case All: {
            thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(aMatrix.getData());
            auto max_iter = thrust::max_element(thrust::device,d_ptr,d_ptr+aMatrix.getDataSize());
            // Keep the flat index wide: narrowing to int would truncate for
            // matrices with more than INT_MAX elements.
            const long index = max_iter-d_ptr;
            rowIdx = index%aMatrix.getDimSize(0);
            colIdx = index/aMatrix.getDimSize(0);
            float* data = nullptr;
            cudaMalloc((void**)&data, sizeof(float));
            auto ret = Aurora::CudaMatrix::fromRawData(data,1,1,1);
            ret.setValue(0, *max_iter);
            return ret;
        }
        case Row:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int rowCount = aMatrix.getDimSize(1);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(0));
            // Pick the block size: about half the row length for short rows,
            // 512 for rows much longer than the columns, otherwise 256. Short
            // rows are rounded up to a power of two, the conventional shape
            // for a shared-memory tree reduction (at most 256 here).
            unsigned int threads = 256;
            if (rowCount<512){
                threads = 1;
                while (threads < static_cast<unsigned int>(rowCount/2+1)) {
                    threads <<= 1;
                }
            }
            else if (aMatrix.getDimSize(1)/aMatrix.getDimSize(0)>4){
                threads = 512;
            }
            maxRowKernel<<<aMatrix.getDimSize(0),threads>>>(matData,retData,aMatrix.getDimSize(0),rowCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,aMatrix.getDimSize(0),1);
            return ret;
        }
        case Column:
        default:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int colCount = aMatrix.getDimSize(0);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(1));
            // One block of 256 threads per column.
            maxColKernel<<<aMatrix.getDimSize(1),256>>>(matData,retData,colCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,1,aMatrix.getDimSize(1));
            return ret;
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Element-wise maximum of a device vector and a device matrix, with the
 * vector applied to every column (column vector) or every row (row vector).
 *
 * The matrices are taken by const reference to avoid copying them, and the
 * whole result is produced by a single transform over the flat element index
 * instead of one launch (and, for row vectors, one device-to-host read) per
 * column.
 *
 * @param aVec column vector whose length equals aMat's dim 0, or row vector
 *             whose length equals aMat's dim 1
 * @param aMat matrix the vector is compared against
 * @return a device matrix shaped like aMat, or an empty CudaMatrix (after
 *         printing an error to std::cerr) when the shapes do not match
 */
CudaMatrix vxmMax(const CudaMatrix &aVec, const CudaMatrix &aMat) {
    const bool isColVec = aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    const bool isRowVec = aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);
    if (!isColVec && !isRowVec) {
        std::cerr
            << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
            << std::endl;
        return CudaMatrix();
    }

    const size_t rows = aMat.getDimSize(0);
    const size_t cols = aMat.getDimSize(1);
    float* data = nullptr;
    cudaMalloc((void**)&data, sizeof(float) * aMat.getDataSize());

    const float* vec = aVec.getData();
    const float* mat = aMat.getData();
    const thrust::counting_iterator<size_t> first(0);
    if (isColVec) {
        // col-vec x mat: flat index k lies in row k % rows, which selects the
        // matching vector element.
        thrust::transform(thrust::device, first, first + rows * cols, data,
                          [=] __host__ __device__(size_t k) {
                              return fmaxf(vec[k % rows], mat[k]);
                          });
    } else {
        // row-vec x mat: flat index k lies in column k / rows, which selects
        // the matching vector element.
        thrust::transform(thrust::device, first, first + rows * cols, data,
                          [=] __host__ __device__(size_t k) {
                              return fmaxf(mat[k], vec[k / rows]);
                          });
    }
    return Aurora::CudaMatrix::fromRawData(data, aMat.getDimSize(0),
        aMat.getDimSize(1), aMat.getDimSize(2), aMat.getValueType());
}
|
||||
|
||||
/**
 * Element-wise maximum of two real 2D device matrices.
 *
 * Supported shape combinations, tried in this order: identical shapes; one
 * operand holding a single element (treated as a scalar); one operand being a
 * vector that matches the other's rows or columns. Anything else, as well as
 * 3D or complex input, prints an error to std::cerr and returns an empty
 * CudaMatrix.
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, const CudaMatrix &aOther) {
    if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
        std::cerr
            << (aMatrix.getDimSize(2) > 1 ? "max() not support 3D data!" : "max() not support complex value type!")
            << std::endl;
        return CudaMatrix();
    }
    if (aOther.getDimSize(2)>1 || aOther.isComplex()) {
        std::cerr
            << (aOther.getDimSize(2) > 1 ? "max() not support 3D data!" : "max() not support complex value type!")
            << std::endl;
        return CudaMatrix();
    }
    //same shape
    if (aMatrix.compareShape(aOther)){
        size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
        float* data = nullptr;
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto lambda = [=] __host__ __device__ (const float& x, const float& y){
            return fmaxf(x,y);
        };
        thrust::transform(thrust::device,aMatrix.getData(),
                          aMatrix.getData()+aMatrix.getDataSize(),aOther.getData(),
                          data,lambda);
        return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0),
            aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    }
    // one is scalar: dispatch straight to the scalar overload so the matrix
    // operand is passed by reference instead of being copied into a local
    if (aMatrix.getDataSize() == 1) {
        return max(aOther, aMatrix.getValue(0));
    }
    if (aOther.getDataSize() == 1) {
        return max(aMatrix, aOther.getValue(0));
    }
    if (aMatrix.isVector()) {
        return ::vxmMax(aMatrix,aOther);
    }
    if (aOther.isVector()) {
        return ::vxmMax(aOther,aMatrix);
    }
    std::cerr
        << "max(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
        << std::endl;
    return CudaMatrix();
}
|
||||
|
||||
/**
 * Element-wise maximum of a device matrix and a scalar.
 *
 * @param aMatrix input device matrix
 * @param aValue  scalar every element is compared against
 * @return a new device matrix with aMatrix's dimensions and value type
 */
CudaMatrix Aurora::max(const CudaMatrix &aMatrix, const float aValue){
    const size_t allocCount = aMatrix.getDataSize() * aMatrix.getValueType();
    float* result = nullptr;
    cudaMalloc((void**)&result, sizeof(float) * allocCount);

    float* const first = aMatrix.getData();
    float* const last = first + aMatrix.getDataSize();
    thrust::transform(thrust::device, first, last, result,
                      [=] __host__ __device__ (const float& x){
                          return fmaxf(x,aValue);
                      });

    return Aurora::CudaMatrix::fromRawData(result, aMatrix.getDimSize(0),
        aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
}
|
||||
|
||||
/**
 * Writes the minimum of each column to aOutput, one block per column.
 *
 * Block b reduces the aColSize consecutive floats starting at
 * aInputData[b * aColSize] and stores the result in aOutput[b].
 * blockDim.x must not exceed 256, the size of the shared scratch array.
 *
 * @param aInputData input values, columns stored back to back
 * @param aOutput    one result slot per block
 * @param aColSize   number of elements in each column
 */
__global__ void minColKernel(float* aInputData, float* aOutput, unsigned int aColSize)
{
    // 64-bit base offset so large matrices do not overflow 32-bit index math.
    const size_t columnBase = static_cast<size_t>(blockIdx.x) * aColSize;
    __shared__ float shared_data[256];

    // Each thread folds every blockDim.x-th element of the column into a
    // private partial result; threads past the end of the column start from
    // the identity value.
    float best = (threadIdx.x < aColSize) ? aInputData[columnBase + threadIdx.x] : FLT_MAX;
    for (unsigned int i = threadIdx.x + blockDim.x; i < aColSize; i += blockDim.x) {
        best = fminf(best, aInputData[columnBase + i]);
    }
    shared_data[threadIdx.x] = best;
    __syncthreads();

    // Reduce the per-thread partials. Each step folds the upper half of the
    // active range onto the lower half; rounding the half up keeps the
    // reduction complete for block sizes that are not a power of two, and the
    // slots read in a step are never written in that same step.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x + half < active) {
            shared_data[threadIdx.x] = fminf(shared_data[threadIdx.x], shared_data[threadIdx.x + half]);
        }
        __syncthreads();
        active = half;
    }

    // The first thread publishes the block's result to global memory.
    if (threadIdx.x == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
|
||||
|
||||
/**
 * Writes the minimum of each row to aOutput, one block per row.
 *
 * Element j of the row handled by block b is read from
 * aInputData[j * aColSize + b]; the result is stored in aOutput[b].
 * blockDim.x must not exceed 512, the size of the shared scratch array.
 *
 * @param aInputData input values
 * @param aOutput    one result slot per block
 * @param aColSize   distance (in elements) between consecutive elements of a row
 * @param aRowSize   number of elements in each row
 */
__global__ void minRowKernel(float* aInputData, float* aOutput,unsigned int aColSize, unsigned int aRowSize)
{
    // 64-bit stride so large matrices do not overflow 32-bit index math.
    const size_t stride = aColSize;
    __shared__ float shared_data[512];

    // Each thread folds every blockDim.x-th element of the row into a private
    // partial result; threads past the end of the row start from the identity
    // value.
    float best = (threadIdx.x < aRowSize) ? aInputData[threadIdx.x * stride + blockIdx.x] : FLT_MAX;
    for (unsigned int j = threadIdx.x + blockDim.x; j < aRowSize; j += blockDim.x) {
        best = fminf(best, aInputData[j * stride + blockIdx.x]);
    }
    shared_data[threadIdx.x] = best;
    __syncthreads();

    // Reduce the per-thread partials. Each step folds the upper half of the
    // active range onto the lower half; rounding the half up keeps the
    // reduction complete for block sizes that are not a power of two, and the
    // slots read in a step are never written in that same step.
    for (unsigned int active = blockDim.x; active > 1;) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x + half < active) {
            shared_data[threadIdx.x] = fminf(shared_data[threadIdx.x], shared_data[threadIdx.x + half]);
        }
        __syncthreads();
        active = half;
    }

    // The first thread publishes the block's result to global memory.
    if (threadIdx.x == 0) {
        aOutput[blockIdx.x] = shared_data[0];
    }
}
|
||||
|
||||
/**
 * Minimum along `direction`, for callers that do not need the position of
 * the minimum. Forwards to the overload with index outputs and discards them.
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, FunctionDirection direction) {
    long ignoredRow, ignoredCol;
    return min(aMatrix, direction, ignoredRow, ignoredCol);
}
|
||||
|
||||
/**
 * Minimum of a real 2D device matrix along the requested direction.
 *
 * @param aMatrix   input matrix; 3D, complex or empty input is rejected with a
 *                  message on std::cerr and an empty CudaMatrix
 * @param direction Column yields a 1 x dim1 result, Row a dim0 x 1 result and
 *                  All a 1x1 result; vectors are always reduced as All
 * @param rowIdx    row of the minimum; written only in the All case
 * @param colIdx    column of the minimum; written only in the All case
 * @return device matrix holding the minima
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx)
{
    if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
        std::cerr<< (aMatrix.getDimSize(2) > 1 ? "min() not support 3D data!" : "min() not support complex value type!")
                 << std::endl;
        return CudaMatrix();
    }
    // An empty matrix has no minimum; bail out before the divisions by dim 0
    // and the zero-block kernel launches below.
    if (aMatrix.getDataSize() == 0) {
        std::cerr << "min() not support empty data!" << std::endl;
        return CudaMatrix();
    }
    // For a vector, row-wise and column-wise reduction are the same thing.
    if (aMatrix.isVector()){
        direction = All;
    }
    switch (direction)
    {
        case All: {
            thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(aMatrix.getData());
            auto min_iter = thrust::min_element(thrust::device,d_ptr,d_ptr+aMatrix.getDataSize());
            // Keep the flat index wide: narrowing to int would truncate for
            // matrices with more than INT_MAX elements.
            const long index = min_iter-d_ptr;
            rowIdx = index%aMatrix.getDimSize(0);
            colIdx = index/aMatrix.getDimSize(0);
            float* data = nullptr;
            cudaMalloc((void**)&data, sizeof(float));
            auto ret = Aurora::CudaMatrix::fromRawData(data,1,1,1);
            ret.setValue(0, *min_iter);
            return ret;
        }
        case Row:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int rowCount = aMatrix.getDimSize(1);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(0));
            // Pick the block size: about half the row length for short rows,
            // 512 for rows much longer than the columns, otherwise 256. Short
            // rows are rounded up to a power of two, the conventional shape
            // for a shared-memory tree reduction (at most 256 here).
            unsigned int threads = 256;
            if (rowCount<512){
                threads = 1;
                while (threads < static_cast<unsigned int>(rowCount/2+1)) {
                    threads <<= 1;
                }
            }
            else if (aMatrix.getDimSize(1)/aMatrix.getDimSize(0)>4){
                threads = 512;
            }
            minRowKernel<<<aMatrix.getDimSize(0),threads>>>(matData,retData,aMatrix.getDimSize(0),rowCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,aMatrix.getDimSize(0),1);
            return ret;
        }
        case Column:
        default:
        {
            float* matData = aMatrix.getData();
            float* retData = nullptr;
            int colCount = aMatrix.getDimSize(0);
            cudaMalloc((void**)&retData, sizeof(float)*aMatrix.getDimSize(1));
            // One block of 256 threads per column.
            minColKernel<<<aMatrix.getDimSize(1),256>>>(matData,retData,colCount);
            cudaDeviceSynchronize();
            CudaMatrix ret = Aurora::CudaMatrix::fromRawData(retData,1,aMatrix.getDimSize(1));
            return ret;
        }
    }
}
|
||||
|
||||
/**
 * Element-wise minimum of a device vector and a device matrix, with the
 * vector applied to every column (column vector) or every row (row vector).
 *
 * The matrices are taken by const reference to avoid copying them, and the
 * whole result is produced by a single transform over the flat element index
 * instead of one launch (and, for row vectors, one device-to-host read) per
 * column.
 *
 * @param aVec column vector whose length equals aMat's dim 0, or row vector
 *             whose length equals aMat's dim 1
 * @param aMat matrix the vector is compared against
 * @return a device matrix shaped like aMat, or an empty CudaMatrix (after
 *         printing an error to std::cerr) when the shapes do not match
 */
CudaMatrix vxmMin(const CudaMatrix &aVec, const CudaMatrix &aMat) {
    const bool isColVec = aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0);
    const bool isRowVec = aVec.getDimSize(0) == 1 && aVec.getDimSize(1) == aMat.getDimSize(1);
    if (!isColVec && !isRowVec) {
        std::cerr
            << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
            << std::endl;
        return CudaMatrix();
    }

    const size_t rows = aMat.getDimSize(0);
    const size_t cols = aMat.getDimSize(1);
    float* data = nullptr;
    cudaMalloc((void**)&data, sizeof(float) * aMat.getDataSize());

    const float* vec = aVec.getData();
    const float* mat = aMat.getData();
    const thrust::counting_iterator<size_t> first(0);
    if (isColVec) {
        // col-vec x mat: flat index k lies in row k % rows, which selects the
        // matching vector element.
        thrust::transform(thrust::device, first, first + rows * cols, data,
                          [=] __host__ __device__(size_t k) {
                              return fminf(vec[k % rows], mat[k]);
                          });
    } else {
        // row-vec x mat: flat index k lies in column k / rows, which selects
        // the matching vector element.
        thrust::transform(thrust::device, first, first + rows * cols, data,
                          [=] __host__ __device__(size_t k) {
                              return fminf(mat[k], vec[k / rows]);
                          });
    }
    return Aurora::CudaMatrix::fromRawData(data, aMat.getDimSize(0),
        aMat.getDimSize(1), aMat.getDimSize(2), aMat.getValueType());
}
|
||||
|
||||
/**
 * Element-wise minimum of two real 2D device matrices.
 *
 * Supported shape combinations, tried in this order: identical shapes; one
 * operand holding a single element (treated as a scalar); one operand being a
 * vector that matches the other's rows or columns. Anything else, as well as
 * 3D or complex input, prints an error to std::cerr and returns an empty
 * CudaMatrix.
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, const CudaMatrix &aOther) {
    if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
        std::cerr
            << (aMatrix.getDimSize(2) > 1 ? "min() not support 3D data!" : "min() not support complex value type!")
            << std::endl;
        return CudaMatrix();
    }
    if (aOther.getDimSize(2)>1 || aOther.isComplex()) {
        std::cerr
            << (aOther.getDimSize(2) > 1 ? "min() not support 3D data!" : "min() not support complex value type!")
            << std::endl;
        return CudaMatrix();
    }
    //same shape
    if (aMatrix.compareShape(aOther)){
        size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
        float* data = nullptr;
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto lambda = [=] __host__ __device__ (const float& x, const float& y){
            return fminf(x,y);
        };
        thrust::transform(thrust::device,aMatrix.getData(),
                          aMatrix.getData()+aMatrix.getDataSize(),aOther.getData(),
                          data,lambda);
        return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0),
            aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    }
    // one is scalar: dispatch straight to the scalar overload so the matrix
    // operand is passed by reference instead of being copied into a local
    if (aMatrix.getDataSize() == 1) {
        return min(aOther, aMatrix.getValue(0));
    }
    if (aOther.getDataSize() == 1) {
        return min(aMatrix, aOther.getValue(0));
    }
    if (aMatrix.isVector()) {
        return ::vxmMin(aMatrix,aOther);
    }
    if (aOther.isVector()) {
        return ::vxmMin(aOther,aMatrix);
    }
    std::cerr
        << "min(A,B) with matrix must be like A[MxN] - B[1xN] or A[Mx1] - B[MxN]"
        << std::endl;
    return CudaMatrix();
}
|
||||
|
||||
|
||||
/**
 * Element-wise minimum of a device matrix and a scalar.
 *
 * @param aMatrix input device matrix
 * @param aValue  scalar every element is compared against
 * @return a new device matrix with aMatrix's dimensions and value type
 */
CudaMatrix Aurora::min(const CudaMatrix &aMatrix, const float aValue){
    const size_t allocCount = aMatrix.getDataSize() * aMatrix.getValueType();
    float* result = nullptr;
    cudaMalloc((void**)&result, sizeof(float) * allocCount);

    float* const first = aMatrix.getData();
    float* const last = first + aMatrix.getDataSize();
    thrust::transform(thrust::device, first, last, result,
                      [=] __host__ __device__ (const float& x){
                          return fminf(x,aValue);
                      });

    return Aurora::CudaMatrix::fromRawData(result, aMatrix.getDimSize(0),
        aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
}
|
||||
19
src/Function2D.cuh
Normal file
19
src/Function2D.cuh
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef __FUNCTION2D_CUDA__
|
||||
#define __FUNCTION2D_CUDA__
|
||||
#include "CudaMatrix.h"
|
||||
#include "AuroraDefs.h"
|
||||
|
||||
namespace Aurora
|
||||
{
|
||||
CudaMatrix max(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
|
||||
CudaMatrix max(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx);
|
||||
CudaMatrix max(const CudaMatrix &aMatrix, const float aValue);
|
||||
CudaMatrix max(const CudaMatrix &aMatrix, const CudaMatrix &aOther);
|
||||
|
||||
CudaMatrix min(const CudaMatrix &aMatrix, FunctionDirection direction = Column);
|
||||
CudaMatrix min(const CudaMatrix &aMatrix, FunctionDirection direction, long& rowIdx, long& colIdx);
|
||||
CudaMatrix min(const CudaMatrix &aMatrix, const float aValue);
|
||||
CudaMatrix min(const CudaMatrix &aMatrix, const CudaMatrix &aOther);
|
||||
}
|
||||
|
||||
#endif // __FUNCTION2D_CUDA__
|
||||
@@ -6,15 +6,11 @@
|
||||
|
||||
#include "Matrix.h"
|
||||
#include "Function1D.h"
|
||||
#include "AuroraDefs.h"
|
||||
|
||||
namespace Aurora
|
||||
{
|
||||
enum FunctionDirection
|
||||
{
|
||||
Column,
|
||||
Row,
|
||||
All
|
||||
};
|
||||
|
||||
float immse(const Matrix &aImageA, const Matrix &aImageB);
|
||||
Matrix inv(const Matrix &aMatrix);
|
||||
Matrix inv(Matrix &&aMatrix);
|
||||
@@ -51,6 +47,8 @@ namespace Aurora
|
||||
*/
|
||||
Matrix min(const Matrix &aMatrix, const Matrix &aOther);
|
||||
|
||||
Matrix min(const Matrix &aMatrix, const float aValue);
|
||||
|
||||
/**
|
||||
* 比较两个矩阵,求对应位置的最大值,不支持三维
|
||||
* @attention 矩阵形状不一样时,如A为[MxN],则B应为标量或[1xN]的行向量
|
||||
|
||||
461
test/Function2D_Cuda_Test.cpp
Normal file
461
test/Function2D_Cuda_Test.cpp
Normal file
@@ -0,0 +1,461 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <chrono>
|
||||
#include "CudaMatrix.h"
|
||||
#include "Function.h"
|
||||
#include "Matrix.h"
|
||||
#include "TestUtility.h"
|
||||
|
||||
#include "Function2D.h"
|
||||
#include "Function2D.cuh"
|
||||
|
||||
class Function2D_Cuda_Test:public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
static void SetUpFunction2DCudaTester(){
|
||||
|
||||
}
|
||||
static void TearDownTestCase(){
|
||||
}
|
||||
public:
|
||||
Aurora::Matrix B;
|
||||
Aurora::CudaMatrix dB;
|
||||
|
||||
void SetUp(){
|
||||
|
||||
}
|
||||
void TearDown(){
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST_F(Function2D_Cuda_Test, min)
|
||||
{
|
||||
{
|
||||
float *dataB = Aurora::random(4096*41472);
|
||||
B = Aurora::Matrix::fromRawData(dataB, 4096, 41472);
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
auto start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
ret1 = Aurora::min(B, Aurora::FunctionDirection::Row,r,c);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
ret2 = Aurora::min(dB, Aurora::FunctionDirection::Row,r,c);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
{
|
||||
float *dataB = Aurora::random(3157*111);
|
||||
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
auto start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
B.forceReshape( 111,3157, 1);
|
||||
dB = B.toDeviceMatrix();
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
ret1 = Aurora::min(B, Aurora::FunctionDirection::Column,r,c);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
ret2 = Aurora::min(dB, Aurora::FunctionDirection::Column,r,c);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
{
|
||||
float *dataB = Aurora::random(3157*111);
|
||||
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
auto start_time_ = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto ret1 = Aurora::min(B, 500.5f);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret2 = Aurora::min(dB, 500.5f);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
{
|
||||
float *dataB = Aurora::random(3157*111);
|
||||
float *dataA = Aurora::random(3157*111);
|
||||
auto A = Aurora::Matrix::fromRawData(dataA, 3157, 111);
|
||||
|
||||
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
|
||||
auto dA = A.toDeviceMatrix();
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
auto start_time_ = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto ret1 = Aurora::min(B, A);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret2 = Aurora::min(dB, dA);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
{
|
||||
float *dataB = Aurora::random(3157*111);
|
||||
float *dataA = Aurora::random(3157);
|
||||
auto A = Aurora::Matrix::fromRawData(dataA, 3157, 1);
|
||||
|
||||
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
|
||||
auto dA = A.toDeviceMatrix();
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
auto start_time_ = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto ret1 = Aurora::min(B, A);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret2 = Aurora::min(dB, dA);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
ret2 = Aurora::min(dA, dB);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
{
|
||||
float *dataB = Aurora::random(3157*111);
|
||||
float *dataA = Aurora::random(111);
|
||||
auto A = Aurora::Matrix::fromRawData(dataA, 1, 111);
|
||||
|
||||
B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
|
||||
auto dA = A.toDeviceMatrix();
|
||||
dB = B.toDeviceMatrix();
|
||||
long r,c;
|
||||
auto start_time_ = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto ret1 = Aurora::min(B, A);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test1 duration: " << duration.count() << " ms" << std::endl;
|
||||
start_time_ = std::chrono::high_resolution_clock::now();
|
||||
auto ret2 = Aurora::min(dB, dA);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time_);
|
||||
std::cout << "Test2 duration: " << duration.count() << " ms" << std::endl;
|
||||
ASSERT_EQ(ret1.getDimSize(0),ret2.getDimSize(0));
|
||||
ASSERT_EQ(ret1.getDimSize(1),ret2.getDimSize(1));
|
||||
ASSERT_EQ(ret1.getDimSize(2),ret2.getDimSize(2));
|
||||
|
||||
for (size_t i = 0; i < ret1.getDataSize(); i++)
|
||||
{
|
||||
ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<<i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cross-checks Aurora::max on a Matrix against Aurora::max on the matrix
// returned by Matrix::toDeviceMatrix(), for each overload exercised below:
//   1. directional reduction (Column and Row) on a (4096, 41472) matrix,
//   2. Column reduction on a (3157, 111) matrix and again after
//      forceReshape(111, 3157, 1),
//   3. matrix vs. scalar,
//   4. matrix vs. matrix of identical dimensions,
//   5. matrix vs. (3157, 1) vector, in both argument orders on the device side,
//   6. matrix vs. (1, 111) vector.
// Every comparison asserts equal dimension sizes and element-wise float
// equality, and prints the wall-clock duration of each call ("Test1" is the
// Matrix call, "Test2" is the device-matrix call).
// B and dB are not declared here; presumably they are fixture members.
TEST_F(Function2D_Cuda_Test, max)
{
    using Clock = std::chrono::high_resolution_clock;

    // Prints "<label> duration: <N> ms" for the interval [begin, end].
    const auto reportDuration = [](const char *label, Clock::time_point begin, Clock::time_point end) {
        const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin);
        std::cout << label << " duration: " << elapsed.count() << " ms" << std::endl;
    };

    // Case 1: directional max on a large matrix, Column then Row.
    {
        float *dataB = Aurora::random(4096*41472);
        B = Aurora::Matrix::fromRawData(dataB, 4096, 41472);
        dB = B.toDeviceMatrix();
        long r, c; // passed to the directional overloads; never inspected here

        auto tic = Clock::now();
        auto hostResult = Aurora::max(B, Aurora::FunctionDirection::Column, r, c);
        auto toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        auto deviceResult = Aurora::max(dB, Aurora::FunctionDirection::Column, r, c);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }

        tic = Clock::now();
        hostResult = Aurora::max(B, Aurora::FunctionDirection::Row, r, c);
        toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        deviceResult = Aurora::max(dB, Aurora::FunctionDirection::Row, r, c);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }
    }

    // Case 2: Column max on a (3157, 111) matrix, then on the same data
    // reshaped to (111, 3157, 1).
    {
        float *dataB = Aurora::random(3157*111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        dB = B.toDeviceMatrix();
        long r, c; // passed to the directional overloads; never inspected here

        auto tic = Clock::now();
        auto hostResult = Aurora::max(B, Aurora::FunctionDirection::Column, r, c);
        auto toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        auto deviceResult = Aurora::max(dB, Aurora::FunctionDirection::Column, r, c);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }

        // Re-derive the device matrix after reshaping so both sides agree.
        B.forceReshape( 111,3157, 1);
        dB = B.toDeviceMatrix();

        tic = Clock::now();
        hostResult = Aurora::max(B, Aurora::FunctionDirection::Column, r, c);
        toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        deviceResult = Aurora::max(dB, Aurora::FunctionDirection::Column, r, c);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }
    }

    // Case 3: matrix vs. scalar.
    {
        float *dataB = Aurora::random(3157*111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto hostResult = Aurora::max(B, 500.5f);
        auto toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        auto deviceResult = Aurora::max(dB, 500.5f);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }
    }

    // Case 4: matrix vs. matrix with identical dimensions.
    {
        float *dataB = Aurora::random(3157*111);
        float *dataA = Aurora::random(3157*111);
        auto A = Aurora::Matrix::fromRawData(dataA, 3157, 111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        auto dA = A.toDeviceMatrix();
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto hostResult = Aurora::max(B, A);
        auto toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        auto deviceResult = Aurora::max(dB, dA);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }
    }

    // Case 5: matrix vs. (3157, 1) vector; the device call is checked in both
    // argument orders against the single Matrix result max(B, A).
    {
        float *dataB = Aurora::random(3157*111);
        float *dataA = Aurora::random(3157);
        auto A = Aurora::Matrix::fromRawData(dataA, 3157, 1);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        auto dA = A.toDeviceMatrix();
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto hostResult = Aurora::max(B, A);
        auto toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        auto deviceResult = Aurora::max(dB, dA);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }

        // Same operands with the arguments swapped on the device side.
        tic = Clock::now();
        deviceResult = Aurora::max(dA, dB);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }
    }

    // Case 6: matrix vs. (1, 111) vector.
    {
        float *dataB = Aurora::random(3157*111);
        float *dataA = Aurora::random(111);
        auto A = Aurora::Matrix::fromRawData(dataA, 1, 111);
        B = Aurora::Matrix::fromRawData(dataB, 3157, 111);
        auto dA = A.toDeviceMatrix();
        dB = B.toDeviceMatrix();

        auto tic = Clock::now();
        auto hostResult = Aurora::max(B, A);
        auto toc = Clock::now();
        reportDuration("Test1", tic, toc);

        tic = Clock::now();
        auto deviceResult = Aurora::max(dB, dA);
        toc = Clock::now();
        reportDuration("Test2", tic, toc);

        ASSERT_EQ(hostResult.getDimSize(0), deviceResult.getDimSize(0));
        ASSERT_EQ(hostResult.getDimSize(1), deviceResult.getDimSize(1));
        ASSERT_EQ(hostResult.getDimSize(2), deviceResult.getDimSize(2));
        for (size_t i = 0; i < hostResult.getDataSize(); i++)
        {
            ASSERT_FLOAT_EQ(hostResult[i], deviceResult.getValue(i))<<", index at :"<<i;
        }
    }
}
|
||||
@@ -73,13 +73,16 @@ TEST_F(Function2D_Test, std){
|
||||
}
|
||||
|
||||
TEST_F(Function2D_Test, min) {
|
||||
float *dataA = new float[3]{1, 2, 3};
|
||||
float *dataA = new float[3]{1, 2,-9};
|
||||
float *dataB = new float[9]{2, 3, 3, 2, 2, -1, 3, 3, 3};
|
||||
float *dataC = new float[1]{1.5};
|
||||
float *dataE = new float[12]{2, 3, 3, 2, 2, -1, 3, 3, 3,1,-5,7};
|
||||
auto A = Aurora::Matrix::fromRawData(dataA, 3, 1);
|
||||
auto B = Aurora::Matrix::fromRawData(dataB, 3, 3);
|
||||
auto C = Aurora::Matrix::fromRawData(dataC, 1);
|
||||
auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3);
|
||||
auto E = Aurora::Matrix::copyFromRawData(dataE, 4, 3);
|
||||
|
||||
Aurora::Matrix ret = Aurora::min(B);
|
||||
EXPECT_EQ(1, ret.getDimSize(0));
|
||||
EXPECT_EQ(3, ret.getDimSize(1));
|
||||
@@ -99,32 +102,64 @@ TEST_F(Function2D_Test, min) {
|
||||
long r,c;
|
||||
ret = Aurora::min(A, Aurora::Column,r,c);
|
||||
EXPECT_FLOAT_EQ(1, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_EQ(0, r);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[0]);
|
||||
EXPECT_EQ(2, r);
|
||||
EXPECT_EQ(0, c);
|
||||
ret = Aurora::min(D);
|
||||
EXPECT_EQ(1, ret.getDimSize(0));
|
||||
EXPECT_EQ(1, ret.getDimSize(1));
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[0]);
|
||||
ret = Aurora::min(A, C);
|
||||
EXPECT_FLOAT_EQ(3, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(1.5, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(1.5, ret.getData()[2]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[2]);
|
||||
// mat x row-vec
|
||||
ret = Aurora::min(B,D);
|
||||
EXPECT_FLOAT_EQ(9, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[2]);
|
||||
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[8]);
|
||||
|
||||
// row-vec x mat
|
||||
ret = Aurora::min(D,E);
|
||||
EXPECT_FLOAT_EQ(12, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[8]);
|
||||
D.forceReshape(3,1,1);
|
||||
ret = Aurora::min(D,B);
|
||||
EXPECT_FLOAT_EQ(9, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[5]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[8]);
|
||||
D.forceReshape(3,1,1);
|
||||
//col-vec x mat
|
||||
ret = Aurora::min(D,B);
|
||||
EXPECT_FLOAT_EQ(9, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(-9, ret.getData()[5]);
|
||||
//mat x col-vec
|
||||
ret = Aurora::min(E,D);
|
||||
EXPECT_FLOAT_EQ(0, ret.getDataSize());
|
||||
|
||||
}
|
||||
|
||||
TEST_F(Function2D_Test, max) {
|
||||
float *dataA = new float[3]{1, 2, 3};
|
||||
float *dataC = new float[3]{1, 2, 4};
|
||||
float *dataB = new float[9]{2, 3, 3, 2, 2, 1, 3, 3, 3};
|
||||
TEST_F(Function2D_Test, max)
|
||||
{
|
||||
float *dataA = new float[3]{1, 2,-9};
|
||||
float *dataB = new float[9]{2, 3, 3, 2, 2, -1, 3, 3, 3};
|
||||
float *dataC = new float[1]{1.5};
|
||||
float *dataE = new float[12]{2, 3, 3, 2, 2, -1, 3, 3, 3,1,-5,7};
|
||||
auto A = Aurora::Matrix::fromRawData(dataA, 3, 1);
|
||||
auto B = Aurora::Matrix::fromRawData(dataB, 3, 3);
|
||||
auto C = Aurora::Matrix::fromRawData(dataC, 3, 1);
|
||||
auto C = Aurora::Matrix::fromRawData(dataC, 1);
|
||||
auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3);
|
||||
auto E = Aurora::Matrix::copyFromRawData(dataE, 4, 3);
|
||||
|
||||
Aurora::Matrix ret = Aurora::max(B);
|
||||
EXPECT_EQ(1, ret.getDimSize(0));
|
||||
@@ -145,17 +180,51 @@ TEST_F(Function2D_Test, max) {
|
||||
long r,c;
|
||||
ret = Aurora::max(A, Aurora::Column,r,c);
|
||||
EXPECT_FLOAT_EQ(1, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[0]);
|
||||
EXPECT_EQ(2, r);
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
|
||||
EXPECT_EQ(1, r);
|
||||
EXPECT_EQ(0, c);
|
||||
auto D = Aurora::Matrix::copyFromRawData(dataA, 1, 3);
|
||||
ret = Aurora::max(D);
|
||||
EXPECT_EQ(1, ret.getDimSize(0));
|
||||
EXPECT_EQ(1, ret.getDimSize(1));
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[0]);
|
||||
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
|
||||
ret = Aurora::max(A, C);
|
||||
EXPECT_FLOAT_EQ(4, ret.getData()[2]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(1.5, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(1.5, ret.getData()[2]);
|
||||
// mat x row-vec
|
||||
ret = Aurora::max(B,D);
|
||||
EXPECT_FLOAT_EQ(9, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[5]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[8]);
|
||||
|
||||
// row-vec x mat
|
||||
ret = Aurora::max(D,E);
|
||||
EXPECT_FLOAT_EQ(12, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[5]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[8]);
|
||||
D.forceReshape(3,1,1);
|
||||
ret = Aurora::max(D,B);
|
||||
EXPECT_FLOAT_EQ(9, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[8]);
|
||||
D.forceReshape(3,1,1);
|
||||
//col-vec x mat
|
||||
ret = Aurora::max(D,B);
|
||||
EXPECT_FLOAT_EQ(9, ret.getDataSize());
|
||||
EXPECT_FLOAT_EQ(2, ret.getData()[0]);
|
||||
EXPECT_FLOAT_EQ(3, ret.getData()[1]);
|
||||
EXPECT_FLOAT_EQ(-1, ret.getData()[5]);
|
||||
//mat x col-vec
|
||||
ret = Aurora::max(E,D);
|
||||
EXPECT_FLOAT_EQ(0, ret.getDataSize());
|
||||
|
||||
}
|
||||
|
||||
TEST_F(Function2D_Test, sum) {
|
||||
|
||||
Reference in New Issue
Block a user