Files
Aurora/src/Function1D.cpp
2023-06-09 14:32:17 +08:00

954 lines
34 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "Function1D.h"
#include "Function2D.h"
#include "Function.h"
//必须在Eigen之前
#include "AuroraDefs.h"
#include "Function3D.h"
#include "Matrix.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <cmath>
#include <iostream>
#include <Eigen/Core>
#include <Eigen/Eigen>
#include <Eigen/Dense>
#include <Eigen/SVD>
#include <mkl_lapack.h>
#include <sys/types.h>
#include <utility>
using namespace Aurora;
namespace {
const int COMPLEX_STRIDE = 2;
const int REAL_STRIDE = 1;
const int SAME_STRIDE = 1;
const double VALUE_ONE = 1.0;
const ushort CONVERT_AND_VALUE = 15;
const ushort CONVERT_AND_VALUE_2 = 2047;
const ushort CONVERT_MUL_VALUE = 2048;
uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
inline void convertValue(double aValue ,double* des){
double value = aValue;
ushort *exponentPtr = (ushort *)&value;
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
double signValue = aValue;
short *signPtr = (short *)&signValue;
uint sign_bit[4] = {
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
double fraction3Value = aValue;
ushort *fraction3Ptr = (ushort *)&fraction3Value;
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
uint hidden_bit[4] = {
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
};
int outputPtr[4] = {0};
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1))
: (temp >> std::abs(exponentPtr[0] - 1));
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1))
: (temp >> std::abs(exponentPtr[1] - 1));
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1))
: (temp >> std::abs(exponentPtr[2] - 1));
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1))
: (temp >> std::abs(exponentPtr[3] - 1));
des[0] = outputPtr[0];
des[1] = outputPtr[1];
des[2] = outputPtr[2];
des[3] = outputPtr[3];
}
inline void convertValue2(short* aValue ,double* des){
ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
short signPtr [4] = {aValue[0],aValue[1],aValue[2],aValue[3]};
uint sign_bit[4] = {
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
ushort fraction3Ptr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
uint hidden_bit[4] = {
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
};
int outputPtr[4] = {0};
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1)): temp;
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1)): temp;
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1)): temp;
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1)): temp;
des[0] = outputPtr[0];
des[1] = outputPtr[1];
des[2] = outputPtr[2];
des[3] = outputPtr[3];
}
}
Aurora::Matrix Aurora::complex(const Aurora::Matrix &matrix) {
if (matrix.getValueType() == Complex) {
std::cerr<<"complex not support complex value type"<<std::endl;
return matrix;
}
auto output = malloc(matrix.getDataSize() ,true);
memset(output, 0, (matrix.getDataSize() * sizeof(std::complex<double>)));
cblas_dcopy(matrix.getDataSize(), matrix.getData(), REAL_STRIDE, (double *) output, COMPLEX_STRIDE);
return Aurora::Matrix::New((double *) output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2),
Complex);
}
Aurora::Matrix Aurora::real(const Aurora::Matrix &matrix) {
if (matrix.getValueType() == Normal) {
std::cerr<<"real only support complex value type"<<std::endl;
return matrix;
}
auto output = (double *) malloc(matrix.getDataSize());
memset(output, 0, (matrix.getDataSize() * sizeof(double)));
cblas_dcopy(matrix.getDataSize(), matrix.getData(),COMPLEX_STRIDE , (double *) output, REAL_STRIDE);
return Aurora::Matrix::New((double *) output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
Aurora::Matrix Aurora::imag(const Aurora::Matrix &matrix) {
if (matrix.getValueType() == Normal) {
std::cerr<<"imag only support complex value type"<<std::endl;
return matrix;
}
auto output = malloc(matrix.getDataSize());
memset(output, 0, (matrix.getDataSize() * sizeof(double)));
cblas_dcopy(matrix.getDataSize(), matrix.getData()+1,COMPLEX_STRIDE , (double *) output, REAL_STRIDE);
return Aurora::Matrix::New((double *) output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
Aurora::Matrix Aurora::ceil(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
//for real part
vdCeilI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vdCeilI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, output + 1, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix);
}
Aurora::Matrix Aurora::ceil(const Aurora::Matrix &&matrix) {
//for real part
vdCeilI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vdCeilI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, matrix.getData() + 1, SAME_STRIDE);
}
return matrix;
}
Aurora::Matrix Aurora::round(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
//for real part
vdRoundI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vdRoundI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, output + 1, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix);
}
Aurora::Matrix Aurora::round(const Aurora::Matrix &&matrix) {
//for real part
vdRoundI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vdRoundI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, matrix.getData() + 1, SAME_STRIDE);
}
return matrix;
}
Aurora::Matrix Aurora::floor(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
//for real part
vdFloorI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vdFloorI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, output + 1, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix);
}
Aurora::Matrix Aurora::floor(const Aurora::Matrix &&matrix) {
//for real part
vdFloorI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vdFloorI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, matrix.getData() + 1, SAME_STRIDE);
}
return matrix;
}
Matrix Aurora::auroraNot(const Matrix& aMatrix){
return auroraNot(std::forward<Matrix&&>(aMatrix.deepCopy()));
}
Matrix Aurora::auroraNot(Matrix&& aMatrix){
Eigen::Map<Eigen::VectorXd> v2(aMatrix.getData(), aMatrix.getDataSize());
v2 = (v2.array()>0).select(1,v2);
v2 = (v2.array()<0).select(0,v2);
v2 = v2.array()+1.0;
v2 = (v2.array() == 2.0).select(0.0, v2);
return aMatrix;
}
Aurora::Matrix Aurora::sqrt(const Aurora::Matrix& matrix) {
if (matrix.getValueType() != Complex) {
auto output = malloc(matrix.getDataSize());
vdSqrtI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
return Aurora::Matrix::New(output, matrix);
}
std::cerr<<"sqrt not support complex"<<std::endl;
return Aurora::Matrix();
}
Aurora::Matrix Aurora::sqrt(Aurora::Matrix&& matrix) {
if (matrix.getValueType() != Complex) {
vdSqrtI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
return matrix;
}
std::cerr<<"sqrt not support complex"<<std::endl;
return Aurora::Matrix();
}
Aurora::Matrix Aurora::abs(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
if (matrix.getValueType()==Normal){
vdAbsI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
}
else{
vzAbsI(matrix.getDataSize(), (std::complex<double> *)matrix.getData(), SAME_STRIDE,output, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
Aurora::Matrix Aurora::abs(Aurora::Matrix&& matrix) {
if (matrix.getValueType()==Normal){
vdAbsI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
return matrix;
}
//TODO考虑尝试是不是使用realloc缩短已分配的内存的方式重用matrix
else{
auto output = malloc(matrix.getDataSize());
vzAbsI(matrix.getDataSize(), (std::complex<double> *)matrix.getData(), SAME_STRIDE,output, SAME_STRIDE);
return Aurora::Matrix::New(output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
}
Aurora::Matrix Aurora::sign(const Aurora::Matrix &matrix) {
if (matrix.getValueType()==Normal){
auto ret = matrix.deepCopy();
Eigen::Map<Eigen::VectorXd> retV(ret.getData(),ret.getDataSize());
retV = retV.array().sign();
return ret;
}
else{
//sign(x) = x./abs(x),前提是 x 为复数。
auto output = malloc(matrix.getDataSize(),true);
Matrix absMatrix = abs(matrix);
vdDivI(matrix.getDataSize(), matrix.getData(),COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,output,COMPLEX_STRIDE);
vdDivI(matrix.getDataSize(), matrix.getData()+1,COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,output+1,COMPLEX_STRIDE);
return Aurora::Matrix::New(output, matrix);
}
}
Aurora::Matrix Aurora::sign(Aurora::Matrix&& matrix) {
if (matrix.getValueType()==Normal){
Eigen::Map<Eigen::VectorXd> retV(matrix.getData(),matrix.getDataSize());
retV = retV.array().sign();
return matrix;
}
else{
//sign(x) = x./abs(x),前提是 x 为复数。
Matrix absMatrix = abs(matrix);
vdDivI(matrix.getDataSize(), matrix.getData(),COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,matrix.getData(),COMPLEX_STRIDE);
vdDivI(matrix.getDataSize(), matrix.getData()+1,COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,matrix.getData()+1,COMPLEX_STRIDE);
return matrix;
}
}
Matrix Aurora::interp1(const Matrix& aX, const Matrix& aV, const Matrix& aX1, InterpnMethod aMethod)
{
const int nx = aX.getDimSize(0);
const int ny = 1;
int nx1 = aX1.getDimSize(0);
std::shared_ptr<double> resultData = std::shared_ptr<double>(Aurora::malloc(nx1), Aurora::free);
std::vector<int> resultInfo = {nx1};
Aurora::Matrix result(resultData, resultInfo);
DFTaskPtr task ;
int status = dfdNewTask1D(&task, nx, aX.getData(), DF_NO_HINT, ny, aV.getData(), DF_NO_HINT);
if (status != DF_STATUS_OK)
{
return Matrix();
}
MKL_INT sorder;
MKL_INT stype;
if (aMethod == InterpnMethod::Spline)
{
sorder = DF_PP_CUBIC;
stype = DF_PP_BESSEL;
}
else
{
sorder = DF_PP_LINEAR;
stype = DF_PP_BESSEL;
}
double* scoeffs = Aurora::malloc(ny * (nx-1) * sorder);
status = dfdEditPPSpline1D(task, sorder,DF_PP_NATURAL , DF_BC_NOT_A_KNOT, 0, DF_NO_IC, 0, scoeffs, DF_NO_HINT);
if (status != DF_STATUS_OK)
{
return Matrix();
}
status = dfdConstruct1D( task, DF_PP_SPLINE, DF_METHOD_STD );
if (status != DF_STATUS_OK)
{
return Matrix();
}
int dorder = 1;
status = dfdInterpolate1D(task, DF_INTERP, DF_METHOD_PP, nx1, aX1.getData(), DF_NO_HINT, 1, &dorder,
DF_NO_APRIORI_INFO, resultData.get(), DF_MATRIX_STORAGE_ROWS, nullptr);
status = dfDeleteTask(&task);
Aurora::free(scoeffs);
return result;
}
Matrix Aurora::repmat(const Matrix& aMatrix,int aRowTimes, int aColumnTimes)
{
if(aRowTimes < 1 || aColumnTimes < 1 || aMatrix.getDims() > 2 || aMatrix.isNull())
{
return Matrix();
}
int complexStep = aMatrix.getValueType();
int originalDataSize = aMatrix.getDataSize() * complexStep;
double* resultData = Aurora::malloc(originalDataSize * aRowTimes * aColumnTimes);
int row = aMatrix.getDimSize(0);
int column = aMatrix.getDimSize(1);
double* originalData = aMatrix.getData();
double* resultDataTemp = resultData;
size_t step = row*complexStep;
#pragma omp parallel for
for(int i=0; i<column; ++i)
{
double* origninalStart = originalData + i * step;
for(int j=0; j<aRowTimes; ++j)
{
double* copyStart = resultData + step * (i*aRowTimes + j);
cblas_dcopy(step, origninalStart, 1, copyStart, 1);
//resultDataTemp += row*complexStep;
}
//originalData += step;
}
resultDataTemp = resultData;
step = originalDataSize * aRowTimes;
#pragma omp parallel for
for(int i=1; i<aColumnTimes; ++i)
{
cblas_dcopy(step, resultData, 1, resultData + i*step, 1);
}
std::vector<int> resultInfo;
resultInfo.push_back(aRowTimes * row);
column = aColumnTimes*column;
if (column > 1)
{
resultInfo.push_back(column);
}
return Matrix(std::shared_ptr<double>(resultData, Aurora::free),resultInfo, aMatrix.getValueType());
}
Matrix Aurora::repmat(const Matrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes)
{
if(aRowTimes < 1 || aColumnTimes < 1 || aSliceTimes < 1 || aMatrix.getDims() > 2 || aMatrix.isNull())
{
return Matrix();
}
int complexStep = aMatrix.getValueType();
Matrix resultTemp = Aurora::repmat(aMatrix, aRowTimes, aColumnTimes);
int resultTempDataSize = resultTemp.getDataSize() * complexStep;
double* resultData = Aurora::malloc(resultTempDataSize * aSliceTimes);
std::copy(resultTemp.getData(), resultTemp.getData() + resultTempDataSize, resultData);
for(int i=1; i<aSliceTimes; ++i)
{
cblas_dcopy(resultTempDataSize, resultData, 1, resultData + i*resultTempDataSize, 1);
}
std::vector<int> resultInfo;
int row = resultTemp.getDimSize(0);
int column = resultTemp.getDimSize(1);
resultInfo.push_back(row);
if (column > 1 || aSliceTimes > 1)
{
resultInfo.push_back(column);
}
if (aSliceTimes > 1)
{
resultInfo.push_back(aSliceTimes);
}
return Matrix(std::shared_ptr<double>(resultData, Aurora::free), resultInfo, aMatrix.getValueType());
}
Matrix Aurora::repmat3d(const Matrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes)
{
if(aRowTimes < 1 || aColumnTimes < 1 || aMatrix.getDims() < 3 || aMatrix.isNull())
{
return Matrix();
}
double* start = aMatrix.getData();
int rows = aMatrix.getDimSize(0);
int columns = aMatrix.getDimSize(1);
int slices = aMatrix.getDimSize(2);
double* extended2DimsData = Aurora::malloc(rows * columns * aRowTimes * aColumnTimes * slices);
Matrix extended2DimsMatrix = Matrix::New(extended2DimsData, aRowTimes*rows, aColumnTimes*columns, slices);
for(int i=0; i<aMatrix.getDimSize(2); ++i)
{
Matrix dim2Matrix = Matrix::copyFromRawData(start, rows, columns);
Matrix extendedTemp = repmat(dim2Matrix, aRowTimes, aColumnTimes);
cblas_dcopy(extendedTemp.getDataSize(), extendedTemp.getData(), 1, extended2DimsData, 1);
extended2DimsData += extendedTemp.getDataSize();
start += columns * rows;
}
double* extended3DimsData = Aurora::malloc(rows * columns * aRowTimes * aColumnTimes * aSliceTimes * slices);
Matrix result = Matrix::New(extended3DimsData, aRowTimes*rows, aColumnTimes*columns, slices * aSliceTimes);
for(int i=0;i<aSliceTimes;++i)
{
cblas_dcopy(extended2DimsMatrix.getDataSize(), extended2DimsMatrix.getData(), 1, extended3DimsData, 1);
extended3DimsData+=extended2DimsMatrix.getDataSize();
}
return result;
}
Matrix Aurora::polyval(const Matrix &aP, const Matrix &aX) {
auto result = malloc(aX.getDataSize());
auto powArg = new double[aP.getDataSize()];
for (int j = aP.getDataSize(), i = 0; j > 0; --j, ++i) {
powArg[i] = (double) (j - 1);
}
auto temp = new double[aP.getDataSize()];
for (int i = 0; i < aX.getDataSize(); ++i) {
vdPowI(aP.getDataSize(), aX.getData() + i, 0, powArg, 1, temp, 1);
vdMul(aP.getDataSize(), aP.getData(), temp, temp);
Eigen::Map<Eigen::VectorXd> vd(temp,aP.getDataSize());
result[i] = vd.array().sum();
}
delete[] powArg;
delete[] temp;
return Matrix::New(result,aX);
}
Matrix Aurora::log(const Matrix& aMatrix, int aBaseNum)
{
size_t size = aMatrix.getDataSize();
double* data = Aurora::malloc(size);
vdLn(size, aMatrix.getData(), data);
if(aBaseNum != -1)
{
double baseNum = aBaseNum;
double temp;
vdLn(1, &baseNum, &temp);
for (size_t i = 0; i < size; i++)
{
data[i] /= temp;
}
}
return Matrix::New(data, aMatrix);
}
Matrix Aurora::exp(const Matrix& aMatrix)
{
size_t size = aMatrix.getDataSize();
double* data;
if (aMatrix.isComplex())
{
data = Aurora::malloc(size, true);
vzExp(size, (MKL_Complex16*)aMatrix.getData(), (MKL_Complex16*)data);
}
else
{
data = Aurora::malloc(size);
vdExp(size, aMatrix.getData(), data);
}
return Matrix::New(data, aMatrix);
}
Matrix Aurora::mod(const Matrix& aMatrix, double aValue)
{
if(aMatrix.isComplex() || aMatrix.isNull())
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
double* matrixData = aMatrix.getData();
double* resultData = Aurora::malloc(size);
for(size_t i=0; i<size; ++i)
{
resultData[i] = fmod(matrixData[i], aValue);
}
return Matrix::New(resultData, aMatrix);
}
Matrix Aurora::acos(const Matrix& aMatrix)
{
if(aMatrix.isComplex() || aMatrix.isNull())
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
double* matrixData = aMatrix.getData();
double* resultData = Aurora::malloc(size);
vdAcos(size, matrixData, resultData);
return Matrix::New(resultData, aMatrix);
}
Matrix Aurora::acosd(const Matrix& aMatrix)
{
if(aMatrix.isComplex() || aMatrix.isNull())
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
double* matrixData = aMatrix.getData();
double* resultData = Aurora::malloc(size);
vdAcos(size, matrixData, resultData);
for(size_t i=0; i<size; ++i)
{
resultData[i] = resultData[i] * 180 / PI;
}
return Matrix::New(resultData, aMatrix);
}
Matrix Aurora::conj(const Matrix& aMatrix)
{
if(!aMatrix.isComplex())
{
return Matrix::copyFromRawData(aMatrix.getData(),aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
}
size_t size = aMatrix.getDataSize();
double* data = malloc(size,true);
vzConj(size,(MKL_Complex16*)aMatrix.getData(), (MKL_Complex16*)data);
return Matrix::New(data, aMatrix);
}
double Aurora::norm(const Matrix& aMatrix, NormMethod aNormMethod)
{
if(aMatrix.isNull())
{
return NAN;
}
size_t size = aMatrix.getDataSize();
if(aMatrix.isComplex())
{
size*=2;
}
int column = aMatrix.getDimSize(1);
int row = aMatrix.getDimSize(0);
if (aNormMethod == NormMethod::Norm1)
{
double value = 0;
for(int i=0; i<column; ++i)
{
double temp = Aurora::sum(abs(aMatrix($,i,$).toMatrix())).getData()[0];
if(temp > value)
{
value = temp;
}
}
return value;
}
else if(aNormMethod == NormMethod::NormF)
{
return cblas_dnrm2(size, aMatrix.getData(), 1);
}
else if(aNormMethod == NormMethod::Norm2)
{
//columns > 1
if(aMatrix.getDimSize(1) > 1)
{
if(aMatrix.isComplex())
{
Eigen::Map<Eigen::MatrixXcd> eMatrix((MKL_Complex16*)aMatrix.getData(), row, column);
Eigen::JacobiSVD<Eigen::MatrixXcd> svd(eMatrix, Eigen::ComputeThinU | Eigen::ComputeThinV);
return svd.singularValues()(0);
}
else
{
Eigen::Map<Eigen::MatrixXd> eMatrix(aMatrix.getData(), row, column);
Eigen::JacobiSVD<Eigen::MatrixXd> svd(eMatrix, Eigen::ComputeThinU | Eigen::ComputeThinV);
return svd.singularValues()(0);
}
}
else
{
return cblas_dnrm2(size, aMatrix.getData(), 1);
}
}
return 0;
}
Matrix Aurora::transpose(const Matrix& aMatrix)
{
//not surpport for 3 dims.
if(aMatrix.isNull() || aMatrix.getDimSize(2) > 1)
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
int row = aMatrix.getDimSize(0);
int col = aMatrix.getDimSize(1);
double* resultData;
double* data = aMatrix.getData();
if(aMatrix.isComplex())
{
resultData = Aurora::malloc(size, true);
mkl_zomatcopy('C', 'T',row ,col, 1.0, (MKL_Complex16*)data, row, (MKL_Complex16*)resultData, col);
}
else
{
resultData = Aurora::malloc(size);
mkl_domatcopy('C', 'T',row ,col, 1.0, data, row, resultData, col);
}
return Matrix::New(resultData,col,row,1,aMatrix.getValueType());
}
Matrix Aurora::horzcat(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.getDims() !=2 || aMatrix2.getDims() !=2 ||
aMatrix1.getDimSize(0) != aMatrix2.getDimSize(0) || aMatrix1.getValueType() != aMatrix2.getValueType())
{
return Matrix();
}
int column1 = aMatrix1.getDimSize(1);
int column2 = aMatrix2.getDimSize(1);
int row = aMatrix1.getDimSize(0);
size_t size1= aMatrix1.getDataSize();
size_t size2= aMatrix2.getDataSize();
double* resultData = Aurora::malloc(size1 + size2,aMatrix1.getValueType());
cblas_dcopy(size1, aMatrix1.getData(), 1, resultData, 1);
cblas_dcopy(size2, aMatrix2.getData(), 1, resultData + size1, 1);
return Matrix::New(resultData, row, column1+column2, 1, aMatrix1.getValueType());
}
Matrix Aurora::vecnorm(const Matrix& aMatrix, NormMethod aNormMethod, int aDim)
{
//only surpport aDim = 1 for now.
if(aDim != 1 || aNormMethod == NormMethod::NormF || aMatrix.isNull())
{
return Matrix();
}
int column = aMatrix.getDimSize(1);
double* resultData = Aurora::malloc(column);
for(int i=0; i<column; ++i)
{
resultData[i] = norm(aMatrix($,i,$).toMatrix(), aNormMethod);
}
return Matrix::New(resultData,column);
}
Matrix Aurora::linspace(double aStart, double aEnd, int aNum)
{
double step = (aEnd - aStart) / (aNum - 1);
double* resultData = Aurora::malloc(aNum);
for (int i = 0; i < aNum; i++)
{
resultData[i] = aStart + step * i;
}
return Matrix::New(resultData,aNum);
}
Matrix Aurora::auroraUnion(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
{
return Matrix();
}
size_t size1= aMatrix1.getDataSize();
size_t size2= aMatrix2.getDataSize();
double* resultData = Aurora::malloc(size1 + size2);
cblas_dcopy(size1, aMatrix1.getData(), 1, resultData, 1);
cblas_dcopy(size2, aMatrix2.getData(), 1, resultData + size1, 1);
std::vector<double> vector(resultData, resultData + size1 + size2);
Aurora::free(resultData);
std::sort(vector.begin(), vector.end());
auto last = std::unique(vector.begin(), vector.end());
vector.erase(last, vector.end());
return Matrix::copyFromRawData(vector.data(),vector.size());
}
Matrix Aurora::intersect(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
{
return Matrix();
}
size_t size1= aMatrix1.getDataSize();
size_t size2= aMatrix2.getDataSize();
std::vector<double> vector1(aMatrix1.getData(), aMatrix1.getData() + size1);
std::vector<double> vector2(aMatrix2.getData(), aMatrix2.getData() + size2);
std::sort(vector1.begin(), vector1.end());
std::sort(vector2.begin(), vector2.end());
std::vector<double> intersection;
std::set_intersection(vector1.begin(), vector1.end(),
vector2.begin(), vector2.end(),
std::back_inserter(intersection));
return Matrix::copyFromRawData(intersection.data(), intersection.size());
}
Matrix Aurora::intersect(const Matrix& aMatrix1, const Matrix& aMatrix2, Matrix& aIa)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
{
return Matrix();
}
Matrix result = intersect(aMatrix1,aMatrix2);
size_t size = result.getDataSize();
double* iaResult = Aurora::malloc(size);
for(size_t i=0; i<size; ++i)
{
for(size_t j=0; j<aMatrix1.getDataSize(); ++j)
{
if(aMatrix1.getData()[j] == result.getData()[i])
{
iaResult[i] = j + 1;
break;
}
}
}
aIa = Matrix::New(iaResult,size);
return result;
}
Matrix Aurora::xcorr(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
//not support for complex
if (aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.getDataSize() != aMatrix2.getDataSize())
{
return Matrix();
}
size_t matrixSize = aMatrix1.getDataSize();
size_t resultSize = 2 * matrixSize -1;
double* resultData = Aurora::malloc(resultSize);
for(int i=0;i<matrixSize;++i)
{
double data = 0;
for(int j=0;j<i+1;++j)
{
data+= aMatrix1[j] * aMatrix2[matrixSize-i-1+j];
}
resultData[i] = data;
}
for(int i=0;i<matrixSize-1;++i)
{
double result = 0;
for(int j=0;j<i+1;++j)
{
result+= aMatrix1[matrixSize-i-1+j]*aMatrix2[j];
}
resultData[resultSize - 1 - i] = result;
}
return Matrix::New(resultData,resultSize);
}
Matrix Aurora::deleteColumn(const Matrix& aMatrix, int aColumnIndex)
{
int rows = aMatrix.getDimSize(0);
int columns = aMatrix.getDimSize(1);
if (aColumnIndex < 0 || aColumnIndex >= columns)
{
return aMatrix;
}
double* resultData = Aurora::malloc(rows* (columns-1));
if(aColumnIndex == 0)
{
cblas_dcopy(rows* (columns-1), aMatrix.getData() + rows, 1, resultData, 1);
}
else if(aColumnIndex == (columns - 1))
{
cblas_dcopy(rows* (columns-1), aMatrix.getData(), 1, resultData, 1);
}
else
{
cblas_dcopy(rows * aColumnIndex, aMatrix.getData(), 1, resultData, 1);
cblas_dcopy(rows * (columns - aColumnIndex - 1), aMatrix.getData() + rows * (aColumnIndex + 1), 1, resultData + rows * aColumnIndex, 1);
}
return Matrix::New(resultData, rows, columns-1);
}
Matrix Aurora::reshape(const Matrix& aMatrix, int aRows, int aColumns, int aSlices)
{
if(aMatrix.isNull() || (aMatrix.getDataSize() != aRows * aColumns * aSlices))
{
return Matrix();
}
return Matrix::copyFromRawData(aMatrix.getData(),aRows,aColumns,aSlices);
}
void Aurora::nantoval(Matrix& aMatrix, double val2) {
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData(),aMatrix.getDataSize());
srcV = srcV.array().isNaN().select(val2,srcV);
}
Matrix Aurora::isnan(const Matrix& aMatrix){
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData(),aMatrix.getDataSize());
auto result = zeros(aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
Eigen::Map<Eigen::VectorXd> resultV(result.getData(),result.getDataSize());
resultV = srcV.array().isNaN().select(1.0,resultV);
return result;
}
Matrix Aurora::isfinite(const Matrix& aMatrix){
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData(),aMatrix.getDataSize());
auto result = zeros(aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
Eigen::Map<Eigen::VectorXd> resultV(result.getData(),result.getDataSize());
resultV = srcV.array().isFinite().select(1.0,resultV);
return result;
}
void Aurora::padding(Matrix &aMatrix, int aIndex, double aValue)
{
if(aMatrix.isNull() || !aMatrix.isVector())
{
std::cerr<<"padding only support vector"<<std::endl;
return;
}
if (aMatrix.getDataSize()>aIndex){
aMatrix.getData()[aIndex] = aValue;
return;
}
int size = (aIndex+1);
double* newData = malloc(size,aMatrix.isComplex());
cblas_dcopy(aMatrix.getDataSize()*aMatrix.getValueType(),
aMatrix.getData(),1,newData,1);
cblas_dcopy((size-aMatrix.getDataSize())*aMatrix.getValueType(),
&aValue,0,newData+aMatrix.getDataSize()*aMatrix.getValueType(),1);
aMatrix = Matrix::New(newData,size,1,1,aMatrix.getValueType());
}
void Aurora::compareSet(Matrix& aMatrix,double compareValue, double newValue,CompareOp op){
Eigen::Map<Eigen::VectorXd> v(aMatrix.getData(),aMatrix.getDataSize());
switch (op) {
case EQ:
v = (v.array() == compareValue).select(newValue, v);
break;
case GT:
v = (v.array() > compareValue).select(newValue, v);
break;
case LT:
v = (v.array() < compareValue).select(newValue, v);
break;
case NG:
v = (v.array() <= compareValue).select(newValue, v);
break;
case NL:
v = (v.array() >= compareValue).select(newValue, v);
break;
case NE:
v = (v.array() != compareValue).select(newValue, v);
break;
}
}
Matrix Aurora::convertfp16tofloat(short* aData, int aRows, int aColumns)
{
auto input = aData;
size_t size = aRows*aColumns;
size_t quaterSize = size/4;
//uint16变换为float(32位)输出大小翻倍
auto output = malloc(size);
#pragma omp parallel for
for (size_t i = 0; i < quaterSize; i+=8) {
//循环展开以避免过度的线程调用
if (i < quaterSize)::convertValue2((short*)(input+i*4), output + (i) * 4);
if (i+1 < quaterSize)::convertValue2((short*)(input+(i+1)*4), output + (i+1) * 4);
if (i+2 < quaterSize)::convertValue2((short*)(input+(i+2)*4), output + (i+2) * 4);
if (i+3 < quaterSize)::convertValue2((short*)(input+(i+3)*4), output + (i+3) * 4);
if (i+4 < quaterSize)::convertValue2((short*)(input+(i+4)*4), output + (i+4) * 4);
if (i+5 < quaterSize)::convertValue2((short*)(input+(i+5)*4), output + (i+5) * 4);
if (i+6 < quaterSize)::convertValue2((short*)(input+(i+6)*4), output + (i+6) * 4);
if (i+7 < quaterSize)::convertValue2((short*)(input+(i+7)*4), output + (i+7) * 4);
}
return Matrix::New(output,aRows,aColumns,1);
}