Files
Aurora/src/Function1D.cpp
2023-10-08 15:58:43 +08:00

1156 lines
40 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "Function1D.h"
#include "Function2D.h"
#include "Function.h"
//必须在Eigen之前
#include "AuroraDefs.h"
#include "Function3D.h"
#include "Matrix.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <cmath>
#include <iostream>
#include <Eigen/Core>
#include <Eigen/Eigen>
#include <Eigen/Dense>
#include <Eigen/SVD>
#include <mkl_cblas.h>
#include <mkl_lapack.h>
#include <sys/types.h>
#include <utility>
using namespace Aurora;
namespace {
const int COMPLEX_STRIDE = 2;
const int REAL_STRIDE = 1;
const int SAME_STRIDE = 1;
const float VALUE_ONE = 1.0;
const ushort CONVERT_AND_VALUE = 15;
const ushort CONVERT_AND_VALUE_2 = 2047;
const ushort CONVERT_MUL_VALUE = 2048;
uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
inline void convertValue(float aValue ,float* des){
float value = aValue;
ushort *exponentPtr = (ushort *)&value;
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
float signValue = aValue;
short *signPtr = (short *)&signValue;
uint sign_bit[4] = {
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
float fraction3Value = aValue;
ushort *fraction3Ptr = (ushort *)&fraction3Value;
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
uint hidden_bit[4] = {
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
};
int outputPtr[4] = {0};
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1))
: (temp >> std::abs(exponentPtr[0] - 1));
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1))
: (temp >> std::abs(exponentPtr[1] - 1));
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1))
: (temp >> std::abs(exponentPtr[2] - 1));
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1))
: (temp >> std::abs(exponentPtr[3] - 1));
des[0] = outputPtr[0];
des[1] = outputPtr[1];
des[2] = outputPtr[2];
des[3] = outputPtr[3];
}
inline void convertValue2(short* aValue ,float* des){
ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
short signPtr [4] = {aValue[0],aValue[1],aValue[2],aValue[3]};
uint sign_bit[4] = {
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
ushort fraction3Ptr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
uint hidden_bit[4] = {
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
};
int outputPtr[4] = {0};
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1)): temp;
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1)): temp;
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1)): temp;
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1)): temp;
des[0] = outputPtr[0];
des[1] = outputPtr[1];
des[2] = outputPtr[2];
des[3] = outputPtr[3];
}
bool compare(const Matrix& aA, const Matrix& aB)
{
size_t dataSize = aA.getDataSize();
size_t sizeB = aB.getDataSize();
if(dataSize > sizeB)
{
dataSize = sizeB;
}
for(size_t i=0; i<dataSize; ++i)
{
if(aA[i] != aB[i])
{
return aA[i] < aB[i];
}
}
return false;
}
bool isEqual(const Matrix& aA, const Matrix& aB)
{
size_t sizeA = aA.getDataSize();
size_t sizeB = aB.getDataSize();
if(sizeA != sizeB)
{
return false;
}
for(size_t i=0; i<sizeA; ++i)
{
if(aA[i] != aB[i])
{
return false;
}
}
return true;
}
}
Aurora::Matrix Aurora::complex(const Aurora::Matrix &matrix) {
if (matrix.getValueType() == Complex) {
std::cerr<<"complex not support complex value type"<<std::endl;
return matrix;
}
auto output = malloc(matrix.getDataSize() ,true);
memset(output, 0, (matrix.getDataSize() * sizeof(std::complex<float>)));
cblas_scopy(matrix.getDataSize(), matrix.getData(), REAL_STRIDE, (float *) output, COMPLEX_STRIDE);
return Aurora::Matrix::New((float *) output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2),
Complex);
}
Aurora::Matrix Aurora::real(const Aurora::Matrix &matrix) {
if (matrix.getValueType() == Normal) {
std::cerr<<"real only support complex value type"<<std::endl;
return matrix;
}
auto output = (float *) malloc(matrix.getDataSize());
memset(output, 0, (matrix.getDataSize() * sizeof(float)));
cblas_scopy(matrix.getDataSize(), matrix.getData(),COMPLEX_STRIDE , (float *) output, REAL_STRIDE);
return Aurora::Matrix::New((float *) output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
Aurora::Matrix Aurora::imag(const Aurora::Matrix &matrix) {
if (matrix.getValueType() == Normal) {
std::cerr<<"imag only support complex value type"<<std::endl;
return matrix;
}
auto output = malloc(matrix.getDataSize());
memset(output, 0, (matrix.getDataSize() * sizeof(float)));
cblas_scopy(matrix.getDataSize(), matrix.getData()+1,COMPLEX_STRIDE , (float *) output, REAL_STRIDE);
return Aurora::Matrix::New((float *) output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
Aurora::Matrix Aurora::ceil(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
//for real part
vsCeilI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vsCeilI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, output + 1, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix);
}
Aurora::Matrix Aurora::ceil(const Aurora::Matrix &&matrix) {
//for real part
vsCeilI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vsCeilI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, matrix.getData() + 1, SAME_STRIDE);
}
return matrix;
}
Aurora::Matrix Aurora::round(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
//for real part
vsRoundI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vsRoundI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, output + 1, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix);
}
Aurora::Matrix Aurora::round(const Aurora::Matrix &&matrix) {
//for real part
vsRoundI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vsRoundI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, matrix.getData() + 1, SAME_STRIDE);
}
return matrix;
}
Aurora::Matrix Aurora::floor(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
//for real part
vsFloorI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vsFloorI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, output + 1, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix);
}
Aurora::Matrix Aurora::floor(const Aurora::Matrix &&matrix) {
//for real part
vsFloorI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
if (matrix.getValueType() == Complex) {
//for imag part
vsFloorI(matrix.getDataSize(), matrix.getData() + 1, SAME_STRIDE, matrix.getData() + 1, SAME_STRIDE);
}
return matrix;
}
Matrix Aurora::auroraNot(const Matrix& aMatrix){
return auroraNot(std::forward<Matrix&&>(aMatrix.deepCopy()));
}
Matrix Aurora::auroraNot(Matrix&& aMatrix){
Eigen::Map<Eigen::VectorXf> v2(aMatrix.getData(), aMatrix.getDataSize());
v2 = (v2.array()>0).select(1,v2);
v2 = (v2.array()<0).select(0,v2);
v2 = v2.array()+1.0;
v2 = (v2.array() == 2.0).select(0.0, v2);
return aMatrix;
}
Aurora::Matrix Aurora::sqrt(const Aurora::Matrix& matrix) {
if (matrix.getValueType() != Complex) {
auto output = malloc(matrix.getDataSize());
vsSqrtI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
return Aurora::Matrix::New(output, matrix);
}
std::cerr<<"sqrt not support complex"<<std::endl;
return Aurora::Matrix();
}
Aurora::Matrix Aurora::sqrt(Aurora::Matrix&& matrix) {
if (matrix.getValueType() != Complex) {
vsSqrtI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
return matrix;
}
std::cerr<<"sqrt not support complex"<<std::endl;
return Aurora::Matrix();
}
Aurora::Matrix Aurora::abs(const Aurora::Matrix &matrix) {
auto output = malloc(matrix.getDataSize());
if (matrix.getValueType()==Normal){
vsAbsI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, output, SAME_STRIDE);
}
else{
vcAbsI(matrix.getDataSize(), (std::complex<float> *)matrix.getData(), SAME_STRIDE,output, SAME_STRIDE);
}
return Aurora::Matrix::New(output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
Aurora::Matrix Aurora::abs(Aurora::Matrix&& matrix) {
if (matrix.getValueType()==Normal){
vsAbsI(matrix.getDataSize(), matrix.getData(), SAME_STRIDE, matrix.getData(), SAME_STRIDE);
return matrix;
}
//TODO考虑尝试是不是使用realloc缩短已分配的内存的方式重用matrix
else{
auto output = malloc(matrix.getDataSize());
vcAbsI(matrix.getDataSize(), (std::complex<float> *)matrix.getData(), SAME_STRIDE,output, SAME_STRIDE);
return Aurora::Matrix::New(output, matrix.getDimSize(0), matrix.getDimSize(1), matrix.getDimSize(2));
}
}
Aurora::Matrix Aurora::sign(const Aurora::Matrix &matrix) {
if (matrix.getValueType()==Normal){
auto ret = matrix.deepCopy();
Eigen::Map<Eigen::VectorXf> retV(ret.getData(),ret.getDataSize());
retV = retV.array().sign();
return ret;
}
else{
//sign(x) = x./abs(x),前提是 x 为复数。
auto output = malloc(matrix.getDataSize(),true);
Matrix absMatrix = abs(matrix);
vsDivI(matrix.getDataSize(), matrix.getData(),COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,output,COMPLEX_STRIDE);
vsDivI(matrix.getDataSize(), matrix.getData()+1,COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,output+1,COMPLEX_STRIDE);
return Aurora::Matrix::New(output, matrix);
}
}
Aurora::Matrix Aurora::sign(Aurora::Matrix&& matrix) {
if (matrix.getValueType()==Normal){
Eigen::Map<Eigen::VectorXf> retV(matrix.getData(),matrix.getDataSize());
retV = retV.array().sign();
return matrix;
}
else{
//sign(x) = x./abs(x),前提是 x 为复数。
Matrix absMatrix = abs(matrix);
vsDivI(matrix.getDataSize(), matrix.getData(),COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,matrix.getData(),COMPLEX_STRIDE);
vsDivI(matrix.getDataSize(), matrix.getData()+1,COMPLEX_STRIDE,
absMatrix.getData(), REAL_STRIDE,matrix.getData()+1,COMPLEX_STRIDE);
return matrix;
}
}
Matrix Aurora::interp1(const Matrix& aX, const Matrix& aV, const Matrix& aX1, InterpnMethod aMethod)
{
const int nx = aX.getDimSize(0);
const int ny = 1;
int nx1 = aX1.getDimSize(0);
std::shared_ptr<float> resultData = std::shared_ptr<float>(Aurora::malloc(nx1), Aurora::free);
std::vector<int> resultInfo = {nx1};
Aurora::Matrix result(resultData, resultInfo);
DFTaskPtr task ;
int status = dfsNewTask1D(&task, nx, aX.getData(), DF_NO_HINT, ny, aV.getData(), DF_NO_HINT);
if (status != DF_STATUS_OK)
{
return Matrix();
}
MKL_INT sorder;
MKL_INT stype;
if (aMethod == InterpnMethod::Spline)
{
sorder = DF_PP_CUBIC;
stype = DF_PP_BESSEL;
}
else
{
sorder = DF_PP_LINEAR;
stype = DF_PP_BESSEL;
}
float* scoeffs = Aurora::malloc(ny * (nx-1) * sorder);
status = dfsEditPPSpline1D(task, sorder,DF_PP_NATURAL , DF_BC_NOT_A_KNOT, 0, DF_NO_IC, 0, scoeffs, DF_NO_HINT);
if (status != DF_STATUS_OK)
{
return Matrix();
}
status = dfsConstruct1D( task, DF_PP_SPLINE, DF_METHOD_STD );
if (status != DF_STATUS_OK)
{
return Matrix();
}
int dorder = 1;
status = dfsInterpolate1D(task, DF_INTERP, DF_METHOD_PP, nx1, aX1.getData(), DF_NO_HINT, 1, &dorder,
DF_NO_APRIORI_INFO, resultData.get(), DF_MATRIX_STORAGE_ROWS, nullptr);
status = dfDeleteTask(&task);
Aurora::free(scoeffs);
return result;
}
Matrix Aurora::repmat(const Matrix& aMatrix,int aRowTimes, int aColumnTimes)
{
if(aRowTimes < 1 || aColumnTimes < 1 || aMatrix.getDims() > 2 || aMatrix.isNull())
{
return Matrix();
}
int complexStep = aMatrix.getValueType();
int originalDataSize = aMatrix.getDataSize() * complexStep;
float* resultData = Aurora::malloc(originalDataSize * aRowTimes * aColumnTimes);
int row = aMatrix.getDimSize(0);
int column = aMatrix.getDimSize(1);
float* originalData = aMatrix.getData();
float* resultDataTemp = resultData;
size_t step = row*complexStep;
#pragma omp parallel for
for(int i=0; i<column; ++i)
{
float* origninalStart = originalData + i * step;
for(int j=0; j<aRowTimes; ++j)
{
float* copyStart = resultData + step * (i*aRowTimes + j);
cblas_scopy(step, origninalStart, 1, copyStart, 1);
//resultDataTemp += row*complexStep;
}
//originalData += step;
}
resultDataTemp = resultData;
step = originalDataSize * aRowTimes;
#pragma omp parallel for
for(int i=1; i<aColumnTimes; ++i)
{
cblas_scopy(step, resultData, 1, resultData + i*step, 1);
}
std::vector<int> resultInfo;
resultInfo.push_back(aRowTimes * row);
column = aColumnTimes*column;
if (column > 1)
{
resultInfo.push_back(column);
}
return Matrix(std::shared_ptr<float>(resultData, Aurora::free),resultInfo, aMatrix.getValueType());
}
Matrix Aurora::repmat(const Matrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes)
{
if(aRowTimes < 1 || aColumnTimes < 1 || aSliceTimes < 1 || aMatrix.getDims() > 2 || aMatrix.isNull())
{
return Matrix();
}
int complexStep = aMatrix.getValueType();
Matrix resultTemp = Aurora::repmat(aMatrix, aRowTimes, aColumnTimes);
int resultTempDataSize = resultTemp.getDataSize() * complexStep;
float* resultData = Aurora::malloc(resultTempDataSize * aSliceTimes);
std::copy(resultTemp.getData(), resultTemp.getData() + resultTempDataSize, resultData);
for(int i=1; i<aSliceTimes; ++i)
{
cblas_scopy(resultTempDataSize, resultData, 1, resultData + i*resultTempDataSize, 1);
}
std::vector<int> resultInfo;
int row = resultTemp.getDimSize(0);
int column = resultTemp.getDimSize(1);
resultInfo.push_back(row);
if (column > 1 || aSliceTimes > 1)
{
resultInfo.push_back(column);
}
if (aSliceTimes > 1)
{
resultInfo.push_back(aSliceTimes);
}
return Matrix(std::shared_ptr<float>(resultData, Aurora::free), resultInfo, aMatrix.getValueType());
}
Matrix Aurora::repmat3d(const Matrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes)
{
if(aRowTimes < 1 || aColumnTimes < 1 || aMatrix.getDims() < 3 || aMatrix.isNull())
{
return Matrix();
}
float* start = aMatrix.getData();
int rows = aMatrix.getDimSize(0);
int columns = aMatrix.getDimSize(1);
int slices = aMatrix.getDimSize(2);
float* extended2DimsData = Aurora::malloc(rows * columns * aRowTimes * aColumnTimes * slices);
Matrix extended2DimsMatrix = Matrix::New(extended2DimsData, aRowTimes*rows, aColumnTimes*columns, slices);
for(int i=0; i<aMatrix.getDimSize(2); ++i)
{
Matrix dim2Matrix = Matrix::copyFromRawData(start, rows, columns);
Matrix extendedTemp = repmat(dim2Matrix, aRowTimes, aColumnTimes);
cblas_scopy(extendedTemp.getDataSize(), extendedTemp.getData(), 1, extended2DimsData, 1);
extended2DimsData += extendedTemp.getDataSize();
start += columns * rows;
}
float* extended3DimsData = Aurora::malloc(rows * columns * aRowTimes * aColumnTimes * aSliceTimes * slices);
Matrix result = Matrix::New(extended3DimsData, aRowTimes*rows, aColumnTimes*columns, slices * aSliceTimes);
for(int i=0;i<aSliceTimes;++i)
{
cblas_scopy(extended2DimsMatrix.getDataSize(), extended2DimsMatrix.getData(), 1, extended3DimsData, 1);
extended3DimsData+=extended2DimsMatrix.getDataSize();
}
return result;
}
Matrix Aurora::polyval(const Matrix &aP, const Matrix &aX) {
auto result = malloc(aX.getDataSize());
auto powArg = new float[aP.getDataSize()];
for (int j = aP.getDataSize(), i = 0; j > 0; --j, ++i) {
powArg[i] = (float) (j - 1);
}
auto temp = new float[aP.getDataSize()];
for (int i = 0; i < aX.getDataSize(); ++i) {
vsPowI(aP.getDataSize(), aX.getData() + i, 0, powArg, 1, temp, 1);
vsMul(aP.getDataSize(), aP.getData(), temp, temp);
Eigen::Map<Eigen::VectorXf> vs(temp,aP.getDataSize());
result[i] = vs.array().sum();
}
delete[] powArg;
delete[] temp;
return Matrix::New(result,aX);
}
Matrix Aurora::log(const Matrix& aMatrix, int aBaseNum)
{
size_t size = aMatrix.getDataSize();
float* data = Aurora::malloc(size);
vsLn(size, aMatrix.getData(), data);
if(aBaseNum != -1)
{
float baseNum = aBaseNum;
float temp;
vsLn(1, &baseNum, &temp);
for (size_t i = 0; i < size; i++)
{
data[i] /= temp;
}
}
return Matrix::New(data, aMatrix);
}
Matrix Aurora::exp(const Matrix& aMatrix)
{
size_t size = aMatrix.getDataSize();
float* data;
if (aMatrix.isComplex())
{
data = Aurora::malloc(size, true);
vcExp(size, (MKL_Complex8*)aMatrix.getData(), (MKL_Complex8*)data);
}
else
{
data = Aurora::malloc(size);
vsExp(size, aMatrix.getData(), data);
}
return Matrix::New(data, aMatrix);
}
Matrix Aurora::mod(const Matrix& aMatrix, float aValue)
{
if(aMatrix.isComplex() || aMatrix.isNull())
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
float* matrixData = aMatrix.getData();
float* resultData = Aurora::malloc(size);
for(size_t i=0; i<size; ++i)
{
resultData[i] = fmod(matrixData[i], aValue);
}
return Matrix::New(resultData, aMatrix);
}
Matrix Aurora::acos(const Matrix& aMatrix)
{
if(aMatrix.isComplex() || aMatrix.isNull())
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
float* matrixData = aMatrix.getData();
float* resultData = Aurora::malloc(size);
vsAcos(size, matrixData, resultData);
return Matrix::New(resultData, aMatrix);
}
Matrix Aurora::acosd(const Matrix& aMatrix)
{
if(aMatrix.isComplex() || aMatrix.isNull())
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
float* matrixData = aMatrix.getData();
float* resultData = Aurora::malloc(size);
vsAcos(size, matrixData, resultData);
for(size_t i=0; i<size; ++i)
{
resultData[i] = resultData[i] * 180 / PI;
}
return Matrix::New(resultData, aMatrix);
}
Matrix Aurora::conj(const Matrix& aMatrix)
{
if(!aMatrix.isComplex())
{
return Matrix::copyFromRawData(aMatrix.getData(),aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
}
size_t size = aMatrix.getDataSize();
float* data = malloc(size,true);
vcConj(size,(MKL_Complex8*)aMatrix.getData(), (MKL_Complex8*)data);
return Matrix::New(data, aMatrix);
}
float Aurora::norm(const Matrix& aMatrix, NormMethod aNormMethod)
{
if(aMatrix.isNull())
{
return NAN;
}
size_t size = aMatrix.getDataSize();
if(aMatrix.isComplex())
{
size*=2;
}
int column = aMatrix.getDimSize(1);
int row = aMatrix.getDimSize(0);
if (aNormMethod == NormMethod::Norm1)
{
float value = 0;
for(int i=0; i<column; ++i)
{
float temp = Aurora::sum(abs(aMatrix($,i,$).toMatrix())).getData()[0];
if(temp > value)
{
value = temp;
}
}
return value;
}
else if(aNormMethod == NormMethod::NormF)
{
return cblas_snrm2(size, aMatrix.getData(), 1);
}
else if(aNormMethod == NormMethod::Norm2)
{
//columns > 1
if(aMatrix.getDimSize(1) > 1)
{
if(aMatrix.isComplex())
{
Eigen::Map<Eigen::MatrixXcf> eMatrix((MKL_Complex8*)aMatrix.getData(), row, column);
Eigen::JacobiSVD<Eigen::MatrixXcf> svs(eMatrix, Eigen::ComputeThinU | Eigen::ComputeThinV);
return svs.singularValues()(0);
}
else
{
Eigen::Map<Eigen::MatrixXf> eMatrix(aMatrix.getData(), row, column);
Eigen::JacobiSVD<Eigen::MatrixXf> svs(eMatrix, Eigen::ComputeThinU | Eigen::ComputeThinV);
return svs.singularValues()(0);
}
}
else
{
return cblas_snrm2(size, aMatrix.getData(), 1);
}
}
return 0;
}
Matrix Aurora::transpose(const Matrix& aMatrix)
{
//not surpport for 3 dims.
if(aMatrix.isNull() || aMatrix.getDimSize(2) > 1)
{
return Matrix();
}
size_t size = aMatrix.getDataSize();
int row = aMatrix.getDimSize(0);
int col = aMatrix.getDimSize(1);
float* resultData;
float* data = aMatrix.getData();
if(aMatrix.isComplex())
{
resultData = Aurora::malloc(size, true);
mkl_comatcopy('C', 'T',row ,col, 1.0, (MKL_Complex8*)data, row, (MKL_Complex8*)resultData, col);
}
else
{
resultData = Aurora::malloc(size);
mkl_somatcopy('C', 'T',row ,col, 1.0, data, row, resultData, col);
}
return Matrix::New(resultData,col,row,1,aMatrix.getValueType());
}
Matrix Aurora::horzcat(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.getDimSize(2) != aMatrix2.getDimSize(2) ||
aMatrix1.getDimSize(0) != aMatrix2.getDimSize(0) || aMatrix1.getValueType() != aMatrix2.getValueType())
{
return Matrix();
}
int column1 = aMatrix1.getDimSize(1);
int column2 = aMatrix2.getDimSize(1);
int slice = aMatrix1.getDimSize(2);
int row = aMatrix1.getDimSize(0);
size_t size1= row*column1;
size_t size2= row*column2;
float* resultData = Aurora::malloc(aMatrix1.getDataSize() + aMatrix2.getDataSize(),aMatrix1.getValueType());
size_t sliceStride = row*(column1+column2);
for (size_t i = 0; i < slice; i++)
{
cblas_scopy(size1, aMatrix1.getData()+i*size1 , 1, resultData + i*sliceStride, 1);
cblas_scopy(size2, aMatrix2.getData()+i*size2, 1, resultData + i*sliceStride + size1, 1);
}
return Matrix::New(resultData, row, column1+column2, slice, aMatrix1.getValueType());
}
Matrix Aurora::vertcat(const Matrix& aMatrix1, const Matrix& aMatrix2){
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.getDimSize(2) != aMatrix2.getDimSize(2) ||
aMatrix1.getDimSize(1) != aMatrix2.getDimSize(1) || aMatrix1.getValueType() != aMatrix2.getValueType())
{
return Matrix();
}
int row1 = aMatrix1.getDimSize(0);
int row2 = aMatrix2.getDimSize(0);
int slice = aMatrix1.getDimSize(2);
int column = aMatrix1.getDimSize(1);
size_t size1= aMatrix1.getDataSize();
size_t size2= aMatrix2.getDataSize();
float* resultData = Aurora::malloc(size1 + size2,aMatrix1.getValueType());
cblas_scopy_batch_strided(row1, aMatrix1.getData(), 1,row1, resultData, 1, row1+row2, column*slice);
cblas_scopy_batch_strided(row2, aMatrix2.getData(), 1,row2, resultData + row1, 1, row1+row2, column*slice);
return Matrix::New(resultData, row1+row2, column, slice, aMatrix1.getValueType());
}
Matrix Aurora::vecnorm(const Matrix& aMatrix, NormMethod aNormMethod, int aDim)
{
//only surpport aDim = 1 for now.
if(aDim != 1 || aNormMethod == NormMethod::NormF || aMatrix.isNull())
{
return Matrix();
}
int column = aMatrix.getDimSize(1);
float* resultData = Aurora::malloc(column);
for(int i=0; i<column; ++i)
{
resultData[i] = norm(aMatrix($,i,$).toMatrix(), aNormMethod);
}
return Matrix::New(resultData,column);
}
Matrix Aurora::linspace(float aStart, float aEnd, int aNum)
{
float step = (aEnd - aStart) / (aNum - 1);
float* resultData = Aurora::malloc(aNum);
for (int i = 0; i < aNum; i++)
{
resultData[i] = aStart + step * i;
}
return Matrix::New(resultData,aNum);
}
Matrix Aurora::auroraUnion(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
{
return Matrix();
}
size_t size1= aMatrix1.getDataSize();
size_t size2= aMatrix2.getDataSize();
float* resultData = Aurora::malloc(size1 + size2);
cblas_scopy(size1, aMatrix1.getData(), 1, resultData, 1);
cblas_scopy(size2, aMatrix2.getData(), 1, resultData + size1, 1);
std::vector<float> vector(resultData, resultData + size1 + size2);
Aurora::free(resultData);
std::sort(vector.begin(), vector.end());
auto last = std::unique(vector.begin(), vector.end());
vector.erase(last, vector.end());
return Matrix::copyFromRawData(vector.data(),vector.size());
}
Matrix Aurora::intersect(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
{
return Matrix();
}
size_t size1= aMatrix1.getDataSize();
size_t size2= aMatrix2.getDataSize();
std::vector<float> vector1(aMatrix1.getData(), aMatrix1.getData() + size1);
std::vector<float> vector2(aMatrix2.getData(), aMatrix2.getData() + size2);
std::sort(vector1.begin(), vector1.end());
std::sort(vector2.begin(), vector2.end());
std::vector<float> intersection;
std::set_intersection(vector1.begin(), vector1.end(),
vector2.begin(), vector2.end(),
std::back_inserter(intersection));
return Matrix::copyFromRawData(intersection.data(), intersection.size());
}
Matrix Aurora::intersect(const Matrix& aMatrix1, const Matrix& aMatrix2, Matrix& aIa)
{
if(aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.isComplex() || aMatrix2.isComplex())
{
return Matrix();
}
Matrix result = intersect(aMatrix1,aMatrix2);
size_t size = result.getDataSize();
float* iaResult = Aurora::malloc(size);
for(size_t i=0; i<size; ++i)
{
for(size_t j=0; j<aMatrix1.getDataSize(); ++j)
{
if(aMatrix1.getData()[j] == result.getData()[i])
{
iaResult[i] = j + 1;
break;
}
}
}
aIa = Matrix::New(iaResult,size);
return result;
}
Matrix Aurora::xcorr(const Matrix& aMatrix1, const Matrix& aMatrix2)
{
//not support for complex
if (aMatrix1.isNull() || aMatrix2.isNull() || aMatrix1.getDataSize() != aMatrix2.getDataSize())
{
return Matrix();
}
size_t matrixSize = aMatrix1.getDataSize();
size_t resultSize = 2 * matrixSize -1;
float* resultData = Aurora::malloc(resultSize);
for(int i=0;i<matrixSize;++i)
{
float data = 0;
for(int j=0;j<i+1;++j)
{
data+= aMatrix1[j] * aMatrix2[matrixSize-i-1+j];
}
resultData[i] = data;
}
for(int i=0;i<matrixSize-1;++i)
{
float result = 0;
for(int j=0;j<i+1;++j)
{
result+= aMatrix1[matrixSize-i-1+j]*aMatrix2[j];
}
resultData[resultSize - 1 - i] = result;
}
return Matrix::New(resultData,resultSize);
}
Matrix Aurora::deleteColumn(const Matrix& aMatrix, int aColumnIndex)
{
int rows = aMatrix.getDimSize(0);
int columns = aMatrix.getDimSize(1);
if (aColumnIndex < 0 || aColumnIndex >= columns)
{
return aMatrix;
}
float* resultData = Aurora::malloc(rows* (columns-1));
if(aColumnIndex == 0)
{
cblas_scopy(rows* (columns-1), aMatrix.getData() + rows, 1, resultData, 1);
}
else if(aColumnIndex == (columns - 1))
{
cblas_scopy(rows* (columns-1), aMatrix.getData(), 1, resultData, 1);
}
else
{
cblas_scopy(rows * aColumnIndex, aMatrix.getData(), 1, resultData, 1);
cblas_scopy(rows * (columns - aColumnIndex - 1), aMatrix.getData() + rows * (aColumnIndex + 1), 1, resultData + rows * aColumnIndex, 1);
}
return Matrix::New(resultData, rows, columns-1);
}
Matrix Aurora::createVectorMatrix(float aStartValue, float aStepValue, float aEndValue)
{
std::vector<float> matrixData;
float tempValue = aStartValue;
matrixData.push_back(tempValue);
long long compare1 = std::round(aEndValue * 10e13);
long long compare2 = std::round(tempValue * 10e13);
while(std::round(tempValue* 10e13) <= compare1)
{
tempValue += aStepValue;
matrixData.push_back(tempValue);
compare2 = std::round(tempValue * 10e14);
}
matrixData.pop_back();
return Matrix::copyFromRawData(matrixData.data(), 1, matrixData.size());
}
Matrix Aurora::reshape(const Matrix& aMatrix, int aRows, int aColumns, int aSlices)
{
if(aMatrix.isNull() || (aMatrix.getDataSize() != aRows * aColumns * aSlices))
{
return Matrix();
}
return Matrix::copyFromRawData(aMatrix.getData(),aRows,aColumns,aSlices);
}
void Aurora::nantoval(Matrix& aMatrix, float val2) {
Eigen::Map<Eigen::VectorXf> srcV(aMatrix.getData(),aMatrix.getDataSize());
srcV = srcV.array().isNaN().select(val2,srcV);
}
Matrix Aurora::isnan(const Matrix& aMatrix){
Eigen::Map<Eigen::VectorXf> srcV(aMatrix.getData(),aMatrix.getDataSize());
auto result = zeros(aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
Eigen::Map<Eigen::VectorXf> resultV(result.getData(),result.getDataSize());
resultV = srcV.array().isNaN().select(1.0,resultV);
return result;
}
Matrix Aurora::isfinite(const Matrix& aMatrix){
Eigen::Map<Eigen::VectorXf> srcV(aMatrix.getData(),aMatrix.getDataSize());
auto result = zeros(aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
Eigen::Map<Eigen::VectorXf> resultV(result.getData(),result.getDataSize());
resultV = srcV.array().isFinite().select(1.0,resultV);
return result;
}
void Aurora::padding(Matrix &aMatrix, int aIndex, float aValue)
{
if(aMatrix.isNull() || !aMatrix.isVector())
{
std::cerr<<"padding only support vector"<<std::endl;
return;
}
if (aMatrix.getDataSize()>aIndex){
aMatrix.getData()[aIndex] = aValue;
return;
}
int size = (aIndex+1);
float* newData = malloc(size,aMatrix.isComplex());
cblas_scopy(aMatrix.getDataSize()*aMatrix.getValueType(),
aMatrix.getData(),1,newData,1);
cblas_scopy((size-aMatrix.getDataSize())*aMatrix.getValueType(),
&aValue,0,newData+aMatrix.getDataSize()*aMatrix.getValueType(),1);
aMatrix = Matrix::New(newData,size,1,1,aMatrix.getValueType());
}
void Aurora::compareSet(Matrix& aMatrix,float compareValue, float newValue,CompareOp op){
Eigen::Map<Eigen::VectorXf> v(aMatrix.getData(),aMatrix.getDataSize());
switch (op) {
case EQ:
v = (v.array() == compareValue).select(newValue, v);
break;
case GT:
v = (v.array() > compareValue).select(newValue, v);
break;
case LT:
v = (v.array() < compareValue).select(newValue, v);
break;
case NG:
v = (v.array() <= compareValue).select(newValue, v);
break;
case NL:
v = (v.array() >= compareValue).select(newValue, v);
break;
case NE:
v = (v.array() != compareValue).select(newValue, v);
break;
}
}
void Aurora::compareSet(Matrix& aMatrix,Matrix& aCompareMatrix,float compareValue, float newValue,CompareOp op){
Eigen::Map<Eigen::VectorXf> v(aMatrix.getData(),aMatrix.getDataSize());
Eigen::Map<Eigen::VectorXf> c(aCompareMatrix.getData(),aCompareMatrix.getDataSize());
switch (op) {
case EQ:
v = (c.array() == compareValue).select(newValue, v);
break;
case GT:
v = (c.array() > compareValue).select(newValue, v);
break;
case LT:
v = (c.array() < compareValue).select(newValue, v);
break;
case NG:
v = (c.array() <= compareValue).select(newValue, v);
break;
case NL:
v = (c.array() >= compareValue).select(newValue, v);
break;
case NE:
v = (c.array() != compareValue).select(newValue, v);
break;
}
}
void Aurora::compareSet(Matrix& aValueAndCompareMatrix,Matrix& aOtherCompareMatrix, float newValue,CompareOp op){
Eigen::Map<Eigen::VectorXf> v(aValueAndCompareMatrix.getData(),aValueAndCompareMatrix.getDataSize());
Eigen::Map<Eigen::VectorXf> c(aOtherCompareMatrix.getData(),aOtherCompareMatrix.getDataSize());
switch (op) {
case EQ:
v = (v.array() == c.array()).select(newValue, v);
break;
case GT:
v = (v.array() > c.array()).select(newValue, v);
break;
case LT:
v = (v.array() < c.array()).select(newValue, v);
break;
case NG:
v = (v.array() <= c.array()).select(newValue, v);
break;
case NL:
v = (v.array() >= c.array()).select(newValue, v);
break;
case NE:
v = (v.array() != c.array()).select(newValue, v);
break;
}
}
void Aurora::compareSet(Matrix& aCompareMatrix,float compareValue, Matrix& aNewValueMatrix,CompareOp op){
Eigen::Map<Eigen::VectorXf> v(aCompareMatrix.getData(),aCompareMatrix.getDataSize());
Eigen::Map<Eigen::VectorXf> nv(aNewValueMatrix.getData(),aNewValueMatrix.getDataSize());
switch (op) {
case EQ:
v = (v.array() == compareValue).select(nv, v);
break;
case GT:
v = (v.array() > compareValue).select(nv, v);
break;
case LT:
v = (v.array() < compareValue).select(nv, v);
break;
case NG:
v = (v.array() <= compareValue).select(nv, v);
break;
case NL:
v = (v.array() >= compareValue).select(nv, v);
break;
case NE:
v = (v.array() != compareValue).select(nv, v);
break;
}
}
Matrix Aurora::convertfp16tofloat(short* aData, int aRows, int aColumns)
{
auto input = aData;
size_t size = aRows*aColumns;
size_t quaterSize = size/4;
//uint16变换为float(32位)输出大小翻倍
auto output = malloc(size);
#pragma omp parallel for
for (size_t i = 0; i < quaterSize; i+=8) {
//循环展开以避免过度的线程调用
if (i < quaterSize)::convertValue2((short*)(input+i*4), output + (i) * 4);
if (i+1 < quaterSize)::convertValue2((short*)(input+(i+1)*4), output + (i+1) * 4);
if (i+2 < quaterSize)::convertValue2((short*)(input+(i+2)*4), output + (i+2) * 4);
if (i+3 < quaterSize)::convertValue2((short*)(input+(i+3)*4), output + (i+3) * 4);
if (i+4 < quaterSize)::convertValue2((short*)(input+(i+4)*4), output + (i+4) * 4);
if (i+5 < quaterSize)::convertValue2((short*)(input+(i+5)*4), output + (i+5) * 4);
if (i+6 < quaterSize)::convertValue2((short*)(input+(i+6)*4), output + (i+6) * 4);
if (i+7 < quaterSize)::convertValue2((short*)(input+(i+7)*4), output + (i+7) * 4);
}
return Matrix::New(output,aRows,aColumns,1);
}
Matrix Aurora::uniqueByRows(const Matrix& aMatrix, Matrix& aIndexResult)
{
if(aMatrix.isNull())
{
return Matrix();
}
Matrix transposeMatrix = transpose(aMatrix);
float* transposeMatrixData = transposeMatrix.getData();
int rows = transposeMatrix.getDimSize(0);
int columns = transposeMatrix.getDimSize(1);
std::vector<Matrix> uniqueResult;
for(int i=1; i<=columns; ++i)
{
Matrix rowData = Matrix::fromRawData(new float[rows], rows);
std::copy(transposeMatrixData, transposeMatrixData + rows, rowData.getData());
uniqueResult.push_back(rowData);
transposeMatrixData += rows;
}
std::vector<Matrix> matrixsCopy = uniqueResult;
std::sort(uniqueResult.begin(), uniqueResult.end(), compare);
auto uniqueIndex = std::unique(uniqueResult.begin(), uniqueResult.end(), isEqual);
uniqueResult.erase(uniqueIndex, uniqueResult.end());
std::vector<float> indexResultData;
for(int i=0; i<matrixsCopy.size();++i)
{
auto index = lower_bound(uniqueResult.begin(), uniqueResult.end(), matrixsCopy[i], compare);
int value = distance(uniqueResult.begin(), index);
indexResultData.push_back(value);
}
aIndexResult = Matrix::copyFromRawData(indexResultData.data(), indexResultData.size());
size_t resultSize = rows * uniqueResult.size();
float* resultData = new float[resultSize];
Matrix result = Matrix::fromRawData(resultData, rows, uniqueResult.size()) ;
for(size_t i=0; i<uniqueResult.size(); ++i)
{
std::copy(uniqueResult[i].getData(), uniqueResult[i].getData() + rows, resultData);
resultData += rows;
}
result = transpose(result);
return result;
}