Fix polyval, add convertfp16toFloat

This commit is contained in:
kradchen
2023-05-09 16:33:51 +08:00
parent 20b13f1e52
commit e1a917609d
3 changed files with 154 additions and 8 deletions

View File

@@ -4,7 +4,10 @@
//必须在Eigen之前
#include "AuroraDefs.h"
#include "Matrix.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <cmath>
#include <iostream>
@@ -14,6 +17,7 @@
#include <Eigen/Dense>
#include <Eigen/SVD>
#include <mkl_lapack.h>
#include <sys/types.h>
using namespace Aurora;
@@ -22,6 +26,100 @@ namespace {
const int REAL_STRIDE = 1;
const int SAME_STRIDE = 1;
const double VALUE_ONE = 1.0;
// Layout of one packed 16-bit word, as decoded by decodeWord() below:
//   bit 15       sign (set when the word is negative as a signed short)
//   bits 14..11  4-bit exponent field
//   bits 10..0   11-bit fraction
// Mask that isolates the 4-bit exponent field once it has been shifted down by 11.
const std::uint16_t CONVERT_AND_VALUE = 15;
// Mask that isolates the 11-bit fraction.
const std::uint16_t CONVERT_AND_VALUE_2 = 2047;
// Weight of the hidden bit (bit 11) that is added on top of the fraction.
const std::uint16_t CONVERT_MUL_VALUE = 2048;
// 0xFFFFF000: added (modulo 2^32) for negative words, i.e. it sets the upper 20 bits.
const std::uint32_t CONVERT_ADD_VALUE = UINT32_MAX - 4095;
// Number of 16-bit words handled per call (four words fill one double).
const int CONVERT_WORD_COUNT = 4;

/**
 * @brief Decodes a single packed 16-bit word into a signed integer value.
 *
 * The hidden bit is added when the word is negative with a zero exponent field,
 * or non-negative with a non-zero exponent field. Negative words additionally
 * get CONVERT_ADD_VALUE added. The 32-bit sum is shifted left by (exponent - 1)
 * when the exponent field is greater than 1.
 *
 * @param aWord                   packed input word
 * @param aShiftDownSmallExponent when true, an exponent field of 0 shifts the
 *                                unsigned sum right by one bit (an exponent of 1
 *                                leaves it unchanged); when false, exponent
 *                                fields 0 and 1 both leave the sum unchanged
 * @return the result reinterpreted as a signed 32-bit integer, as a double
 */
inline double decodeWord(short aWord, bool aShiftDownSmallExponent) {
    const std::uint16_t bits = static_cast<std::uint16_t>(aWord);
    const std::uint32_t exponent = (bits >> 11) & CONVERT_AND_VALUE;
    const std::uint32_t fraction = bits & CONVERT_AND_VALUE_2;
    const bool negative = aWord < 0;

    const bool hiddenBitSet = negative ? (exponent == 0) : (exponent != 0);

    // All arithmetic is deliberately unsigned so that it wraps modulo 2^32.
    std::uint32_t mantissa = fraction;
    if (hiddenBitSet) {
        mantissa += CONVERT_MUL_VALUE;
    }
    if (negative) {
        mantissa += CONVERT_ADD_VALUE;
    }

    std::uint32_t scaled = mantissa;
    if (exponent > 1) {
        scaled = mantissa << (exponent - 1);
    } else if (aShiftDownSmallExponent) {
        // exponent is 0 or 1 here, so the shift count is 1 or 0.
        scaled = mantissa >> (1 - exponent);
    }

    // Reinterpret the 32-bit pattern as signed before widening to double.
    return static_cast<double>(static_cast<std::int32_t>(scaled));
}

/**
 * @brief Decodes the four 16-bit words stored in the bytes of one double.
 *
 * The words are taken in memory order. Unlike convertValue2(), a word whose
 * exponent field is 0 has its unsigned sum shifted right by one bit.
 * NOTE(review): that shift is a logical (unsigned) shift, so a negative word
 * with exponent field 0 decodes to a large positive value; this is kept
 * exactly as before - confirm whether it is intended.
 *
 * @param aValue double whose object representation holds the four packed words
 * @param des    receives four decoded values, des[0] .. des[3]
 */
inline void convertValue(double aValue ,double* des){
    static_assert(sizeof(short) * CONVERT_WORD_COUNT == sizeof(double),
                  "four packed words must exactly fill one double");
    short words[CONVERT_WORD_COUNT];
    // Copy the bytes out instead of casting the double's address to short*.
    std::memcpy(words, &aValue, sizeof(words));
    for (int lane = 0; lane < CONVERT_WORD_COUNT; ++lane) {
        des[lane] = decodeWord(words[lane], true);
    }
}

/**
 * @brief Decodes four consecutive packed 16-bit words.
 *
 * Words whose exponent field is 0 or 1 are left unscaled.
 *
 * @param aValue points at four packed words, aValue[0] .. aValue[3]
 * @param des    receives four decoded values, des[0] .. des[3]
 */
inline void convertValue2(short* aValue ,double* des){
    short words[CONVERT_WORD_COUNT];
    // Take a private copy of all four words before anything is written to des.
    std::memcpy(words, aValue, sizeof(words));
    for (int lane = 0; lane < CONVERT_WORD_COUNT; ++lane) {
        des[lane] = decodeWord(words[lane], false);
    }
}
}
@@ -314,7 +412,8 @@ Matrix Aurora::polyval(const Matrix &aP, const Matrix &aX) {
for (int i = 0; i < aX.getDataSize(); ++i) {
vdPowI(aP.getDataSize(), aX.getData() + i, 0, powArg, 1, temp, 1);
vdMul(aP.getDataSize(), aP.getData(), temp, temp);
result[i] = cblas_dasum(3, temp, 1);
Eigen::Map<Eigen::VectorXd> vd(temp,aP.getDataSize());
result[i] = vd.array().sum();
}
delete[] powArg;
delete[] temp;
@@ -653,3 +752,27 @@ void Aurora::padding(Matrix &aMatrix, int aIndex, double aValue)
&aValue,0,newData+aMatrix.getDataSize()*aMatrix.getValueType(),1);
aMatrix = Matrix::New(newData,size,1,1,aMatrix.getValueType());
}
/**
 * @brief Decodes every element of aMatrix, viewed as four packed 16-bit words,
 *        into four output values (one per word) via convertValue2().
 *
 * NOTE(review): the result is created with the same three dimension sizes as
 * the input even though four values are written per input element - confirm
 * that this is what callers expect.
 *
 * @param aMatrix matrix whose data buffer holds the packed 16-bit words
 * @return matrix built from the decoded buffer
 */
Matrix Aurora::convertfp16tofloat(const Matrix& aMatrix)
{
    auto input = aMatrix.getData();
    // Four output slots are requested per input element, one per 16-bit word.
    // NOTE(review): this relies on `malloc` resolving to an allocator that
    // returns a typed pointer and is sized in elements - confirm.
    auto output = malloc(aMatrix.getDataSize()*4);
    const size_t total_count = aMatrix.getDataSize();
    // Iterations are independent: element i is read from input + i and its
    // four results are written to output + i * 4.
    for (size_t i = 0; i < total_count; ++i) {
        ::convertValue2((short*)(input+i), output + i * 4);
    }
    return Matrix::New(output,aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
}

View File

@@ -112,6 +112,8 @@ namespace Aurora {
* @param aValue 指定值
*/
void padding(Matrix& aMatrix, int aIndex, double aValue);
/**
 * Decodes a matrix of packed 16-bit words into one value per word.
 * Each element of the input buffer is read as four consecutive 16-bit words
 * and four decoded values are written per input element.
 * NOTE(review): the returned matrix is created with the input's three
 * dimension sizes - confirm how callers are expected to index the result.
 * @param aMatrix matrix whose data buffer holds the packed 16-bit words
 * @return matrix built from the decoded values
 */
Matrix convertfp16tofloat(const Matrix& aMatrix);
};

View File

@@ -1,6 +1,8 @@
#include <cstddef>
#include <gtest/gtest.h>
#include <vector>
#include "MatlabReader.h"
#include "TestUtility.h"
#include "Matrix.h"
@@ -107,14 +109,18 @@ TEST_F(Function1D_Test, interp1) {
}
// Checks Aurora::polyval against an expected value.
// NOTE(review): as shown, this body contains both a 3-coefficient setup and a
// 6-coefficient setup, so dataP, dataX, matrixP and matrixX are each declared
// twice - confirm which lines belong to the current version of the test.
TEST_F(Function1D_Test, polyval){
// Earlier setup: three coefficients evaluated at three points.
double *dataP = new double[3]{3,2,1};
double *dataX = new double[3]{5,7,9};
auto matrixP = Aurora::Matrix::fromRawData(dataP,3);
auto matrixX = Aurora::Matrix::fromRawData(dataX,3);
// double *dataP = new double[3]{3,2,1};
// Later setup: six coefficients evaluated at a single point.
double *dataP = new double[6]{2.78786e-9, -1.398845e-6, 3.287156e-4, -5.799136e-2, 5.038813, 1.402385e3};
// double *dataX = new double[3]{5,7,9};
double *dataX = new double[1]{24.570274572648690};
auto matrixP = Aurora::Matrix::fromRawData(dataP,6);
auto matrixX = Aurora::Matrix::fromRawData(dataX,1);
auto result = Aurora::polyval(matrixP,matrixX);
// Expectations matching the three-point setup.
EXPECT_DOUBLE_EQ(86., result.getData()[0]);
EXPECT_DOUBLE_EQ(162., result.getData()[1]);
EXPECT_DOUBLE_EQ(262., result.getData()[2]);
// Expectation matching the single-point setup.
// NOTE(review): EXPECT_DOUBLE_EQ only tolerates a few ULPs of difference;
// consider EXPECT_NEAR if the computed value is not that close to 1495.6.
EXPECT_DOUBLE_EQ(1495.6, result.getData()[0]);
// EXPECT_DOUBLE_EQ(162., result.getData()[1]);
// EXPECT_DOUBLE_EQ(262., result.getData()[2]);
}
TEST_F(Function1D_Test, complexAndEtc){
@@ -501,3 +507,18 @@ TEST_F(Function1D_Test, padding) {
auto result = ma.getData();
}
// Compares Aurora::convertfp16tofloat with the "output" variable stored in the
// same .mat file that provides the packed "input" words.
TEST_F(Function1D_Test, convertfp16tofloat) {
    MatlabReader reader("/home/krad/TestData/convertReal.mat");

    // sampleCount is handed to readint16 and afterwards used as the number of
    // 16-bit words (presumably filled in by the reader - confirm).
    size_t sampleCount = 0;
    auto packedWords = reader.readint16("input",sampleCount);

    // Four 16-bit words are viewed as one double-sized matrix element.
    auto packedMatrix = Aurora::Matrix::copyFromRawData((double*)packedWords.get(),sampleCount/4);
    auto convertedMatrix = Aurora::convertfp16tofloat(packedMatrix);
    auto actual = convertedMatrix.getData();
    auto expectedMatrix = reader.read("output");

    // Element-wise comparison; the failure message carries the offending word.
    size_t idx = 0;
    while (idx < sampleCount) {
        EXPECT_EQ(actual[idx], expectedMatrix.getData()[idx])
            <<"index:"<<idx<<",input:"<< ((short*)packedMatrix.getData())[idx]<<",input2:"<<packedWords.get()[idx];
        ++idx;
    }
}