Fix polyval, add convertfp16tofloat
This commit is contained in:
@@ -4,7 +4,10 @@
|
||||
|
||||
//必须在Eigen之前
|
||||
#include "AuroraDefs.h"
|
||||
#include "Matrix.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
@@ -14,6 +17,7 @@
|
||||
#include <Eigen/Dense>
|
||||
#include <Eigen/SVD>
|
||||
#include <mkl_lapack.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
using namespace Aurora;
|
||||
|
||||
@@ -22,6 +26,100 @@ namespace {
|
||||
const int REAL_STRIDE = 1;
const int SAME_STRIDE = 1;
const double VALUE_ONE = 1.0;

// Bit-field parameters of the packed 16-bit words decoded by convertValue /
// convertValue2. Each word is read as: bit 15 = sign, bits 11..14 = exponent,
// bits 0..10 = fraction.

// Mask applied after shifting a word right by 11 to isolate the 4-bit exponent.
const ushort CONVERT_AND_VALUE = 15;
// Mask isolating the low 11 fraction bits of a word.
const ushort CONVERT_AND_VALUE_2 = 2047;
// Value of the hidden bit (bit 11), placed directly above the fraction.
const ushort CONVERT_MUL_VALUE = 2048;

// 0xFFFFF000: every bit above the low 12 bits set. Added to the mantissa of
// negative words before shifting. It is only ever read, so it is const like
// the other decoder parameters.
const uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
|
||||
|
||||
inline void convertValue(double aValue ,double* des){
|
||||
double value = aValue;
|
||||
ushort *exponentPtr = (ushort *)&value;
|
||||
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
|
||||
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
|
||||
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
|
||||
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
|
||||
double signValue = aValue;
|
||||
short *signPtr = (short *)&signValue;
|
||||
uint sign_bit[4] = {
|
||||
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
|
||||
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
|
||||
double fraction3Value = aValue;
|
||||
ushort *fraction3Ptr = (ushort *)&fraction3Value;
|
||||
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
|
||||
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
|
||||
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
|
||||
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
|
||||
uint hidden_bit[4] = {
|
||||
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
|
||||
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
|
||||
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
|
||||
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
|
||||
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
};
|
||||
int outputPtr[4] = {0};
|
||||
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
|
||||
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1))
|
||||
: (temp >> std::abs(exponentPtr[0] - 1));
|
||||
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
|
||||
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1))
|
||||
: (temp >> std::abs(exponentPtr[1] - 1));
|
||||
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
|
||||
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1))
|
||||
: (temp >> std::abs(exponentPtr[2] - 1));
|
||||
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
|
||||
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1))
|
||||
: (temp >> std::abs(exponentPtr[3] - 1));
|
||||
des[0] = outputPtr[0];
|
||||
des[1] = outputPtr[1];
|
||||
des[2] = outputPtr[2];
|
||||
des[3] = outputPtr[3];
|
||||
|
||||
}
|
||||
|
||||
inline void convertValue2(short* aValue ,double* des){
|
||||
ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
|
||||
exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE;
|
||||
exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE;
|
||||
exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE;
|
||||
exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE;
|
||||
short signPtr [4] = {aValue[0],aValue[1],aValue[2],aValue[3]};
|
||||
uint sign_bit[4] = {
|
||||
(uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0),
|
||||
(uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)};
|
||||
ushort fraction3Ptr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]};
|
||||
fraction3Ptr[0] &= CONVERT_AND_VALUE_2;
|
||||
fraction3Ptr[1] &= CONVERT_AND_VALUE_2;
|
||||
fraction3Ptr[2] &= CONVERT_AND_VALUE_2;
|
||||
fraction3Ptr[3] &= CONVERT_AND_VALUE_2;
|
||||
uint hidden_bit[4] = {
|
||||
sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE +
|
||||
((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 +
|
||||
((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE +
|
||||
((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 +
|
||||
((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE,
|
||||
};
|
||||
int outputPtr[4] = {0};
|
||||
uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE;
|
||||
outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1)): temp;
|
||||
temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE;
|
||||
outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1)): temp;
|
||||
temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE;
|
||||
outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1)): temp;
|
||||
temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE;
|
||||
outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1)): temp;
|
||||
des[0] = outputPtr[0];
|
||||
des[1] = outputPtr[1];
|
||||
des[2] = outputPtr[2];
|
||||
des[3] = outputPtr[3];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -314,7 +412,8 @@ Matrix Aurora::polyval(const Matrix &aP, const Matrix &aX) {
|
||||
for (int i = 0; i < aX.getDataSize(); ++i) {
|
||||
vdPowI(aP.getDataSize(), aX.getData() + i, 0, powArg, 1, temp, 1);
|
||||
vdMul(aP.getDataSize(), aP.getData(), temp, temp);
|
||||
result[i] = cblas_dasum(3, temp, 1);
|
||||
Eigen::Map<Eigen::VectorXd> vd(temp,aP.getDataSize());
|
||||
result[i] = vd.array().sum();
|
||||
}
|
||||
delete[] powArg;
|
||||
delete[] temp;
|
||||
@@ -653,3 +752,27 @@ void Aurora::padding(Matrix &aMatrix, int aIndex, double aValue)
|
||||
&aValue,0,newData+aMatrix.getDataSize()*aMatrix.getValueType(),1);
|
||||
aMatrix = Matrix::New(newData,size,1,1,aMatrix.getValueType());
|
||||
}
|
||||
|
||||
/**
 * Decode a Matrix whose elements each pack four 16-bit words into a Matrix
 * holding one double per word.
 *
 * Every input element is split into four 16-bit words, and each word is
 * decoded by convertValue2. The output therefore holds four values for each
 * input element, stored consecutively in input order.
 *
 * @param aMatrix matrix whose raw element storage carries the packed words
 * @return matrix of decoded values; its first dimension is four times the
 *         first dimension of aMatrix, the other two are unchanged
 */
Matrix Aurora::convertfp16tofloat(const Matrix& aMatrix)
{
    auto input = aMatrix.getData();
    const size_t total_count = aMatrix.getDataSize();

    // Four decoded values per packed input element.
    // NOTE(review): `malloc` here is assumed to be the project's allocator
    // (element-count argument, double* result) - confirm.
    auto output = malloc(total_count*4);

    for (size_t i = 0; i < total_count; ++i) {
        // Copy the four words out instead of aliasing the double storage
        // through a short*.
        short words[4];
        std::memcpy(words, input + i, sizeof(words));
        ::convertValue2(words, output + i * 4);
    }

    // The buffer holds four values per input element, so the first dimension
    // is scaled to match. Reusing the input dimensions unchanged would leave
    // three quarters of the decoded data outside the Matrix's reported size.
    return Matrix::New(output,aMatrix.getDimSize(0)*4,aMatrix.getDimSize(1),aMatrix.getDimSize(2));
}
|
||||
|
||||
@@ -112,6 +112,8 @@ namespace Aurora {
|
||||
* @param aValue 指定值
|
||||
*/
|
||||
void padding(Matrix& aMatrix, int aIndex, double aValue);
|
||||
|
||||
/**
 * Decode packed 16-bit words into doubles.
 * Each element of aMatrix is read as four 16-bit words
 * (bit 15 = sign, bits 11..14 = exponent, bits 0..10 = fraction);
 * every word yields one decoded value in the output buffer.
 * @param aMatrix matrix whose element storage carries the packed words
 * @return a new Matrix backed by the decoded values (four per input element)
 */
Matrix convertfp16tofloat(const Matrix& aMatrix);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#include <cstddef>
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
|
||||
#include "MatlabReader.h"
|
||||
#include "TestUtility.h"
|
||||
|
||||
#include "Matrix.h"
|
||||
@@ -107,14 +109,18 @@ TEST_F(Function1D_Test, interp1) {
|
||||
}
|
||||
|
||||
// Evaluates a degree-5 polynomial at a single point and checks the result
// against a reference value.
TEST_F(Function1D_Test, polyval){
    // Earlier small-integer case: P = {3,2,1}, X = {5,7,9} -> {86, 162, 262}.
    double *dataP = new double[6]{2.78786e-9, -1.398845e-6, 3.287156e-4, -5.799136e-2, 5.038813, 1.402385e3};
    double *dataX = new double[1]{24.570274572648690};

    auto matrixP = Aurora::Matrix::fromRawData(dataP,6);
    auto matrixX = Aurora::Matrix::fromRawData(dataX,1);
    auto result = Aurora::polyval(matrixP,matrixX);

    // The reference value is rounded to one decimal place, so compare within
    // half a unit of that digit; an exact floating-point match cannot hold.
    EXPECT_NEAR(1495.6, result.getData()[0], 0.05);
}
|
||||
|
||||
TEST_F(Function1D_Test, complexAndEtc){
|
||||
@@ -501,3 +507,18 @@ TEST_F(Function1D_Test, padding) {
|
||||
auto result = ma.getData();
|
||||
|
||||
}
|
||||
|
||||
// Decodes the packed 16-bit samples stored in the reference .mat file and
// compares every decoded value with the stored expected output.
TEST_F(Function1D_Test, convertfp16tofloat) {
    MatlabReader reader("/home/krad/TestData/convertReal.mat");

    // Number of 16-bit samples, filled in by readint16.
    size_t sampleCount = 0;
    auto packed = reader.readint16("input",sampleCount);

    // Four 16-bit samples share one double-sized matrix element.
    auto packedMatrix = Aurora::Matrix::copyFromRawData((double*)packed.get(),sampleCount/4);
    auto decodedMatrix = Aurora::convertfp16tofloat(packedMatrix);
    auto decoded = decodedMatrix.getData();

    auto expected = reader.read("output");
    for (size_t idx = 0; idx < sampleCount; ++idx) {
        EXPECT_EQ(decoded[idx], expected.getData()[idx])<<"index:"<<idx<<",input:"<< ((short*)packedMatrix.getData())[idx]<<",input2:"<<packed.get()[idx];
    }
}
|
||||
|
||||
Reference in New Issue
Block a user