From e1a917609d4255cda264abb0516719e40bdb4a3b Mon Sep 17 00:00:00 2001 From: kradchen Date: Tue, 9 May 2023 16:33:51 +0800 Subject: [PATCH] Fix polyval, add convertfp16toFloat --- src/Function1D.cpp | 125 ++++++++++++++++++++++++++++++++++++++- src/Function1D.h | 2 + test/Function1D_Test.cpp | 35 ++++++++--- 3 files changed, 154 insertions(+), 8 deletions(-) diff --git a/src/Function1D.cpp b/src/Function1D.cpp index ecf21b8..b9b1a55 100644 --- a/src/Function1D.cpp +++ b/src/Function1D.cpp @@ -4,7 +4,10 @@ //必须在Eigen之前 #include "AuroraDefs.h" +#include "Matrix.h" +#include +#include #include #include #include @@ -14,6 +17,7 @@ #include #include #include +#include using namespace Aurora; @@ -22,6 +26,100 @@ namespace { const int REAL_STRIDE = 1; const int SAME_STRIDE = 1; const double VALUE_ONE = 1.0; + const ushort CONVERT_AND_VALUE = 15; + const ushort CONVERT_AND_VALUE_2 = 2047; + const ushort CONVERT_MUL_VALUE = 2048; + + uint CONVERT_ADD_VALUE = UINT32_MAX - 4095; + + inline void convertValue(double aValue ,double* des){ + double value = aValue; + ushort *exponentPtr = (ushort *)&value; + exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE; + exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE; + exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE; + exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE; + double signValue = aValue; + short *signPtr = (short *)&signValue; + uint sign_bit[4] = { + (uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0), + (uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)}; + double fraction3Value = aValue; + ushort *fraction3Ptr = (ushort *)&fraction3Value; + fraction3Ptr[0] &= CONVERT_AND_VALUE_2; + fraction3Ptr[1] &= CONVERT_AND_VALUE_2; + fraction3Ptr[2] &= CONVERT_AND_VALUE_2; + fraction3Ptr[3] &= CONVERT_AND_VALUE_2; + uint hidden_bit[4] = { + sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE + + ((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE, + sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 + + ((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE, + sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE + + ((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE, + sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 + + ((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE, + }; + int outputPtr[4] = {0}; + uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE; + outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1)) + : (temp >> std::abs(exponentPtr[0] - 1)); + temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE; + outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1)) + : (temp >> std::abs(exponentPtr[1] - 1)); + temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE; + outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1)) + : (temp >> std::abs(exponentPtr[2] - 1)); + temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE; + outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1)) + : (temp >> std::abs(exponentPtr[3] - 1)); + des[0] = outputPtr[0]; + des[1] = outputPtr[1]; + des[2] = outputPtr[2]; + des[3] = outputPtr[3]; + + } + + inline void convertValue2(short* aValue ,double* des){ + ushort exponentPtr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]}; + exponentPtr[0] = (exponentPtr[0] >> 11) & CONVERT_AND_VALUE; + exponentPtr[1] = (exponentPtr[1] >> 11) & CONVERT_AND_VALUE; + exponentPtr[2] = (exponentPtr[2] >> 11) & CONVERT_AND_VALUE; + exponentPtr[3] = (exponentPtr[3] >> 11) & CONVERT_AND_VALUE; + short signPtr [4] = {aValue[0],aValue[1],aValue[2],aValue[3]}; + uint sign_bit[4] = { + (uint)(signPtr[0] < 0 ? 1 : 0), (uint)(signPtr[1] < 0 ? 1 : 0), + (uint)(signPtr[2] < 0 ? 1 : 0), (uint)(signPtr[3] < 0 ? 1 : 0)}; + ushort fraction3Ptr[4] = {(ushort)aValue[0],(ushort)aValue[1],(ushort)aValue[2],(ushort)aValue[3]}; + fraction3Ptr[0] &= CONVERT_AND_VALUE_2; + fraction3Ptr[1] &= CONVERT_AND_VALUE_2; + fraction3Ptr[2] &= CONVERT_AND_VALUE_2; + fraction3Ptr[3] &= CONVERT_AND_VALUE_2; + uint hidden_bit[4] = { + sign_bit[0] * (!exponentPtr[0] ? 1 : 0) * CONVERT_MUL_VALUE + + ((!sign_bit[0] && exponentPtr[0]) ? 1 : 0) * CONVERT_MUL_VALUE, + sign_bit[1] * (!exponentPtr[1] ? 1 : 0) * 2048 + + ((!sign_bit[1] && exponentPtr[1]) ? 1 : 0) * CONVERT_MUL_VALUE, + sign_bit[2] * (!exponentPtr[2] ? 1 : 0) * CONVERT_MUL_VALUE + + ((!sign_bit[2] && exponentPtr[2]) ? 1 : 0) * CONVERT_MUL_VALUE, + sign_bit[3] * (!exponentPtr[3] ? 1 : 0) * 2048 + + ((!sign_bit[3] && exponentPtr[3]) ? 1 : 0) * CONVERT_MUL_VALUE, + }; + int outputPtr[4] = {0}; + uint temp = fraction3Ptr[0] + hidden_bit[0] + sign_bit[0] * CONVERT_ADD_VALUE; + outputPtr[0] = exponentPtr[0] > 1 ? (temp << (exponentPtr[0] - 1)): temp; + temp = fraction3Ptr[1] + hidden_bit[1] + sign_bit[1] * CONVERT_ADD_VALUE; + outputPtr[1] = exponentPtr[1] > 1 ? (temp << (exponentPtr[1] - 1)): temp; + temp = fraction3Ptr[2] + hidden_bit[2] + sign_bit[2] * CONVERT_ADD_VALUE; + outputPtr[2] = exponentPtr[2] > 1 ? (temp << (exponentPtr[2] - 1)): temp; + temp = fraction3Ptr[3] + hidden_bit[3] + sign_bit[3] * CONVERT_ADD_VALUE; + outputPtr[3] = exponentPtr[3] > 1 ? (temp << (exponentPtr[3] - 1)): temp; + des[0] = outputPtr[0]; + des[1] = outputPtr[1]; + des[2] = outputPtr[2]; + des[3] = outputPtr[3]; + + } } @@ -314,7 +412,8 @@ Matrix Aurora::polyval(const Matrix &aP, const Matrix &aX) { for (int i = 0; i < aX.getDataSize(); ++i) { vdPowI(aP.getDataSize(), aX.getData() + i, 0, powArg, 1, temp, 1); vdMul(aP.getDataSize(), aP.getData(), temp, temp); - result[i] = cblas_dasum(3, temp, 1); + Eigen::Map vd(temp,aP.getDataSize()); + result[i] = vd.array().sum(); } delete[] powArg; delete[] temp; @@ -653,3 +752,27 @@ void Aurora::padding(Matrix &aMatrix, int aIndex, double aValue) &aValue,0,newData+aMatrix.getDataSize()*aMatrix.getValueType(),1); aMatrix = Matrix::New(newData,size,1,1,aMatrix.getValueType()); } + +Matrix Aurora::convertfp16tofloat(const Matrix& aMatrix) +{ + + auto input = aMatrix.getData(); + //uint16变换为float(32位)输出大小翻倍 + auto output = malloc(aMatrix.getDataSize()*4); + size_t rows = aMatrix.getDataSize()*sizeof(double)/sizeof(short); + size_t total_count = aMatrix.getDataSize(); + + // #pragma omp parallel for + for (size_t i = 0; i < total_count; i+=8) { + //循环展开以避免过度的线程调用 + if (i < total_count)::convertValue2((short*)(input+i), output + (i) * 4); + if (i+1 < total_count)::convertValue2((short*)(input+i+1), output + (i+1) * 4); + if (i+2 < total_count)::convertValue2((short*)(input+i+2), output + (i+2) * 4); + if (i+3 < total_count)::convertValue2((short*)(input+i+3), output + (i+3) * 4); + if (i+4 < total_count)::convertValue2((short*)(input+i+4), output + (i+4) * 4); + if (i+5 < total_count)::convertValue2((short*)(input+i+5), output + (i+5) * 4); + if (i+6 < total_count)::convertValue2((short*)(input+i+6), output + (i+6) * 4); + if (i+7 < total_count)::convertValue2((short*)(input+i+7), output + (i+7) * 4); + } + return Matrix::New(output,aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2)); +} diff --git a/src/Function1D.h b/src/Function1D.h index 94b2f64..c26dafe 100644 --- a/src/Function1D.h +++ b/src/Function1D.h @@ -112,6 +112,8 @@ namespace Aurora { * @param aValue 指定值 */ void padding(Matrix& aMatrix, int aIndex, double aValue); + + Matrix convertfp16tofloat(const Matrix& aMatrix); }; diff --git a/test/Function1D_Test.cpp b/test/Function1D_Test.cpp index 6fae939..7949da1 100644 --- a/test/Function1D_Test.cpp +++ b/test/Function1D_Test.cpp @@ -1,6 +1,8 @@ +#include #include #include +#include "MatlabReader.h" #include "TestUtility.h" #include "Matrix.h" @@ -107,14 +109,18 @@ TEST_F(Function1D_Test, interp1) { } TEST_F(Function1D_Test, polyval){ - double *dataP = new double[3]{3,2,1}; - double *dataX = new double[3]{5,7,9}; - auto matrixP = Aurora::Matrix::fromRawData(dataP,3); - auto matrixX = Aurora::Matrix::fromRawData(dataX,3); + // double *dataP = new double[3]{3,2,1}; + double *dataP = new double[6]{2.78786e-9, -1.398845e-6, 3.287156e-4, -5.799136e-2, 5.038813, 1.402385e3}; + // double *dataX = new double[3]{5,7,9}; + double *dataX = new double[1]{24.570274572648690}; + + + auto matrixP = Aurora::Matrix::fromRawData(dataP,6); + auto matrixX = Aurora::Matrix::fromRawData(dataX,1); auto result = Aurora::polyval(matrixP,matrixX); - EXPECT_DOUBLE_EQ(86., result.getData()[0]); - EXPECT_DOUBLE_EQ(162., result.getData()[1]); - EXPECT_DOUBLE_EQ(262., result.getData()[2]); + EXPECT_DOUBLE_EQ(1495.6, result.getData()[0]); + // EXPECT_DOUBLE_EQ(162., result.getData()[1]); + // EXPECT_DOUBLE_EQ(262., result.getData()[2]); } TEST_F(Function1D_Test, complexAndEtc){ @@ -501,3 +507,18 @@ TEST_F(Function1D_Test, padding) { auto result = ma.getData(); } + +TEST_F(Function1D_Test, convertfp16tofloat) { + MatlabReader m("/home/krad/TestData/convertReal.mat"); + + size_t count = 0; + auto input = m.readint16("input",count); + auto ma = Aurora::Matrix::copyFromRawData((double*)input.get(),count/4); + auto resultM = Aurora::convertfp16tofloat(ma); + auto result = resultM.getData(); + auto output = m.read("output"); + for (size_t i = 0; i