diff --git a/src/Function1D.cpp b/src/Function1D.cpp index b9b1a55..4d4d781 100644 --- a/src/Function1D.cpp +++ b/src/Function1D.cpp @@ -753,26 +753,26 @@ void Aurora::padding(Matrix &aMatrix, int aIndex, double aValue) aMatrix = Matrix::New(newData,size,1,1,aMatrix.getValueType()); } -Matrix Aurora::convertfp16tofloat(const Matrix& aMatrix) +Matrix Aurora::convertfp16tofloat(short* aData, int aRows, int aColumns) { - auto input = aMatrix.getData(); + auto input = aData; + size_t size = aRows*aColumns; + size_t quaterSize = size/4; //uint16变换为float(32位)输出大小翻倍 - auto output = malloc(aMatrix.getDataSize()*4); - size_t rows = aMatrix.getDataSize()*sizeof(double)/sizeof(short); - size_t total_count = aMatrix.getDataSize(); + auto output = malloc(size); - // #pragma omp parallel for - for (size_t i = 0; i < total_count; i+=8) { + #pragma omp parallel for + for (size_t i = 0; i < quaterSize; i+=8) { //循环展开以避免过度的线程调用 - if (i < total_count)::convertValue2((short*)(input+i), output + (i) * 4); - if (i+1 < total_count)::convertValue2((short*)(input+i+1), output + (i+1) * 4); - if (i+2 < total_count)::convertValue2((short*)(input+i+2), output + (i+2) * 4); - if (i+3 < total_count)::convertValue2((short*)(input+i+3), output + (i+3) * 4); - if (i+4 < total_count)::convertValue2((short*)(input+i+4), output + (i+4) * 4); - if (i+5 < total_count)::convertValue2((short*)(input+i+5), output + (i+5) * 4); - if (i+6 < total_count)::convertValue2((short*)(input+i+6), output + (i+6) * 4); - if (i+7 < total_count)::convertValue2((short*)(input+i+7), output + (i+7) * 4); + if (i < quaterSize)::convertValue2((short*)(input+i*4), output + (i) * 4); + if (i+1 < quaterSize)::convertValue2((short*)(input+(i+1)*4), output + (i+1) * 4); + if (i+2 < quaterSize)::convertValue2((short*)(input+(i+2)*4), output + (i+2) * 4); + if (i+3 < quaterSize)::convertValue2((short*)(input+(i+3)*4), output + (i+3) * 4); + if (i+4 < quaterSize)::convertValue2((short*)(input+(i+4)*4), output + (i+4) * 4); + if (i+5 < quaterSize)::convertValue2((short*)(input+(i+5)*4), output + (i+5) * 4); + if (i+6 < quaterSize)::convertValue2((short*)(input+(i+6)*4), output + (i+6) * 4); + if (i+7 < quaterSize)::convertValue2((short*)(input+(i+7)*4), output + (i+7) * 4); } - return Matrix::New(output,aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2)); + return Matrix::New(output,aRows,aColumns,1); } diff --git a/src/Function1D.h b/src/Function1D.h index c26dafe..cddd088 100644 --- a/src/Function1D.h +++ b/src/Function1D.h @@ -113,7 +113,7 @@ namespace Aurora { */ void padding(Matrix& aMatrix, int aIndex, double aValue); - Matrix convertfp16tofloat(const Matrix& aMatrix); + Matrix convertfp16tofloat(short* aData, int aRows, int aColumns); }; diff --git a/test/Function1D_Test.cpp b/test/Function1D_Test.cpp index 7949da1..a8b51f2 100644 --- a/test/Function1D_Test.cpp +++ b/test/Function1D_Test.cpp @@ -513,12 +513,14 @@ TEST_F(Function1D_Test, convertfp16tofloat) { size_t count = 0; auto input = m.readint16("input",count); - auto ma = Aurora::Matrix::copyFromRawData((double*)input.get(),count/4); - auto resultM = Aurora::convertfp16tofloat(ma); + for(int i = 0; i<1000; i++){ + auto resultM = Aurora::convertfp16tofloat(input.get(),count,1); + } + auto resultM = Aurora::convertfp16tofloat(input.get(),count,1); auto result = resultM.getData(); auto output = m.read("output"); for (size_t i = 0; i