Change sortrows logic.

This commit is contained in:
kradchen
2023-05-24 11:04:36 +08:00
parent 8b7bff2f00
commit e0aebc7922
3 changed files with 119 additions and 77 deletions

View File

@@ -6,11 +6,14 @@
#include "Function1D.h"
//必须在Eigen之前
#include "AuroraDefs.h"
#include "Function3D.h"
#include "Matrix.h"
#include <Eigen/Core>
#include <Eigen/Eigen>
#include <Eigen/Dense>
#include <iterator>
#include <utility>
using namespace Aurora;
@@ -544,17 +547,17 @@ Matrix Aurora::mean(const Matrix &aMatrix, FunctionDirection direction, bool aIn
}
}
// Copying overload of sort(): validates the input, then passes the result of
// aMatrix.deepCopy() to the Matrix&& overload, which does the actual sorting.
// Input with getDimSize(2) > 1 or with a complex value type is rejected: a message is
// written to std::cerr and a default-constructed Matrix is returned.
// NOTE(review): this text is a rendered commit diff without +/- markers, so the signature
// line and the final return line each appear twice (the one-argument form, then the form
// taking `direction`) — presumably old then new; confirm against the real file.
Matrix Aurora::sort(const Matrix &aMatrix) {
Matrix Aurora::sort(const Matrix &aMatrix, FunctionDirection direction) {
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
// Only one reason is reported; the 3D message takes precedence over the complex one.
std::cerr
<< (aMatrix.getDimSize(2) > 1 ? "sort() not support 3D data!" : "sort() not support complex value type!")
<< std::endl;
return Matrix();
}
// std::forward<Matrix &&> casts the deepCopy() result to an rvalue so that the
// Matrix&& overload is the one selected.
return sort(std::forward<Matrix &&>(aMatrix.deepCopy()));
return sort(std::forward<Matrix &&>(aMatrix.deepCopy()), direction);
}
// In-place overload of sort(): sorts the data reachable through aMatrix.getData() in
// ascending order (std::sort with the default comparison) and returns aMatrix.
// With direction == Column every column is sorted on its own; with direction == Row every
// row is sorted on its own. A matrix whose getDimSize(0) is 1 is always sorted as Row.
// NOTE(review): this is a rendered diff hunk without +/- markers. The one-argument
// signature, the early `return sortrows(aMatrix);`, and the first pair of column loops
// appear to be the removed lines; the `@@` line below is a hunk header and the two lines
// that follow the error message in the real file are not shown here.
Matrix Aurora::sort(Matrix &&aMatrix) {
Matrix Aurora::sort(Matrix &&aMatrix, FunctionDirection direction) {
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
std::cerr
<< (aMatrix.getDimSize(2) > 1 ? "sort() not support 3D data!" : "sort() not support complex value type!")
@@ -563,58 +566,105 @@ Matrix Aurora::sort(Matrix &&aMatrix) {
}
// A single-row matrix (row vector) is sorted along its row regardless of `direction`.
if (aMatrix.getDimSize(0)==1){
return sortrows(aMatrix);
direction = Row;
}
if (aMatrix.getDimSize(0)>=100000){
#pragma omp parallel for
for (int i = 0; i < aMatrix.getDimSize(1); ++i) {
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData()+i*aMatrix.getDimSize(0),aMatrix.getDimSize(0));
std::sort(srcV.array().begin(),srcV.array().end());
}
}
else
if (direction == Column)
{
for (int i = 0; i < aMatrix.getDimSize(1); ++i) {
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData()+i*aMatrix.getDimSize(0),aMatrix.getDimSize(0));
std::sort(srcV.array().begin(),srcV.array().end());
// Columns with at least 100000 rows are distributed over OpenMP threads; each
// iteration touches only its own column, so the iterations are independent.
if (aMatrix.getDimSize(0)>=100000){
#pragma omp parallel for
for (int i = 0; i < aMatrix.getDimSize(1); ++i) {
// Column i is viewed as a contiguous run of getDimSize(0) doubles starting at
// offset i*getDimSize(0) (i.e. column-major indexing of the buffer).
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData()+i*aMatrix.getDimSize(0),aMatrix.getDimSize(0));
std::sort(srcV.array().begin(),srcV.array().end());
}
}
else
{
// Same per-column sort, run serially for shorter columns.
for (int i = 0; i < aMatrix.getDimSize(1); ++i) {
Eigen::Map<Eigen::VectorXd> srcV(aMatrix.getData()+i*aMatrix.getDimSize(0),aMatrix.getDimSize(0));
std::sort(srcV.array().begin(),srcV.array().end());
}
}
}
else if(direction == Row){
// The whole buffer is viewed as a getDimSize(0) x getDimSize(1) matrix so rows can be addressed.
Eigen::Map<Eigen::MatrixXd> srcM(aMatrix.getData(),aMatrix.getDimSize(0),aMatrix.getDimSize(1));
// Rows with at least 100000 columns are distributed over OpenMP threads, one row per iteration.
if (aMatrix.getDimSize(1)>=100000){
#pragma omp parallel for
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
std::sort(srcM.row(i).array().begin(),srcM.row(i).array().end());
}
}
else
{
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
std::sort(srcM.row(i).array().begin(),srcM.row(i).array().end());
}
}
}
else{
// NOTE(review): any other direction only logs an error; execution still falls through
// to the return below, so the caller receives the matrix unsorted.
std::cerr<<"sort not support all mode!"<<std::endl;
}
return aMatrix;
}
// Reorders whole rows of aMatrix: rows are sorted ascending by their first-column value,
// and runs of equal values are then re-sorted by the following column(s). The reordered
// rows are written into a copy obtained from aMatrix.deepCopy(), which is returned;
// *indexMatrix receives, for each output row, the zero-based index of the source row.
// 3D or complex input is rejected with a message on std::cerr and an empty Matrix.
// NOTE(review): this is a rendered diff hunk without +/- markers. The one-argument
// signature, the forwarding return, the Matrix&& overload with its Eigen row loops, and
// `return aMatrix;` appear to be the removed implementation interleaved with the new one.
Matrix Aurora::sortrows(const Matrix &aMatrix) {
Matrix Aurora::sortrows(const Matrix &aMatrix, Matrix* indexMatrix) {
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
std::cerr
<< (aMatrix.getDimSize(2) > 1 ? "sortrows() not support 3D data!" : "sortrows() not support complex value type!")
<< std::endl;
return Matrix();
}
return sortrows(std::forward<Matrix &&>(aMatrix.deepCopy()));
}
Matrix Aurora::sortrows(Matrix &&aMatrix) {
if (aMatrix.getDimSize(2)>1 || aMatrix.isComplex()) {
std::cerr
<< (aMatrix.getDimSize(2) > 1 ? "sortrows() not support 3D data!" : "sortrows() not support complex value type!")
<< std::endl;
return Matrix();
}
Eigen::Map<Eigen::MatrixXd> srcM(aMatrix.getData(),aMatrix.getDimSize(0),aMatrix.getDimSize(1));
if (aMatrix.getDimSize(1)>=100000){
#pragma omp parallel for
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
std::sort(srcM.row(i).array().begin(),srcM.row(i).array().end());
}
}
else
auto result = aMatrix.deepCopy();
int rows = aMatrix.getDimSize(0);
// col holds one (key, original row index) pair per row; the key starts as the value in
// the first column (flat index j, consistent with the j + i*rows indexing used below).
std::vector<std::pair<double,int>> col;
// NOTE(review): `last` is assigned once below and never read.
std::vector<std::pair<double,int>>::iterator last;
for (size_t j = 0; j < rows; j++)
{
for (int i = 0; i < aMatrix.getDimSize(0); ++i) {
std::sort(srcM.row(i).array().begin(),srcM.row(i).array().end());
}
col.push_back(std::make_pair(aMatrix[j], j));
}
return aMatrix;
// Primary ordering: ascending by first-column value.
std::sort(col.begin(), col.end(),[](auto a,auto b){
return a.first < b.first;
});
last = col.begin();
// Walk the remaining columns, using each one to break ties left by the previous key.
for (size_t i = 1; i < aMatrix.getDimSize(1); i++)
{
int beginIdx = 0;
bool sameFlag = false;
// Scan the sorted data looking for runs of equal keys.
// NOTE(review): keys are refreshed to the current column at the end of every pass, so
// from the third column on a "run" is detected from the previous column alone. Adjacent
// rows that match there but differ in an earlier column are merged into one run and can
// be reordered against the earlier ordering — confirm against the intended semantics.
while(beginIdx < col.size()){
for (int iterIdx = beginIdx+1; iterIdx <= col.size(); iterIdx++)
{
// Advance until the next differing key.
// NOTE(review): when iterIdx == col.size() the left operand reads col[iterIdx], one past
// the last element, before the `iterIdx!=col.size()` guard is evaluated; the guard
// would need to come first to avoid the out-of-range access.
if(col[iterIdx].first == col[iterIdx-1].first && iterIdx!=col.size()){
// An equal key exists, so another tie-breaking pass may be needed.
sameFlag = true;
continue;
}
// Decide whether the run needs sorting; iterIdx-beginIdx==1 means a single, distinct key.
if (iterIdx-beginIdx != 1){
// Order the run [beginIdx, iterIdx) by the values in column i.
std::sort(col.begin()+beginIdx, col.begin()+iterIdx,[&aMatrix,i,rows](auto a,auto b){
return aMatrix[a.second+i*rows] < aMatrix[b.second+i*rows];
});
}
beginIdx = iterIdx;
}
// No equal keys were found: leave the scan loop.
if(!sameFlag) break;
}
// No equal keys were found: the order is final, so stop processing further columns.
if(!sameFlag) break;
// Replace every stored key with that row's value in column i for the next pass.
std::for_each(col.begin(), col.end() , [&aMatrix,i,rows](std::pair<double,int>& a){return a.first=aMatrix[a.second+i*rows];});
}
int i=0;
// NOTE(review): indexMatrix is dereferenced unconditionally here and in the lambda below,
// while the header declaration in this change gives it a default of nullptr and documents
// it as optional — a call that omits it would dereference a null pointer.
(*indexMatrix) = zeros(aMatrix.getDimSize(0),1);
std::for_each(col.begin(),col.end(), [&aMatrix,&result,&i,indexMatrix](std::pair<double,int>& a){
// Copy source row a.second into output row i (`$` is presumably the project's
// whole-dimension selector — confirm) and record where the row came from.
result(i,$) = aMatrix(a.second,$);
(*indexMatrix)[i] = a.second;
i++;
});
return result;
}
Matrix Aurora::median(const Matrix &aMatrix) {
@@ -626,7 +676,7 @@ Matrix Aurora::median(const Matrix &aMatrix) {
return Matrix();
}
bool horVector = aMatrix.getDimSize(0)==1;
Matrix sorted = horVector?sortrows(aMatrix):sort(aMatrix);
Matrix sorted = horVector?sort(aMatrix,Row):sort(aMatrix);
int rows = horVector?sorted.getDimSize(1):sorted.getDimSize(0);
int cols = horVector?sorted.getDimSize(0):sorted.getDimSize(1);
Eigen::Map<Eigen::MatrixXd> srcM(sorted.getData(),rows,cols);

View File

@@ -87,32 +87,32 @@ namespace Aurora
Matrix mean(const Matrix &aMatrix, FunctionDirection direction = Column, bool aIncludeNan = true);
/**
 * @brief Sort a matrix along the given direction (by column by default).
 *        3D data and complex values are currently not supported.
 *
 * @param aMatrix   matrix to sort
 * @param direction sort direction; ALL is not supported
 * @return Matrix   the sorted matrix
 */
Matrix sort(const Matrix &aMatrix);
Matrix sort(const Matrix &aMatrix,FunctionDirection direction = Column);
/**
 * @brief Sort a matrix along the given direction (by column by default), taking the
 *        matrix as an rvalue. 3D data and complex values are currently not supported.
 *
 * @param aMatrix   matrix to sort
 * @param direction sort direction; ALL is not supported
 * @return Matrix   the sorted matrix
 */
Matrix sort(Matrix &&aMatrix);
Matrix sort(Matrix &&aMatrix,FunctionDirection direction = Column);
/**
 * Sort the rows of a matrix in ascending order based on the elements of the first column.
 * When the first column contains repeated elements, sortrows orders those rows by the
 * values in the next column, and repeats this for subsequent equal values.
 * @attention 3D data and complex values are currently not supported.
 * @param aMatrix matrix to sort
 * @param indexMatrix pointer to a matrix that receives the original index of each sorted
 *        row; documented as optional.
 *        NOTE(review): the definition shown in this change writes through this pointer
 *        without a null check, so pass a valid pointer until that is guarded.
 * @return the sorted matrix
 */
Matrix sortrows(const Matrix &aMatrix);
/**
 * Sort a matrix by rows; 3D data and complex values are currently not supported.
 * @param aMatrix matrix to sort
 * @return the sorted matrix
 */
Matrix sortrows(Matrix &&aMatrix);
Matrix sortrows(const Matrix &aMatrix, Matrix* indexMatrix=nullptr);
/**
* 对矩阵求中间值 按列, 目前不支持三维,不支持复数

View File

@@ -291,27 +291,19 @@ TEST_F(Function2D_Test, sort) {
}
// Exercises Aurora::sortrows.
// NOTE(review): this is a rendered diff hunk without +/- markers; the 4x4 fixture with its
// EXPECT_* checks and the one-million-column run appear to be the removed test body, and
// the 42-element fixture the added one. `dataB` and `ret` are therefore declared twice here.
TEST_F(Function2D_Test, sortrows) {
double *dataB = new double[16]{1.1, 2.6, 6.2, 3.8,
4.3, 10.6, 5.7, 6.9,
7.1, 8.3, 9.7, 11.2,
17.8, 13.3,7 , -7.7};
auto B = Aurora::Matrix::fromRawData(dataB, 4, 4);
auto ret = Aurora::sortrows(B);
EXPECT_EQ(4, ret.getDimSize(0));
EXPECT_EQ(4, ret.getDimSize(1));
EXPECT_DOUBLE_EQ(5.7, ret.getData()[2]);
EXPECT_DOUBLE_EQ(8.3, ret.getData()[5]);
EXPECT_DOUBLE_EQ(17.8, ret.getData()[12]);
ret = Aurora::sortrows(B*5);
EXPECT_EQ(4, ret.getDimSize(0));
EXPECT_EQ(4, ret.getDimSize(1));
EXPECT_DOUBLE_EQ(28.5, ret.getData()[2]);
EXPECT_DOUBLE_EQ(41.5, ret.getData()[5]);
EXPECT_DOUBLE_EQ(89, ret.getData()[12]);
// Large sort: above 100k elements the multithreaded path is faster.
double * dataA = Aurora::random(1000000*4);
auto A = Aurora::Matrix::New(dataA, 4, 1000000);
ret = Aurora::sortrows(A);
// Fixture written as 6 rows of 7 values; the first four rows share the leading value 95
// and the last two share 76, so tie-breaking on later columns is exercised.
double *dataB = new double[42]{
95, 27, 95, 79, 67, 70, 69,
95, 7, 48, 95, 75, 3, 31,
95, 7, 48, 65, 74, 27, 95,
95, 7, 14, 3, 39, 4, 3,
76, 15, 42, 84, 65, 9, 43,
76, 97, 91, 93, 17, 82, 38
};
// Loaded as 7x6 and transposed, presumably so the matrix rows match the layout above — confirm.
auto B = transpose(Aurora::Matrix::fromRawData(dataB, 7, 6));
Aurora::Matrix idx ;
auto ret = Aurora::sortrows(B,&idx);
// NOTE(review): the result and the index matrix are only printed; nothing is asserted,
// so this part of the test cannot fail on a wrong ordering.
ret.printf();
idx.printf();
}
TEST_F(Function2D_Test, median) {