451 lines
16 KiB
C++
451 lines
16 KiB
C++
// The code was adapted from Eigen/src/Eigenvaleus/RealSchur.h
|
|
//
|
|
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
|
// Copyright (C) 2021-2022 Yixuan Qiu <yixuan.qiu@cos.name>
|
|
//
|
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
// with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
#ifndef SPECTRA_UPPER_HESSENBERG_SCHUR_H
|
|
#define SPECTRA_UPPER_HESSENBERG_SCHUR_H
|
|
|
|
#include <Eigen/Core>
|
|
#include <Eigen/Jacobi>
|
|
#include <Eigen/Householder>
|
|
#include <stdexcept>
|
|
|
|
#include "../Util/TypeTraits.h"
|
|
|
|
namespace Spectra {
|
|
|
|
template <typename Scalar = double>
|
|
class UpperHessenbergSchur
|
|
{
|
|
private:
|
|
using Index = Eigen::Index;
|
|
using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
|
|
using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
|
|
using Vector2s = Eigen::Matrix<Scalar, 2, 1>;
|
|
using Vector3s = Eigen::Matrix<Scalar, 3, 1>;
|
|
using GenericMatrix = Eigen::Ref<Matrix>;
|
|
using ConstGenericMatrix = const Eigen::Ref<const Matrix>;
|
|
|
|
Index m_n; // Size of the matrix
|
|
Matrix m_T; // T matrix, A = UTU'
|
|
Matrix m_U; // U matrix, A = UTU'
|
|
bool m_computed;
|
|
|
|
// L1 norm of an upper Hessenberg matrix
|
|
static Scalar upper_hessenberg_l1_norm(ConstGenericMatrix& x)
|
|
{
|
|
const Index n = x.cols();
|
|
Scalar norm(0);
|
|
for (Index j = 0; j < n; j++)
|
|
norm += x.col(j).segment(0, (std::min)(n, j + 2)).cwiseAbs().sum();
|
|
return norm;
|
|
}
|
|
|
|
// Look for single small sub-diagonal element and returns its index
|
|
Index find_small_subdiag(Index iu, const Scalar& near_0) const
|
|
{
|
|
using std::abs;
|
|
|
|
const Scalar eps = Eigen::NumTraits<Scalar>::epsilon();
|
|
Index res = iu;
|
|
while (res > 0)
|
|
{
|
|
Scalar s = abs(m_T.coeff(res - 1, res - 1)) + abs(m_T.coeff(res, res));
|
|
s = Eigen::numext::maxi<Scalar>(s * eps, near_0);
|
|
if (abs(m_T.coeff(res, res - 1)) <= s)
|
|
break;
|
|
res--;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
// Update T given that rows iu-1 and iu decouple from the rest
|
|
void split_off_two_rows(Index iu, const Scalar& ex_shift)
|
|
{
|
|
using std::sqrt;
|
|
using std::abs;
|
|
|
|
// The eigenvalues of the 2x2 matrix [a b; c d] are
|
|
// trace +/- sqrt(discr/4) where discr = tr^2 - 4*det, tr = a + d, det = ad - bc
|
|
Scalar p = Scalar(0.5) * (m_T.coeff(iu - 1, iu - 1) - m_T.coeff(iu, iu));
|
|
Scalar q = p * p + m_T.coeff(iu, iu - 1) * m_T.coeff(iu - 1, iu); // q = tr^2 / 4 - det = discr/4
|
|
m_T.coeffRef(iu, iu) += ex_shift;
|
|
m_T.coeffRef(iu - 1, iu - 1) += ex_shift;
|
|
|
|
if (q >= Scalar(0)) // Two real eigenvalues
|
|
{
|
|
Scalar z = sqrt(abs(q));
|
|
Eigen::JacobiRotation<Scalar> rot;
|
|
rot.makeGivens((p >= Scalar(0)) ? (p + z) : (p - z), m_T.coeff(iu, iu - 1));
|
|
m_T.rightCols(m_n - iu + 1).applyOnTheLeft(iu - 1, iu, rot.adjoint());
|
|
m_T.topRows(iu + 1).applyOnTheRight(iu - 1, iu, rot);
|
|
m_T.coeffRef(iu, iu - 1) = Scalar(0);
|
|
m_U.applyOnTheRight(iu - 1, iu, rot);
|
|
}
|
|
if (iu > 1)
|
|
m_T.coeffRef(iu - 1, iu - 2) = Scalar(0);
|
|
}
|
|
|
|
// Form shift in shift_info, and update ex_shift if an exceptional shift is performed
|
|
void compute_shift(Index iu, Index iter, Scalar& ex_shift, Vector3s& shift_info)
|
|
{
|
|
using std::sqrt;
|
|
using std::abs;
|
|
|
|
shift_info.coeffRef(0) = m_T.coeff(iu, iu);
|
|
shift_info.coeffRef(1) = m_T.coeff(iu - 1, iu - 1);
|
|
shift_info.coeffRef(2) = m_T.coeff(iu, iu - 1) * m_T.coeff(iu - 1, iu);
|
|
|
|
// Wilkinson's original ad hoc shift
|
|
if (iter == 10)
|
|
{
|
|
ex_shift += shift_info.coeff(0);
|
|
for (Index i = 0; i <= iu; ++i)
|
|
m_T.coeffRef(i, i) -= shift_info.coeff(0);
|
|
Scalar s = abs(m_T.coeff(iu, iu - 1)) + abs(m_T.coeff(iu - 1, iu - 2));
|
|
shift_info.coeffRef(0) = Scalar(0.75) * s;
|
|
shift_info.coeffRef(1) = Scalar(0.75) * s;
|
|
shift_info.coeffRef(2) = Scalar(-0.4375) * s * s;
|
|
}
|
|
|
|
// MATLAB's new ad hoc shift
|
|
if (iter == 30)
|
|
{
|
|
Scalar s = (shift_info.coeff(1) - shift_info.coeff(0)) / Scalar(2);
|
|
s = s * s + shift_info.coeff(2);
|
|
if (s > Scalar(0))
|
|
{
|
|
s = sqrt(s);
|
|
if (shift_info.coeff(1) < shift_info.coeff(0))
|
|
s = -s;
|
|
s = s + (shift_info.coeff(1) - shift_info.coeff(0)) / Scalar(2);
|
|
s = shift_info.coeff(0) - shift_info.coeff(2) / s;
|
|
ex_shift += s;
|
|
for (Index i = 0; i <= iu; ++i)
|
|
m_T.coeffRef(i, i) -= s;
|
|
shift_info.setConstant(Scalar(0.964));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compute index im at which Francis QR step starts and the first Householder vector
|
|
void init_francis_qr_step(Index il, Index iu, const Vector3s& shift_info, Index& im, Vector3s& first_householder_vec) const
|
|
{
|
|
using std::abs;
|
|
|
|
const Scalar eps = Eigen::NumTraits<Scalar>::epsilon();
|
|
Vector3s& v = first_householder_vec; // alias to save typing
|
|
for (im = iu - 2; im >= il; --im)
|
|
{
|
|
const Scalar Tmm = m_T.coeff(im, im);
|
|
const Scalar r = shift_info.coeff(0) - Tmm;
|
|
const Scalar s = shift_info.coeff(1) - Tmm;
|
|
v.coeffRef(0) = (r * s - shift_info.coeff(2)) / m_T.coeff(im + 1, im) + m_T.coeff(im, im + 1);
|
|
v.coeffRef(1) = m_T.coeff(im + 1, im + 1) - Tmm - r - s;
|
|
v.coeffRef(2) = m_T.coeff(im + 2, im + 1);
|
|
if (im == il)
|
|
break;
|
|
const Scalar lhs = m_T.coeff(im, im - 1) * (abs(v.coeff(1)) + abs(v.coeff(2)));
|
|
const Scalar rhs = v.coeff(0) * (abs(m_T.coeff(im - 1, im - 1)) + abs(Tmm) + abs(m_T.coeff(im + 1, im + 1)));
|
|
if (abs(lhs) < eps * rhs)
|
|
break;
|
|
}
|
|
}
|
|
|
|
// P = I - tau * v * v' = P'
|
|
// PX = X - tau * v * (v'X), X [3 x c]
|
|
static void apply_householder_left(const Vector2s& ess, const Scalar& tau, Scalar* x, Index ncol, Index stride)
|
|
{
|
|
const Scalar v1 = ess.coeff(0), v2 = ess.coeff(1);
|
|
const Scalar* const x_end = x + ncol * stride;
|
|
for (; x < x_end; x += stride)
|
|
{
|
|
const Scalar tvx = tau * (x[0] + v1 * x[1] + v2 * x[2]);
|
|
x[0] -= tvx;
|
|
x[1] -= tvx * v1;
|
|
x[2] -= tvx * v2;
|
|
}
|
|
}
|
|
|
|
// P = I - tau * v * v' = P'
|
|
// XP = X - tau * (X * v) * v', X [r x 3]
|
|
static void apply_householder_right(const Vector2s& ess, const Scalar& tau, Scalar* x, Index nrow, Index stride)
|
|
{
|
|
const Scalar v1 = ess.coeff(0), v2 = ess.coeff(1);
|
|
Scalar* x0 = x;
|
|
Scalar* x1 = x + stride;
|
|
Scalar* x2 = x1 + stride;
|
|
for (Index i = 0; i < nrow; i++)
|
|
{
|
|
const Scalar txv = tau * (x0[i] + v1 * x1[i] + v2 * x2[i]);
|
|
x0[i] -= txv;
|
|
x1[i] -= txv * v1;
|
|
x2[i] -= txv * v2;
|
|
}
|
|
}
|
|
|
|
// SIMD version of apply_householder_right()
|
|
// Inspired by apply_rotation_in_the_plane_selector() in Eigen/src/Jacobi/Jacobi.h
|
|
static void apply_householder_right_simd(const Vector2s& ess, const Scalar& tau, Scalar* x, Index nrow, Index stride)
|
|
{
|
|
// Packet type
|
|
using Eigen::internal::ploadu;
|
|
using Eigen::internal::pstoreu;
|
|
using Eigen::internal::pset1;
|
|
using Eigen::internal::padd;
|
|
using Eigen::internal::psub;
|
|
using Eigen::internal::pmul;
|
|
using Packet = typename Eigen::internal::packet_traits<Scalar>::type;
|
|
constexpr unsigned char PacketSize = Eigen::internal::packet_traits<Scalar>::size;
|
|
constexpr unsigned char Peeling = 2;
|
|
constexpr unsigned char Increment = Peeling * PacketSize;
|
|
|
|
// Column heads
|
|
Scalar* x0 = x;
|
|
Scalar* x1 = x + stride;
|
|
Scalar* x2 = x1 + stride;
|
|
// Pointers for the current row
|
|
Scalar* px0 = x0;
|
|
Scalar* px1 = x1;
|
|
Scalar* px2 = x2;
|
|
|
|
// Householder reflectors
|
|
const Scalar v1 = ess.coeff(0), v2 = ess.coeff(1);
|
|
// Vectorized versions
|
|
const Packet vtau = pset1<Packet>(tau);
|
|
const Packet vv1 = pset1<Packet>(v1);
|
|
const Packet vv2 = pset1<Packet>(v2);
|
|
|
|
// n % (2^k) == n & (2^k-1), see https://stackoverflow.com/q/3072665
|
|
// const Index peeling_end = nrow - nrow % Increment;
|
|
const Index aligned_end = nrow - (nrow & (PacketSize - 1));
|
|
const Index peeling_end = nrow - (nrow & (Increment - 1));
|
|
for (Index i = 0; i < peeling_end; i += Increment)
|
|
{
|
|
Packet vx01 = ploadu<Packet>(px0);
|
|
Packet vx02 = ploadu<Packet>(px0 + PacketSize);
|
|
Packet vx11 = ploadu<Packet>(px1);
|
|
Packet vx12 = ploadu<Packet>(px1 + PacketSize);
|
|
Packet vx21 = ploadu<Packet>(px2);
|
|
Packet vx22 = ploadu<Packet>(px2 + PacketSize);
|
|
|
|
// Packet txv1 = vtau * (vx01 + vv1 * vx11 + vv2 * vx21);
|
|
Packet txv1 = pmul(vtau, padd(padd(vx01, pmul(vv1, vx11)), pmul(vv2, vx21)));
|
|
Packet txv2 = pmul(vtau, padd(padd(vx02, pmul(vv1, vx12)), pmul(vv2, vx22)));
|
|
|
|
pstoreu(px0, psub(vx01, txv1));
|
|
pstoreu(px0 + PacketSize, psub(vx02, txv2));
|
|
pstoreu(px1, psub(vx11, pmul(txv1, vv1)));
|
|
pstoreu(px1 + PacketSize, psub(vx12, pmul(txv2, vv1)));
|
|
pstoreu(px2, psub(vx21, pmul(txv1, vv2)));
|
|
pstoreu(px2 + PacketSize, psub(vx22, pmul(txv2, vv2)));
|
|
|
|
px0 += Increment;
|
|
px1 += Increment;
|
|
px2 += Increment;
|
|
}
|
|
if (aligned_end != peeling_end)
|
|
{
|
|
px0 = x0 + peeling_end;
|
|
px1 = x1 + peeling_end;
|
|
px2 = x2 + peeling_end;
|
|
|
|
Packet x0_p = ploadu<Packet>(px0);
|
|
Packet x1_p = ploadu<Packet>(px1);
|
|
Packet x2_p = ploadu<Packet>(px2);
|
|
Packet txv = pmul(vtau, padd(padd(x0_p, pmul(vv1, x1_p)), pmul(vv2, x2_p)));
|
|
|
|
pstoreu(px0, psub(x0_p, txv));
|
|
pstoreu(px1, psub(x1_p, pmul(txv, vv1)));
|
|
pstoreu(px2, psub(x2_p, pmul(txv, vv2)));
|
|
}
|
|
|
|
// Remaining rows
|
|
for (Index i = aligned_end; i < nrow; i++)
|
|
{
|
|
const Scalar txv = tau * (x0[i] + v1 * x1[i] + v2 * x2[i]);
|
|
x0[i] -= txv;
|
|
x1[i] -= txv * v1;
|
|
x2[i] -= txv * v2;
|
|
}
|
|
}
|
|
|
|
// Perform a Francis QR step involving rows il:iu and columns im:iu
|
|
void perform_francis_qr_step(Index il, Index im, Index iu, const Vector3s& first_householder_vec, const Scalar& near_0)
|
|
{
|
|
using std::abs;
|
|
|
|
for (Index k = im; k <= iu - 2; ++k)
|
|
{
|
|
const bool first_iter = (k == im);
|
|
Vector3s v;
|
|
if (first_iter)
|
|
v = first_householder_vec;
|
|
else
|
|
v = m_T.template block<3, 1>(k, k - 1);
|
|
|
|
Scalar tau, beta;
|
|
Vector2s ess;
|
|
v.makeHouseholder(ess, tau, beta);
|
|
|
|
if (abs(beta) > near_0) // if v is not zero
|
|
{
|
|
if (first_iter && k > il)
|
|
m_T.coeffRef(k, k - 1) = -m_T.coeff(k, k - 1);
|
|
else if (!first_iter)
|
|
m_T.coeffRef(k, k - 1) = beta;
|
|
|
|
// These Householder transformations form the O(n^3) part of the algorithm
|
|
// m_T.block(k, k, 3, m_n - k).applyHouseholderOnTheLeft(ess, tau, workspace);
|
|
// m_T.block(0, k, (std::min)(iu, k + 3) + 1, 3).applyHouseholderOnTheRight(ess, tau, workspace);
|
|
// m_U.block(0, k, m_n, 3).applyHouseholderOnTheRight(ess, tau, workspace);
|
|
apply_householder_left(ess, tau, &m_T.coeffRef(k, k), m_n - k, m_n);
|
|
apply_householder_right_simd(ess, tau, &m_T.coeffRef(0, k), (std::min)(iu, k + 3) + 1, m_n);
|
|
apply_householder_right_simd(ess, tau, &m_U.coeffRef(0, k), m_n, m_n);
|
|
}
|
|
}
|
|
|
|
// The last 2-row block
|
|
Eigen::JacobiRotation<Scalar> rot;
|
|
Scalar beta;
|
|
rot.makeGivens(m_T.coeff(iu - 1, iu - 2), m_T.coeff(iu, iu - 2), &beta);
|
|
|
|
if (abs(beta) > near_0) // if v is not zero
|
|
{
|
|
m_T.coeffRef(iu - 1, iu - 2) = beta;
|
|
m_T.rightCols(m_n - iu + 1).applyOnTheLeft(iu - 1, iu, rot.adjoint());
|
|
m_T.topRows(iu + 1).applyOnTheRight(iu - 1, iu, rot);
|
|
m_U.applyOnTheRight(iu - 1, iu, rot);
|
|
}
|
|
|
|
// clean up pollution due to round-off errors
|
|
for (Index i = im + 2; i <= iu; ++i)
|
|
{
|
|
m_T.coeffRef(i, i - 2) = Scalar(0);
|
|
if (i > im + 2)
|
|
m_T.coeffRef(i, i - 3) = Scalar(0);
|
|
}
|
|
}
|
|
|
|
public:
|
|
UpperHessenbergSchur() :
|
|
m_n(0), m_computed(false)
|
|
{}
|
|
|
|
UpperHessenbergSchur(ConstGenericMatrix& mat) :
|
|
m_n(mat.rows()), m_computed(false)
|
|
{
|
|
compute(mat);
|
|
}
|
|
|
|
void compute(ConstGenericMatrix& mat)
|
|
{
|
|
using std::abs;
|
|
using std::sqrt;
|
|
|
|
if (mat.rows() != mat.cols())
|
|
throw std::invalid_argument("UpperHessenbergSchur: matrix must be square");
|
|
|
|
m_n = mat.rows();
|
|
m_T.resize(m_n, m_n);
|
|
m_U.resize(m_n, m_n);
|
|
constexpr Index max_iter_per_row = 40;
|
|
const Index max_iter = m_n * max_iter_per_row;
|
|
|
|
m_T.noalias() = mat;
|
|
m_U.setIdentity();
|
|
|
|
// The matrix m_T is divided in three parts.
|
|
// Rows 0,...,il-1 are decoupled from the rest because m_T(il,il-1) is zero.
|
|
// Rows il,...,iu is the part we are working on (the active window).
|
|
// Rows iu+1,...,end are already brought in triangular form.
|
|
Index iu = m_n - 1;
|
|
Index iter = 0; // iteration count for current eigenvalue
|
|
Index total_iter = 0; // iteration count for whole matrix
|
|
Scalar ex_shift(0); // sum of exceptional shifts
|
|
const Scalar norm = upper_hessenberg_l1_norm(m_T);
|
|
// sub-diagonal entries smaller than near_0 will be treated as zero.
|
|
// We use eps^2 to enable more precision in small eigenvalues.
|
|
const Scalar eps = Eigen::NumTraits<Scalar>::epsilon();
|
|
const Scalar near_0 = Eigen::numext::maxi<Scalar>(norm * eps * eps, TypeTraits<Scalar>::min());
|
|
|
|
if (norm != Scalar(0))
|
|
{
|
|
while (iu >= 0)
|
|
{
|
|
Index il = find_small_subdiag(iu, near_0);
|
|
|
|
// Check for convergence
|
|
if (il == iu) // One root found
|
|
{
|
|
m_T.coeffRef(iu, iu) += ex_shift;
|
|
if (iu > 0)
|
|
m_T.coeffRef(iu, iu - 1) = Scalar(0);
|
|
iu--;
|
|
iter = 0;
|
|
}
|
|
else if (il == iu - 1) // Two roots found
|
|
{
|
|
split_off_two_rows(iu, ex_shift);
|
|
iu -= 2;
|
|
iter = 0;
|
|
}
|
|
else // No convergence yet
|
|
{
|
|
Vector3s first_householder_vec = Vector3s::Zero(), shift_info;
|
|
compute_shift(iu, iter, ex_shift, shift_info);
|
|
iter++;
|
|
total_iter++;
|
|
if (total_iter > max_iter)
|
|
break;
|
|
Index im;
|
|
init_francis_qr_step(il, iu, shift_info, im, first_householder_vec);
|
|
perform_francis_qr_step(il, im, iu, first_householder_vec, near_0);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (total_iter > max_iter)
|
|
throw std::runtime_error("UpperHessenbergSchur: Schur decomposition failed");
|
|
|
|
m_computed = true;
|
|
}
|
|
|
|
const Matrix& matrix_T() const
|
|
{
|
|
if (!m_computed)
|
|
throw std::logic_error("UpperHessenbergSchur: need to call compute() first");
|
|
|
|
return m_T;
|
|
}
|
|
|
|
const Matrix& matrix_U() const
|
|
{
|
|
if (!m_computed)
|
|
throw std::logic_error("UpperHessenbergSchur: need to call compute() first");
|
|
|
|
return m_U;
|
|
}
|
|
|
|
void swap_T(Matrix& other)
|
|
{
|
|
m_T.swap(other);
|
|
}
|
|
|
|
void swap_U(Matrix& other)
|
|
{
|
|
m_U.swap(other);
|
|
}
|
|
};
|
|
|
|
} // namespace Spectra
|
|
|
|
#endif // SPECTRA_UPPER_HESSENBERG_SCHUR_H
|