Program Listing for File permutation.hpp¶
↰ Return to documentation for file (src/permutation.hpp)
/* This file is part of brille.
Copyright © 2019,2020 Greg Tucker <greg.tucker@stfc.ac.uk>
brille is free software: you can redistribute it and/or modify it under the
terms of the GNU Affero General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
brille is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with brille. If not, see <https://www.gnu.org/licenses/>. */
/* This file holds implementations of Linear Assignment Problem solvers which
can be used to find the sorting permutations at grid points. */
#ifndef BRILLE_PERMUTATION_H
#define BRILLE_PERMUTATION_H
#include <complex>
#include <cstdint>
#include "array.hpp"
#include "array_latvec.hpp" // defines bArray
#include "sorting_status.hpp"
#include "munkres.hpp"
#include "lapjv.hpp"
#include "smp.hpp"
namespace brille {
template<class T> struct CostTraits{
using type = T;
constexpr static T max = (std::numeric_limits<T>::max)();
};
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template<class T> struct CostTraits<std::complex<T>>{
using type = T;
constexpr static T max = (std::numeric_limits<T>::max)();
};
#endif
template<class T, class R, class I,
typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
>
bool munkres_permutation(const T* centre, const T* neighbour, const std::array<I,3>& Nel,
const R Wscl, const R Wvec, const R Wmat,
const size_t span, const size_t Nobj,
bArray<size_t>& permutations,
const size_t centre_idx, const size_t neighbour_idx,
const int vec_cost_func = 0
){
/* An earlier version of this function took `centre` and `neighbour` arrays
which were effectively [span, Nobj] 2D arrays. This has the unfortunate
property that individual objects were not contiguous in memory but instead
had a stride equal to Nobj.
Now this function requires arrays which are [Nobj, span], placing each
object in contiguous memory and eliminating the need to copy sub-object
vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
brille::assignment::Munkres<R> munkres(Nobj);
size_t *assignment = new size_t[Nobj]();
const T *c_i, *n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
for (size_t j=0; j<Nobj; ++j){
c_i = centre+i*span;
n_j = neighbour+j*span;
s_cost = R(0);
if (Nel[0]){
for (size_t k=0; k<Nel[0]; ++k)
s_cost += magnitude(c_i[k] - n_j[k]);
c_i += Nel[0];
n_j += Nel[0];
}
if (Nel[1]){
/* As long as the eigenvectors at each grid point are the eigenvectors of
a Hermitian matrix (for which H=H†, † ≡ complex conjugate transpose)
then the *entire* Nel[1] dimensional eigenvectors at a given grid
point *are* orthogonal, and equivalent-mode eigenvectors at neighbouring
grid points should be least-orthogonal.
If ⃗a and ⃗b are eigenvectors of D with eigenvalues α and β,
respectively, then (using Einstein summation notation)
<D ⃗a, ⃗b > = (Dᵢⱼaⱼ)† bᵢ = Dᵢⱼ* aⱼ* bᵢ = aⱼ* Dᵢⱼ* bᵢ = < ⃗a, D† ⃗b >
and, since D ⃗a = α ⃗a and D† ⃗b = D ⃗b = β ⃗b,
< α ⃗a, ⃗b > = < ⃗a, β ⃗b > → (α-β)< ⃗a, ⃗b > = 0.
∴ < ⃗a, ⃗b > = 0 for non-degenerate solutions to D ϵ = ω² ϵ.
For degenerate solutions, eigenvalue solvers still tend to return an
arbitrary linear combination c₀ ⃗a + s₁ ⃗b and s₀ ⃗a + c₁ ⃗b which are
still orthogonal.
It stands to reason that at nearby grid points the orthogonality will
only be approximate, and we can instead try to identify the any
least-orthogonal modes as being equivalent.
*/
switch(vec_cost_func){
case 0: v_cost = std::abs(std::sin(brille::utils::hermitian_angle(Nel[1], c_i, n_j))); break;
case 1: v_cost = brille::utils::vector_distance(Nel[1], c_i, n_j); break;
case 2: v_cost = 1-brille::utils::vector_product(Nel[1], c_i, n_j); break;
case 3: v_cost = brille::utils::vector_angle(Nel[1], c_i, n_j); break;
}
c_i += Nel[1];
n_j += Nel[1];
}
if (Nel[2]){
I nel2 = std::sqrt(Nel[2]);
if (nel2*nel2 != Nel[2])
throw std::runtime_error("Non-square matrix in munkres_permutation");
m_cost = brille::utils::frobenius_distance(nel2, c_i, n_j);
}
// for each i we want to determine the cheapest j
munkres.get_cost()[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
}
}
// use the Munkres' algorithm to determine the optimal assignment
munkres.run_assignment();
if (!munkres.get_assignment(assignment)){
delete[] assignment;
return false;
}
/* use the fact that the neighbour objects have already had their global
permutation saved into `permutations` to determine the global permuation
for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
for (brille::ind_t j=0; j<Nobj; ++j)
if (permutations.val(nind, i) == assignment[j]) permutations.val(cind, i) = j;
delete[] assignment;
return true;
}
template<class T, class R, class I,
typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
>
bool jv_permutation(const T* centre, const T* neighbour, const std::array<I,3>& Nel,
const R Wscl, const R Wvec, const R Wmat,
const size_t span, const size_t Nobj,
bArray<size_t>& permutations,
const size_t centre_idx, const size_t neighbour_idx,
const int vec_cost_func = 0
){
/* An earlier version of this function took `centre` and `neighbour` arrays
which were effectively [span, Nobj] 2D arrays. This has the unfortunate
property that individual objects were not contiguous in memory but instead
had a stride equal to Nobj.
Now this function requires arrays which are [Nobj, span], placing each
object in contiguous memory and eliminating the need to copy sub-object
vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
R *cost=nullptr, *usol=nullptr, *vsol=nullptr;
cost = new R[Nobj*Nobj];
usol = new R[Nobj];
vsol = new R[Nobj];
int *rowsol=nullptr, *colsol=nullptr;
rowsol = new int[Nobj];
colsol = new int[Nobj];
const T *c_i, *n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
for (size_t j=0; j<Nobj; ++j){
c_i = centre+i*span;
n_j = neighbour+j*span;
s_cost = R(0);
if (Nel[0]){
for (size_t k=0; k<Nel[0]; ++k)
s_cost += magnitude(c_i[k] - n_j[k]);
c_i += Nel[0];
n_j += Nel[0];
}
if (Nel[1]){
switch(vec_cost_func){
case 0: v_cost = std::abs(std::sin(brille::utils::hermitian_angle(Nel[1], c_i, n_j))); break;
case 1: v_cost = brille::utils::vector_distance(Nel[1], c_i, n_j); break;
case 2: v_cost = 1-brille::utils::vector_product(Nel[1], c_i, n_j); break;
case 3: v_cost = brille::utils::vector_angle(Nel[1], c_i, n_j); break;
}
c_i += Nel[1];
n_j += Nel[1];
}
if (Nel[2]){
I nel2 = std::sqrt(Nel[2]);
if (nel2*nel2 != Nel[2])
throw std::runtime_error("Non-square matrix in jv_permutation");
m_cost = brille::utils::frobenius_distance(nel2, c_i, n_j);
}
// for each i we want to determine the cheapest j
// cost[i*Nobj+j] = std::log(Wscl*s_cost + Wvec*v_cost + Wmat*m_cost);
cost[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
}
}
// use the Jonker-Volgenant algorithm to determine the optimal assignment
/*
There might be a hidden problem here.
As discussed in the README at https://github.com/hrldcpr/pyLAPJV
Supposedly, if two costs are equally smallest (in a row) to machine precision
then the Jonker-Volgenant algorithm enters an infinite loop.
The version in lapjv.h has a check to avoid this but it might still be a
problem.
*/
brille::assignment::lapjv((int)Nobj, cost, false, rowsol, colsol, usol, vsol);
/* use the fact that the neighbour objects have already had their global
permutation saved into `permutations` to determine the global permuation
for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
for (brille::ind_t j=0; j<Nobj; ++j)
if (permutations.val(nind, i) == rowsol[j]) permutations.val(cind, i) = static_cast<size_t>(j);
delete[] cost;
delete[] usol;
delete[] vsol;
delete[] rowsol;
delete[] colsol;
return true;
}
template<class S, class T, class R, class I,
typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
>
bool jv_permutation(const S* centre_vals, const T* centre_vecs,
const S* neighbour_vals, const T* neighbour_vecs,
const std::array<I,3>& vals_Nel, const std::array<I,3>& vecs_Nel,
const R Wscl, const R Wvec, const R Wmat,
const size_t vals_span, const size_t vecs_span, const size_t Nobj,
bArray<size_t>& permutations,
const size_t centre_idx, const size_t neighbour_idx,
const int vec_cost_func = 0
){
/* An earlier version of this function took `centre` and `neighbour` arrays
which were effectively [span, Nobj] 2D arrays. This has the unfortunate
property that individual objects were not contiguous in memory but instead
had a stride equal to Nobj.
Now this function requires arrays which are [Nobj, span], placing each
object in contiguous memory and eliminating the need to copy sub-object
vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
R *cost=nullptr, *usol=nullptr, *vsol=nullptr;
cost = new R[Nobj*Nobj];
usol = new R[Nobj];
vsol = new R[Nobj];
int *rowsol=nullptr, *colsol=nullptr;
rowsol = new int[Nobj];
colsol = new int[Nobj];
const S *vals_c_i, *vals_n_j;
const T *vecs_c_i, *vecs_n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
for (size_t j=0; j<Nobj; ++j){
vals_c_i = centre_vals+i*vals_span;
vals_n_j = neighbour_vals+j*vals_span;
vecs_c_i = centre_vecs+i*vecs_span;
vecs_n_j = neighbour_vecs+j*vecs_span;
s_cost = R(0);
if (vals_Nel[0]){
for (size_t k=0; k<vals_Nel[0]; ++k)
s_cost += magnitude(vals_c_i[k] - vals_n_j[k]);
vals_c_i += vals_Nel[0];
vals_n_j += vals_Nel[0];
}
if (vecs_Nel[0]){
for (size_t k=0; k<vecs_Nel[0]; ++k)
s_cost += magnitude(vecs_c_i[k] - vecs_n_j[k]);
vecs_c_i += vecs_Nel[0];
vecs_n_j += vecs_Nel[0];
}
v_cost = R(0);
if (vals_Nel[1]){
switch(vec_cost_func){
case 0: v_cost += std::abs(std::sin(brille::utils::hermitian_angle(vals_Nel[1], vals_c_i, vals_n_j))); break;
case 1: v_cost += brille::utils::vector_distance(vals_Nel[1], vals_c_i, vals_n_j); break;
case 2: v_cost += 1-brille::utils::vector_product(vals_Nel[1], vals_c_i, vals_n_j); break;
case 3: v_cost += brille::utils::vector_angle(vals_Nel[1], vals_c_i, vals_n_j); break;
}
vals_c_i += vals_Nel[1];
vals_n_j += vals_Nel[1];
}
if (vecs_Nel[1]){
switch(vec_cost_func){
case 0: v_cost += std::abs(std::sin(brille::utils::hermitian_angle(vecs_Nel[1], vecs_c_i, vecs_n_j))); break;
case 1: v_cost += brille::utils::vector_distance(vecs_Nel[1], vecs_c_i, vecs_n_j); break;
case 2: v_cost += 1-brille::utils::vector_product(vecs_Nel[1], vecs_c_i, vecs_n_j); break;
case 3: v_cost += brille::utils::vector_angle(vecs_Nel[1], vecs_c_i, vecs_n_j); break;
}
vecs_c_i += vecs_Nel[1];
vecs_n_j += vecs_Nel[1];
}
m_cost = R(0);
if (vals_Nel[2]){
I nel2 = static_cast<I>(std::sqrt(vals_Nel[2]));
if (nel2*nel2 != vals_Nel[2])
throw std::runtime_error("Non-square matrix in jv_permutation");
m_cost = brille::utils::frobenius_distance(nel2, vals_c_i, vals_n_j);
}
if (vecs_Nel[2]){
I nel2 = static_cast<I>(std::sqrt(vecs_Nel[2]));
if (nel2*nel2 != vecs_Nel[2])
throw std::runtime_error("Non-square matrix in jv_permutation");
m_cost = brille::utils::frobenius_distance(nel2, vecs_c_i, vecs_n_j);
}
// for each i we want to determine the cheapest j
// cost[i*Nobj+j] = std::log(Wscl*s_cost + Wvec*v_cost + Wmat*m_cost);
cost[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
}
}
// use the Jonker-Volgenant algorithm to determine the optimal assignment
/*
There might be a hidden problem here.
As discussed in the README at https://github.com/hrldcpr/pyLAPJV
Supposedly, if two costs are equally smallest (in a row) to machine precision
then the Jonker-Volgenant algorithm enters an infinite loop.
The version in lapjv.h has a check to avoid this but it might still be a
problem.
*/
brille::assignment::lapjv((int)Nobj, cost, false, rowsol, colsol, usol, vsol);
/* use the fact that the neighbour objects have already had their global
permutation saved into `permutations` to determine the global permuation
for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
for (brille::ind_t j=0; j<Nobj; ++j)
if (permutations.val(nind, i) == rowsol[j]) permutations.val(cind, i) = static_cast<size_t>(j);
delete[] cost;
delete[] usol;
delete[] vsol;
delete[] rowsol;
delete[] colsol;
return true;
}
template<class I, class T>
std::vector<I> jv_permutation(const std::vector<T>& cost){
I Nobj = static_cast<I>(std::sqrt(cost.size()));
assert( cost.size() == Nobj*Nobj);
std::vector<T> usol(Nobj,T(0)), vsol(Nobj,T(0));
std::vector<I> rows(Nobj,I(0)), cols(Nobj,I(0));
brille::assignment::lapjv(Nobj, cost.data(), false, rows.data(), cols.data(), usol.data(), vsol.data());
return rows;
}
template<class T>
std::vector<int> jv_permutation(const std::vector<T>& cost){
return jv_permutation<int,T>(cost);
}
template<class T, class I>
bool jv_permutation_fill(const std::vector<T>& cost, std::vector<I>& row){
I Nobj = static_cast<I>(std::sqrt(cost.size()));
assert(cost.size() == static_cast<size_t>(Nobj)*static_cast<size_t>(Nobj));
std::vector<T> u(Nobj,T(0)), v(Nobj,T(0));
std::vector<I> col(Nobj,0);
row.resize(Nobj);
brille::assignment::lapjv(Nobj, cost.data(), false, row.data(), col.data(), u.data(), v.data());
return true;
}
template<class T, class I>
bool jv_permutation_fill(const std::vector<T>& cost, std::vector<I>& row, std::vector<I>& col){
I Nobj = static_cast<I>(std::sqrt(cost.size()));
assert(cost.size() == static_cast<size_t>(Nobj)*static_cast<size_t>(Nobj));
std::vector<T> u(Nobj,T(0)), v(Nobj,T(0));
row.resize(Nobj);
col.resize(Nobj);
brille::assignment::lapjv(Nobj, cost.data(), false, row.data(), col.data(), u.data(), v.data());
return true;
}
template<class T, class R, class I,
typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
>
bool sm_permutation(const T* centre, const T* neighbour, const std::array<I,3>& Nel,
const R Wscl, const R Wvec, const R Wmat,
const size_t span, const size_t Nobj,
bArray<size_t>& permutations,
const size_t centre_idx, const size_t neighbour_idx,
const int vec_cost_func = 0
){
/* An earlier version of this function took `centre` and `neighbour` arrays
which were effectively [span, Nobj] 2D arrays. This has the unfortunate
property that individual objects were not contiguous in memory but instead
had a stride equal to Nobj.
Now this function requires arrays which are [Nobj, span], placing each
object in contiguous memory and eliminating the need to copy sub-object
vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
R* cost = new R[Nobj*Nobj];
size_t* rowsol = new size_t[Nobj];
size_t* colsol = new size_t[Nobj];
const T *c_i, *n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
for (size_t j=0; j<Nobj; ++j){
c_i = centre+i*span;
n_j = neighbour+j*span;
s_cost = R(0);
if (Nel[0]){
for (size_t k=0; k<Nel[0]; ++k)
s_cost += magnitude(c_i[k] - n_j[k]);
c_i += Nel[0];
n_j += Nel[0];
}
if (Nel[1]){
switch(vec_cost_func){
case 0: v_cost = std::abs(std::sin(brille::utils::hermitian_angle(Nel[1], c_i, n_j))); break;
case 1: v_cost = brille::utils::vector_distance(Nel[1], c_i, n_j); break;
case 2: v_cost = 1-brille::utils::vector_product(Nel[1], c_i, n_j); break;
case 3: v_cost = brille::utils::vector_angle(Nel[2], c_i, n_j); break;
default: std::cout << "Unknown vector cost function. None used." << std::endl;
}
c_i += Nel[1];
n_j += Nel[1];
}
if (Nel[2]){
I nel2 = std::sqrt(Nel[2]);
if (nel2*nel2 != Nel[2])
throw std::runtime_error("Non-square matrix in sm_permutation");
m_cost = brille::utils::frobenius_distance(nel2, c_i, n_j);
}
// for each i we want to determine the cheapest j
cost[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
}
}
brille::assignment::smp(Nobj, cost, rowsol, colsol, false);
/* use the fact that the neighbour objects have already had their global
permutation saved into `permutations` to determine the global permuation
for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
for (brille::ind_t j=0; j<Nobj; ++j)
if (permutations.val(nind, i) == rowsol[j]) permutations.val(cind, i) = static_cast<size_t>(j);
delete[] cost;
delete[] rowsol;
delete[] colsol;
return true;
}
// The following apply_permutation has been adapted from
// https://devblogs.microsoft.com/oldnewthing/20170104-00/?p=95115
template<typename ObjItr, typename PermItr>
void
apply_permutation(ObjItr objects, ObjItr end, PermItr indices){
using Obj = typename std::iterator_traits<ObjItr>::value_type;
using Dif = typename std::iterator_traits<PermItr>::value_type;
Dif numel = end - objects;
for (Dif i=0; i<numel; ++i) if (i != indices[i]) {
// move the object to a temporary location (fallsback to copy)
Obj obji{std::move(objects[i])};
// keep track of where we are in the swap loop
Dif current = i;
while (i != indices[current]){
Dif next = indices[current];
objects[current] = std::move(objects[next]);
indices[current] = current;
current = next;
}
objects[current] = std::move(obji);
indices[current] = current;
}
}
template<typename ObjItr, typename PermItr>
void
apply_inverse_permutation(ObjItr objects, ObjItr end, PermItr indices){
using Obj = typename std::iterator_traits<ObjItr>::value_type;
using Dif = typename std::iterator_traits<PermItr>::value_type;
Dif numel = end - objects;
for (Dif i=0; i<numel; ++i) while (i != indices[i]) {
// pop-out the targeted object and its index
Dif pop_idx = indices[indices[i]];
Obj pop_obj{std::move(objects[indices[i]])};
// move the current object and index to the target
objects[indices[i]] = std::move(objects[i]);
indices[indices[i]] = indices[i];
// and put the popped object and index back in here
objects[i] = std::move(pop_idx);
indices[i] = pop_idx;
}
}
} // namespace brille
#endif