Program Listing for File permutation.hpp

Return to documentation for file (src/permutation.hpp)

/* This file is part of brille.

Copyright © 2019,2020 Greg Tucker <greg.tucker@stfc.ac.uk>

brille is free software: you can redistribute it and/or modify it under the
terms of the GNU Affero General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.

brille is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with brille. If not, see <https://www.gnu.org/licenses/>.            */

/* This file holds implementations of Linear Assignment Problem solvers which
   can be used to find the sorting permutations at grid points.              */
#ifndef BRILLE_PERMUTATION_H
#define BRILLE_PERMUTATION_H
#include <complex>
#include <cstdint>
#include "array.hpp"
#include "array_latvec.hpp" // defines bArray
#include "sorting_status.hpp"
#include "munkres.hpp"
#include "lapjv.hpp"
#include "smp.hpp"
namespace brille {

template<class T> struct CostTraits{
  using type = T;
  constexpr static T max = (std::numeric_limits<T>::max)();
};
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template<class T> struct CostTraits<std::complex<T>>{
  using type = T;
  constexpr static T max = (std::numeric_limits<T>::max)();
};
#endif



template<class T, class R, class I,
          typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
        >
bool munkres_permutation(const T* centre, const T* neighbour, const std::array<I,3>& Nel,
                         const R Wscl, const R Wvec, const R Wmat,
                         const size_t span, const size_t Nobj,
                         bArray<size_t>& permutations,
                         const size_t centre_idx, const size_t neighbour_idx,
                         const int vec_cost_func = 0
                       ){
/* An earlier version of this function took `centre` and `neighbour` arrays
   which were effectively [span, Nobj] 2D arrays. This has the unfortunate
   property that individual objects were not contiguous in memory but instead
   had a stride equal to Nobj.
   Now this function requires arrays which are [Nobj, span], placing each
   object in contiguous memory and eliminating the need to copy sub-object
   vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
brille::assignment::Munkres<R> munkres(Nobj);
size_t *assignment = new size_t[Nobj]();
const T *c_i, *n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
  for (size_t j=0; j<Nobj; ++j){
    c_i = centre+i*span;
    n_j = neighbour+j*span;
    s_cost = R(0);
    if (Nel[0]){
      for (size_t k=0; k<Nel[0]; ++k)
        s_cost += magnitude(c_i[k] - n_j[k]);
      c_i += Nel[0];
      n_j += Nel[0];
    }
    if (Nel[1]){
      /* As long as the eigenvectors at each grid point are the eigenvectors of
         a Hermitian matrix (for which H=H†, † ≡ complex conjugate transpose)
         then the *entire* Nel[1] dimensional eigenvectors at a given grid
         point *are* orthogonal, and equivalent-mode eigenvectors at neighbouring
         grid points should be least-orthogonal.

         If ⃗a and ⃗b are eigenvectors of D with eigenvalues α and β,
         respectively, then (using Einstein summation notation)
         <D ⃗a, ⃗b > = (Dᵢⱼaⱼ)† bᵢ = Dᵢⱼ* aⱼ* bᵢ = aⱼ* Dᵢⱼ* bᵢ = < ⃗a, D† ⃗b >
         and, since D ⃗a = α ⃗a and D† ⃗b = D ⃗b = β ⃗b,
         < α ⃗a, ⃗b > = < ⃗a, β ⃗b > → (α-β)< ⃗a, ⃗b > = 0.
         ∴ < ⃗a, ⃗b > = 0 for non-degenerate solutions to D ϵ = ω² ϵ.
         For degenerate solutions, eigenvalue solvers still tend to return an
         arbitrary linear combination c₀ ⃗a + s₁ ⃗b and s₀ ⃗a + c₁ ⃗b which are
         still orthogonal.

         It stands to reason that at nearby grid points the orthogonality will
         only be approximate, and we can instead try to identify the any
         least-orthogonal modes as being equivalent.
      */
      switch(vec_cost_func){
        case 0: v_cost = std::abs(std::sin(brille::utils::hermitian_angle(Nel[1], c_i, n_j))); break;
        case 1: v_cost = brille::utils::vector_distance(Nel[1], c_i, n_j); break;
        case 2: v_cost = 1-brille::utils::vector_product(Nel[1], c_i, n_j); break;
        case 3: v_cost = brille::utils::vector_angle(Nel[1], c_i, n_j); break;
      }
      c_i += Nel[1];
      n_j += Nel[1];
    }
    if (Nel[2]){
      I nel2 = std::sqrt(Nel[2]);
      if (nel2*nel2 != Nel[2])
        throw std::runtime_error("Non-square matrix in munkres_permutation");
      m_cost = brille::utils::frobenius_distance(nel2, c_i, n_j);
    }
    // for each i we want to determine the cheapest j
    munkres.get_cost()[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
  }
}
// use the Munkres' algorithm to determine the optimal assignment
munkres.run_assignment();
if (!munkres.get_assignment(assignment)){
  delete[] assignment;
  return false;
}
/* use the fact that the neighbour objects have already had their global
   permutation saved into `permutations` to determine the global permuation
   for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
  for (brille::ind_t j=0; j<Nobj; ++j)
    if (permutations.val(nind, i) == assignment[j]) permutations.val(cind, i) = j;
delete[] assignment;
return true;
}


template<class T, class R, class I,
          typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
        >
bool jv_permutation(const T* centre, const T* neighbour, const std::array<I,3>& Nel,
                    const R Wscl, const R Wvec, const R Wmat,
                    const size_t span, const size_t Nobj,
                    bArray<size_t>& permutations,
                    const size_t centre_idx, const size_t neighbour_idx,
                    const int vec_cost_func = 0
                   ){
/* An earlier version of this function took `centre` and `neighbour` arrays
   which were effectively [span, Nobj] 2D arrays. This has the unfortunate
   property that individual objects were not contiguous in memory but instead
   had a stride equal to Nobj.
   Now this function requires arrays which are [Nobj, span], placing each
   object in contiguous memory and eliminating the need to copy sub-object
   vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
R *cost=nullptr, *usol=nullptr, *vsol=nullptr;
cost = new R[Nobj*Nobj];
usol = new R[Nobj];
vsol = new R[Nobj];
int *rowsol=nullptr, *colsol=nullptr;
rowsol = new int[Nobj];
colsol = new int[Nobj];

const T *c_i, *n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
  for (size_t j=0; j<Nobj; ++j){
    c_i = centre+i*span;
    n_j = neighbour+j*span;
    s_cost = R(0);
    if (Nel[0]){
      for (size_t k=0; k<Nel[0]; ++k)
        s_cost += magnitude(c_i[k] - n_j[k]);
      c_i += Nel[0];
      n_j += Nel[0];
    }
    if (Nel[1]){
      switch(vec_cost_func){
        case 0: v_cost = std::abs(std::sin(brille::utils::hermitian_angle(Nel[1], c_i, n_j))); break;
        case 1: v_cost = brille::utils::vector_distance(Nel[1], c_i, n_j); break;
        case 2: v_cost = 1-brille::utils::vector_product(Nel[1], c_i, n_j); break;
        case 3: v_cost =     brille::utils::vector_angle(Nel[1], c_i, n_j); break;
      }
      c_i += Nel[1];
      n_j += Nel[1];
    }
    if (Nel[2]){
      I nel2 = std::sqrt(Nel[2]);
      if (nel2*nel2 != Nel[2])
        throw std::runtime_error("Non-square matrix in jv_permutation");
      m_cost = brille::utils::frobenius_distance(nel2, c_i, n_j);
    }
    // for each i we want to determine the cheapest j
    // cost[i*Nobj+j] = std::log(Wscl*s_cost + Wvec*v_cost + Wmat*m_cost);
    cost[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
  }
}

// use the Jonker-Volgenant algorithm to determine the optimal assignment
/*
There might be a hidden problem here.
As discussed in the README at https://github.com/hrldcpr/pyLAPJV

Supposedly, if two costs are equally smallest (in a row) to machine precision
then the Jonker-Volgenant algorithm enters an infinite loop.
The version in lapjv.h has a check to avoid this but it might still be a
problem.
*/
brille::assignment::lapjv((int)Nobj, cost, false, rowsol, colsol, usol, vsol);
/* use the fact that the neighbour objects have already had their global
   permutation saved into `permutations` to determine the global permuation
   for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
  for (brille::ind_t j=0; j<Nobj; ++j)
    if (permutations.val(nind, i) == rowsol[j]) permutations.val(cind, i) = static_cast<size_t>(j);

delete[] cost;
delete[] usol;
delete[] vsol;
delete[] rowsol;
delete[] colsol;
return true;
}

template<class S, class T, class R, class I,
          typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
        >
bool jv_permutation(const S* centre_vals, const T* centre_vecs,
                    const S* neighbour_vals, const T* neighbour_vecs,
                    const std::array<I,3>& vals_Nel, const std::array<I,3>& vecs_Nel,
                    const R Wscl, const R Wvec, const R Wmat,
                    const size_t vals_span, const size_t vecs_span, const size_t Nobj,
                    bArray<size_t>& permutations,
                    const size_t centre_idx, const size_t neighbour_idx,
                    const int vec_cost_func = 0
                   ){
/* An earlier version of this function took `centre` and `neighbour` arrays
   which were effectively [span, Nobj] 2D arrays. This has the unfortunate
   property that individual objects were not contiguous in memory but instead
   had a stride equal to Nobj.
   Now this function requires arrays which are [Nobj, span], placing each
   object in contiguous memory and eliminating the need to copy sub-object
   vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
R *cost=nullptr, *usol=nullptr, *vsol=nullptr;
cost = new R[Nobj*Nobj];
usol = new R[Nobj];
vsol = new R[Nobj];
int *rowsol=nullptr, *colsol=nullptr;
rowsol = new int[Nobj];
colsol = new int[Nobj];

const S *vals_c_i, *vals_n_j;
const T *vecs_c_i, *vecs_n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
  for (size_t j=0; j<Nobj; ++j){
    vals_c_i = centre_vals+i*vals_span;
    vals_n_j = neighbour_vals+j*vals_span;
    vecs_c_i = centre_vecs+i*vecs_span;
    vecs_n_j = neighbour_vecs+j*vecs_span;
    s_cost = R(0);
    if (vals_Nel[0]){
      for (size_t k=0; k<vals_Nel[0]; ++k)
        s_cost += magnitude(vals_c_i[k] - vals_n_j[k]);
      vals_c_i += vals_Nel[0];
      vals_n_j += vals_Nel[0];
    }
    if (vecs_Nel[0]){
      for (size_t k=0; k<vecs_Nel[0]; ++k)
        s_cost += magnitude(vecs_c_i[k] - vecs_n_j[k]);
      vecs_c_i += vecs_Nel[0];
      vecs_n_j += vecs_Nel[0];
    }
    v_cost = R(0);
    if (vals_Nel[1]){
      switch(vec_cost_func){
        case 0: v_cost += std::abs(std::sin(brille::utils::hermitian_angle(vals_Nel[1], vals_c_i, vals_n_j))); break;
        case 1: v_cost +=  brille::utils::vector_distance(vals_Nel[1], vals_c_i, vals_n_j); break;
        case 2: v_cost += 1-brille::utils::vector_product(vals_Nel[1], vals_c_i, vals_n_j); break;
        case 3: v_cost +=     brille::utils::vector_angle(vals_Nel[1], vals_c_i, vals_n_j); break;
      }
      vals_c_i += vals_Nel[1];
      vals_n_j += vals_Nel[1];
    }
    if (vecs_Nel[1]){
      switch(vec_cost_func){
        case 0: v_cost += std::abs(std::sin(brille::utils::hermitian_angle(vecs_Nel[1], vecs_c_i, vecs_n_j))); break;
        case 1: v_cost +=  brille::utils::vector_distance(vecs_Nel[1], vecs_c_i, vecs_n_j); break;
        case 2: v_cost += 1-brille::utils::vector_product(vecs_Nel[1], vecs_c_i, vecs_n_j); break;
        case 3: v_cost +=     brille::utils::vector_angle(vecs_Nel[1], vecs_c_i, vecs_n_j); break;
      }
      vecs_c_i += vecs_Nel[1];
      vecs_n_j += vecs_Nel[1];
    }
    m_cost = R(0);
    if (vals_Nel[2]){
      I nel2 = static_cast<I>(std::sqrt(vals_Nel[2]));
      if (nel2*nel2 != vals_Nel[2])
        throw std::runtime_error("Non-square matrix in jv_permutation");
      m_cost = brille::utils::frobenius_distance(nel2, vals_c_i, vals_n_j);
    }
    if (vecs_Nel[2]){
      I nel2 = static_cast<I>(std::sqrt(vecs_Nel[2]));
      if (nel2*nel2 != vecs_Nel[2])
        throw std::runtime_error("Non-square matrix in jv_permutation");
      m_cost = brille::utils::frobenius_distance(nel2, vecs_c_i, vecs_n_j);
    }
    // for each i we want to determine the cheapest j
    // cost[i*Nobj+j] = std::log(Wscl*s_cost + Wvec*v_cost + Wmat*m_cost);
    cost[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
  }
}
// use the Jonker-Volgenant algorithm to determine the optimal assignment
/*
There might be a hidden problem here.
As discussed in the README at https://github.com/hrldcpr/pyLAPJV

Supposedly, if two costs are equally smallest (in a row) to machine precision
then the Jonker-Volgenant algorithm enters an infinite loop.
The version in lapjv.h has a check to avoid this but it might still be a
problem.
*/
brille::assignment::lapjv((int)Nobj, cost, false, rowsol, colsol, usol, vsol);
/* use the fact that the neighbour objects have already had their global
   permutation saved into `permutations` to determine the global permuation
   for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
  for (brille::ind_t j=0; j<Nobj; ++j)
    if (permutations.val(nind, i) == rowsol[j]) permutations.val(cind, i) = static_cast<size_t>(j);

delete[] cost;
delete[] usol;
delete[] vsol;
delete[] rowsol;
delete[] colsol;
return true;
}

template<class I, class T>
std::vector<I> jv_permutation(const std::vector<T>& cost){
  I Nobj = static_cast<I>(std::sqrt(cost.size()));
  assert( cost.size() == Nobj*Nobj);
  std::vector<T> usol(Nobj,T(0)), vsol(Nobj,T(0));
  std::vector<I> rows(Nobj,I(0)), cols(Nobj,I(0));

  brille::assignment::lapjv(Nobj, cost.data(), false, rows.data(), cols.data(), usol.data(), vsol.data());
  return rows;
}
template<class T>
std::vector<int> jv_permutation(const std::vector<T>& cost){
  return jv_permutation<int,T>(cost);
}

template<class T, class I>
bool jv_permutation_fill(const std::vector<T>& cost, std::vector<I>& row){
  I Nobj = static_cast<I>(std::sqrt(cost.size()));
  assert(cost.size() == static_cast<size_t>(Nobj)*static_cast<size_t>(Nobj));
  std::vector<T> u(Nobj,T(0)), v(Nobj,T(0));
  std::vector<I> col(Nobj,0);
  row.resize(Nobj);
  brille::assignment::lapjv(Nobj, cost.data(), false, row.data(), col.data(), u.data(), v.data());
  return true;
}
template<class T, class I>
bool jv_permutation_fill(const std::vector<T>& cost, std::vector<I>& row, std::vector<I>& col){
  I Nobj = static_cast<I>(std::sqrt(cost.size()));
  assert(cost.size() == static_cast<size_t>(Nobj)*static_cast<size_t>(Nobj));
  std::vector<T> u(Nobj,T(0)), v(Nobj,T(0));
  row.resize(Nobj);
  col.resize(Nobj);
  brille::assignment::lapjv(Nobj, cost.data(), false, row.data(), col.data(), u.data(), v.data());
  return true;
}

template<class T, class R, class I,
          typename=typename std::enable_if<std::is_same<typename CostTraits<T>::type, R>::value>::type
        >
bool sm_permutation(const T* centre, const T* neighbour, const std::array<I,3>& Nel,
                    const R Wscl, const R Wvec, const R Wmat,
                    const size_t span, const size_t Nobj,
                    bArray<size_t>& permutations,
                    const size_t centre_idx, const size_t neighbour_idx,
                    const int vec_cost_func = 0
                   ){
/* An earlier version of this function took `centre` and `neighbour` arrays
   which were effectively [span, Nobj] 2D arrays. This has the unfortunate
   property that individual objects were not contiguous in memory but instead
   had a stride equal to Nobj.
   Now this function requires arrays which are [Nobj, span], placing each
   object in contiguous memory and eliminating the need to copy sub-object
   vector/matrices into contiguous memory before calling subroutines.
*/
// initialize variables
R s_cost{0}, v_cost{0}, m_cost{0};
R* cost = new R[Nobj*Nobj];
size_t* rowsol = new size_t[Nobj];
size_t* colsol = new size_t[Nobj];

const T *c_i, *n_j;
// calculate costs and fill the Munkres cost matrix
for (size_t i=0; i<Nobj; ++i){
  for (size_t j=0; j<Nobj; ++j){
    c_i = centre+i*span;
    n_j = neighbour+j*span;
    s_cost = R(0);
    if (Nel[0]){
      for (size_t k=0; k<Nel[0]; ++k)
        s_cost += magnitude(c_i[k] - n_j[k]);
      c_i += Nel[0];
      n_j += Nel[0];
    }
    if (Nel[1]){
      switch(vec_cost_func){
        case 0: v_cost = std::abs(std::sin(brille::utils::hermitian_angle(Nel[1], c_i, n_j))); break;
        case 1: v_cost =  brille::utils::vector_distance(Nel[1], c_i, n_j); break;
        case 2: v_cost = 1-brille::utils::vector_product(Nel[1], c_i, n_j); break;
        case 3: v_cost =     brille::utils::vector_angle(Nel[2], c_i, n_j); break;
        default: std::cout << "Unknown vector cost function. None used." << std::endl;
      }
      c_i += Nel[1];
      n_j += Nel[1];
    }
    if (Nel[2]){
      I nel2 = std::sqrt(Nel[2]);
      if (nel2*nel2 != Nel[2])
        throw std::runtime_error("Non-square matrix in sm_permutation");
      m_cost = brille::utils::frobenius_distance(nel2, c_i, n_j);
    }
    // for each i we want to determine the cheapest j
    cost[i*Nobj+j] = Wscl*s_cost + Wvec*v_cost + Wmat*m_cost;
  }
}

brille::assignment::smp(Nobj, cost, rowsol, colsol, false);
/* use the fact that the neighbour objects have already had their global
   permutation saved into `permutations` to determine the global permuation
   for the centre objects too; storing the result into `permutations` as well.
*/
brille::ind_t nind = static_cast<brille::ind_t>(neighbour_idx);
brille::ind_t cind = static_cast<brille::ind_t>(centre_idx);
for (brille::ind_t i=0; i<Nobj; ++i)
  for (brille::ind_t j=0; j<Nobj; ++j)
    if (permutations.val(nind, i) == rowsol[j]) permutations.val(cind, i) = static_cast<size_t>(j);


delete[] cost;
delete[] rowsol;
delete[] colsol;
return true;
}

// The following apply_permutation has been adapted from
//  https://devblogs.microsoft.com/oldnewthing/20170104-00/?p=95115
template<typename ObjItr, typename PermItr>
void
apply_permutation(ObjItr objects, ObjItr end, PermItr indices){
  using Obj = typename std::iterator_traits<ObjItr>::value_type;
  using Dif = typename std::iterator_traits<PermItr>::value_type;
  Dif numel = end - objects;
  for (Dif i=0; i<numel; ++i) if (i != indices[i]) {
    // move the object to a temporary location (fallsback to copy)
    Obj obji{std::move(objects[i])};
    // keep track of where we are in the swap loop
    Dif current = i;
    while (i != indices[current]){
      Dif next = indices[current];
      objects[current] = std::move(objects[next]);
      indices[current] = current;
      current = next;
    }
    objects[current] = std::move(obji);
    indices[current] = current;
  }
}

template<typename ObjItr, typename PermItr>
void
apply_inverse_permutation(ObjItr objects, ObjItr end, PermItr indices){
  using Obj = typename std::iterator_traits<ObjItr>::value_type;
  using Dif = typename std::iterator_traits<PermItr>::value_type;
  Dif numel = end - objects;
  for (Dif i=0; i<numel; ++i) while (i != indices[i]) {
    // pop-out the targeted object and its index
    Dif pop_idx = indices[indices[i]];
    Obj pop_obj{std::move(objects[indices[i]])};
    // move the current object and index to the target
    objects[indices[i]] = std::move(objects[i]);
    indices[indices[i]] = indices[i];
    // and put the popped object and index back in here
    objects[i] = std::move(pop_idx);
    indices[i] = pop_idx;
  }
}

} // namespace brille
#endif