[BUGFIX] Fix crash in parser.

[ginac.git] / ginac / factor.cpp
diff --git a/ginac/factor.cpp b/ginac/factor.cpp

index ecf537b010968166d75f5f0488816bbb5caa6251..efe438eae8c59e658cd7553d2867aa42c71556b3 100644 (file)
--- a/ginac/factor.cpp
+++ b/ginac/factor.cpp
@@ -1,16 +1,39 @@
  /** @file factor.cpp
   *
- *  Polynomial factorization code (implementation).
+ *  Polynomial factorization (implementation).
+ *
+ *  The interface function factor() at the end of this file is defined in the
+ *  GiNaC namespace. All other utility functions and classes are defined in an
+ *  additional anonymous namespace.
+ *
+ *  Factorization starts by doing a square free factorization and making the
+ *  coefficients integer. Then, depending on the number of free variables it
+ *  proceeds either in dedicated univariate or multivariate factorization code.
+ *
+ *  Univariate factorization does a modular factorization via Berlekamp's
+ *  algorithm and distinct degree factorization. Hensel lifting is used at the
+ *  end.
+ *  
+ *  Multivariate factorization uses the univariate factorization (applying an
+ *  evaluation homomorphism first) and Hensel lifting raises the answer to the
+ *  multivariate domain. The Hensel lifting code is completely distinct from the
+ *  code used by the univariate factorization.
   *
   *  Algorithms used can be found in
- *    [W1]  An Improved Multivariate Polynomial Factoring Algorithm,
- *          P.S.Wang, Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
+ *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
+ *          P.S.Wang,
+ *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
   *    [GCL] Algorithms for Computer Algebra,
- *          K.O.Geddes, S.R.Czapor, G.Labahn, Springer Verlag, 1992.
+ *          K.O.Geddes, S.R.Czapor, G.Labahn,
+ *          Springer Verlag, 1992.
+ *    [Mig] Some Useful Bounds,
+ *          M.Mignotte, 
+ *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
+ *          pp. 259-263, Springer-Verlag, New York, 1982.
   */
  
  /*
- *  GiNaC Copyright (C) 1999-2008 Johannes Gutenberg University Mainz, Germany
+ *  GiNaC Copyright (C) 1999-2024 Johannes Gutenberg University Mainz, Germany
   *
   *  This program is free software; you can redistribute it and/or modify
   *  it under the terms of the GNU General Public License as published by
@@ -42,11 +65,12 @@
  #include "normal.h"
  #include "add.h"
  
+#include <type_traits>
  #include <algorithm>
-#include <cmath>
  #include <limits>
  #include <list>
  #include <vector>
+#include <stack>
  #ifdef DEBUGFACTOR
  #include <ostream>
  #endif
@@ -57,57 +81,61 @@ using namespace cln;
  
  namespace GiNaC {
  
-#ifdef DEBUGFACTOR
-#define DCOUT(str) cout << #str << endl
-#define DCOUTVAR(var) cout << #var << ": " << var << endl
-#define DCOUT2(str,var) cout << #str << ": " << var << endl
-#else
-#define DCOUT(str)
-#define DCOUTVAR(var)
-#define DCOUT2(str,var)
-#endif
-
  // anonymous namespace to hide all utility functions
  namespace {
  
-typedef vector<cl_MI> mvec;
  #ifdef DEBUGFACTOR
+#define DCOUT(str) cout << #str << endl
+#define DCOUTVAR(var) cout << #var << ": " << var << endl
+#define DCOUT2(str,var) cout << #str << ": " << var << endl
  ostream& operator<<(ostream& o, const vector<int>& v)
  {
-       vector<int>::const_iterator i = v.begin(), end = v.end();
+       auto i = v.begin(), end = v.end();
         while ( i != end ) {
-               o << *i++ << " ";
+               o << *i << " ";
+               ++i;
         }
         return o;
  }
-ostream& operator<<(ostream& o, const vector<cl_I>& v)
+static ostream& operator<<(ostream& o, const vector<cl_I>& v)
  {
-       vector<cl_I>::const_iterator i = v.begin(), end = v.end();
+       auto i = v.begin(), end = v.end();
         while ( i != end ) {
                 o << *i << "[" << i-v.begin() << "]" << " ";
                 ++i;
         }
         return o;
  }
-ostream& operator<<(ostream& o, const vector<cl_MI>& v)
+static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
  {
-       vector<cl_MI>::const_iterator i = v.begin(), end = v.end();
+       auto i = v.begin(), end = v.end();
         while ( i != end ) {
                 o << *i << "[" << i-v.begin() << "]" << " ";
                 ++i;
         }
         return o;
  }
-ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
+ostream& operator<<(ostream& o, const vector<numeric>& v)
  {
-       vector< vector<cl_MI> >::const_iterator i = v.begin(), end = v.end();
+       for ( size_t i=0; i<v.size(); ++i ) {
+               o << v[i] << " ";
+       }
+       return o;
+}
+ostream& operator<<(ostream& o, const vector<vector<cl_MI>>& v)
+{
+       auto i = v.begin(), end = v.end();
         while ( i != end ) {
                 o << i-v.begin() << ": " << *i << endl;
                 ++i;
         }
         return o;
  }
-#endif
+#else
+#define DCOUT(str)
+#define DCOUTVAR(var)
+#define DCOUT2(str,var)
+#endif // def DEBUGFACTOR
  
  ////////////////////////////////////////////////////////////////////////////////
  // modular univariate polynomial code
@@ -116,7 +144,8 @@ typedef std::vector<cln::cl_MI> umodpoly;
  typedef std::vector<cln::cl_I> upoly;
  typedef vector<umodpoly> upvec;
  
-// COPY FROM UPOLY.HPP
+
+// COPY FROM UPOLY.H
  
  // CHANGED size_t -> int !!!
  template<typename T> static int degree(const T& p)
@@ -129,6 +158,11 @@ template<typename T> static typename T::value_type lcoeff(const T& p)
         return p[p.size() - 1];
  }
  
+/** Make the polynomial unit normal (having unit normal leading coefficient).
+ *
+ *  @param[in, out] a  polynomial to make unit normal
+ *  @return            true if polynomial a was already unit normal, false otherwise
+ */
  static bool normalize_in_field(umodpoly& a)
  {
         if (a.size() == 0)
@@ -143,57 +177,42 @@ static bool normalize_in_field(umodpoly& a)
         return false;
  }
  
+/** Remove leading zero coefficients from polynomial.
+ *
+ *  @param[in, out] p     polynomial from which the zero leading coefficients will be removed
+ *  @param[in]      hint  assume all coefficients of order ≥ hint are zero
+ */
  template<typename T> static void
  canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
  {
-       if (p.empty())
-               return;
+       std::size_t i = min(p.size(), hint);
  
-       std::size_t i = p.size() - 1;
-       // Be fast if the polynomial is already canonicalized
-       if (!zerop(p[i]))
-               return;
+       while ( i-- && zerop(p[i]) ) { }
  
-       if (hint < p.size())
-               i = hint;
+       p.erase(p.begin() + i + 1, p.end());
+}
  
-       bool is_zero = false;
-       do {
-               if (!zerop(p[i])) {
-                       ++i;
-                       break;
-               }
-               if (i == 0) {
-                       is_zero = true;
-                       break;
-               }
-               --i;
-       } while (true);
+// END COPY FROM UPOLY.H
  
-       if (is_zero) {
-               p.clear();
-               return;
-       }
-
-       p.erase(p.begin() + i, p.end());
-}
+template<typename T> struct uvar_poly_p
+{
+       static const bool value = false;
+};
  
-// END COPY FROM UPOLY.HPP
+template<> struct uvar_poly_p<upoly>
+{
+       static const bool value = true;
+};
  
-static void expt_pos(umodpoly& a, unsigned int q)
+template<> struct uvar_poly_p<umodpoly>
  {
-       if ( a.empty() ) return;
-       cl_MI zero = a[0].ring()->zero(); 
-       int deg = degree(a);
-       a.resize(degree(a)*q+1, zero);
-       for ( int i=deg; i>0; --i ) {
-               a[i*q] = a[i];
-               a[i] = zero;
-       }
-}
+       static const bool value = true;
+};
  
  template<typename T>
-static T operator+(const T& a, const T& b)
+// Don't define this for anything but univariate polynomials.
+static typename enable_if<uvar_poly_p<T>::value, T>::type
+operator+(const T& a, const T& b)
  {
         int sa = a.size();
         int sb = b.size();
@@ -224,7 +243,11 @@ static T operator+(const T& a, const T& b)
  }
  
  template<typename T>
-static T operator-(const T& a, const T& b)
+// Don't define this for anything but univariate polynomials. Otherwise
+// overload resolution might fail (this actually happens when compiling
+// GiNaC with g++ 3.4).
+static typename enable_if<uvar_poly_p<T>::value, T>::type
+operator-(const T& a, const T& b)
  {
         int sa = a.size();
         int sb = b.size();
@@ -365,10 +388,12 @@ static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_m
         canonicalize(ump);
  }
  
+#ifdef DEBUGFACTOR
  static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
  {
         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
  }
+#endif
  
  static ex upoly_to_ex(const upoly& a, const ex& x)
  {
@@ -430,23 +455,22 @@ static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R,
         return e;
  }
  
-/** Divides all coefficients of the polynomial a by the integer x.
+/** Divides all coefficients of the polynomial a by the positive integer x.
   *  All coefficients are supposed to be divisible by x. If they are not, the
- *  the<cl_I> cast will raise an exception.
+ *  division will raise an exception.
   *
   *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
- *  @param[in]     x  integer that divides the coefficients
+ *  @param[in]     x  positive integer that divides the coefficients
   */
  static void reduce_coeff(umodpoly& a, const cl_I& x)
  {
         if ( a.empty() ) return;
  
         cl_modint_ring R = a[0].ring();
-       umodpoly::iterator i = a.begin(), end = a.end();
-       for ( ; i!=end; ++i ) {
+       for (auto & i : a) {
                 // cln cannot perform this division in the modular field
-               cl_I c = R->retract(*i);
-               *i = cl_MI(R, the<cl_I>(c / x));
+               cl_I c = R->retract(i);
+               i = cl_MI(R, exquopos(c, x));
         }
  }
  
@@ -476,7 +500,7 @@ static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
         } while ( k-- );
  
         fill(r.begin()+n, r.end(), a[0].ring()->zero());
-       canonicalize(r);
+       canonicalize(r, n);
  }
  
  /** Calculates quotient of a/b.
@@ -587,7 +611,6 @@ static void deriv(const umodpoly& a, umodpoly& d)
  
  static bool unequal_one(const umodpoly& a)
  {
-       if ( a.empty() ) return true;
         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
  }
  
@@ -613,15 +636,39 @@ static bool squarefree(const umodpoly& a)
         return equal_one(c);
  }
  
+/** Computes w^q mod a.
+ *  Uses theorem 2.1 from A.K.Lenstra's PhD thesis; see exercise 8.13 in [GCL].
+ *
+ *  @param[in]  w  polynomial
+ *  @param[in]  a  modulus polynomial
+ *  @param[in]  q  common modulus of w and a
+ *  @param[out] r  result
+ */
+static void expt_pos_Q(const umodpoly& w, const umodpoly& a, unsigned int q, umodpoly& r)
+{
+       if ( w.empty() ) return;
+       cl_MI zero = w[0].ring()->zero();
+       int deg = degree(w);
+       umodpoly buf(deg*q+1, zero);
+       for ( size_t i=0; i<=deg; ++i ) {
+               buf[i*q] = w[i];
+       }
+       rem(buf, a, r);
+}
+
  // END modular univariate polynomial code
  ////////////////////////////////////////////////////////////////////////////////
  
  ////////////////////////////////////////////////////////////////////////////////
  // modular matrix
  
+typedef vector<cl_MI> mvec;
+
  class modular_matrix
  {
+#ifdef DEBUGFACTOR
         friend ostream& operator<<(ostream& o, const modular_matrix& m);
+#endif
  public:
         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
         {
@@ -633,90 +680,75 @@ public:
         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
         void mul_col(size_t col, const cl_MI x)
         {
-               mvec::iterator i = m.begin() + col;
                 for ( size_t rc=0; rc<r; ++rc ) {
-                       *i = *i * x;
-                       i += c;
+                       std::size_t i = c*rc + col;
+                       m[i] = m[i] * x;
                 }
         }
         void sub_col(size_t col1, size_t col2, const cl_MI fac)
         {
-               mvec::iterator i1 = m.begin() + col1;
-               mvec::iterator i2 = m.begin() + col2;
                 for ( size_t rc=0; rc<r; ++rc ) {
-                       *i1 = *i1 - *i2 * fac;
-                       i1 += c;
-                       i2 += c;
+                       std::size_t i1 = col1 + c*rc;
+                       std::size_t i2 = col2 + c*rc;
+                       m[i1] = m[i1] - m[i2]*fac;
                 }
         }
         void switch_col(size_t col1, size_t col2)
         {
-               cl_MI buf;
-               mvec::iterator i1 = m.begin() + col1;
-               mvec::iterator i2 = m.begin() + col2;
                 for ( size_t rc=0; rc<r; ++rc ) {
-                       buf = *i1; *i1 = *i2; *i2 = buf;
-                       i1 += c;
-                       i2 += c;
+                       std::size_t i1 = col1 + rc*c;
+                       std::size_t i2 = col2 + rc*c;
+                       std::swap(m[i1], m[i2]);
                 }
         }
         void mul_row(size_t row, const cl_MI x)
         {
-               vector<cl_MI>::iterator i = m.begin() + row*c;
                 for ( size_t cc=0; cc<c; ++cc ) {
-                       *i = *i * x;
-                       ++i;
+                       std::size_t i = row*c + cc; 
+                       m[i] = m[i] * x;
                 }
         }
         void sub_row(size_t row1, size_t row2, const cl_MI fac)
         {
-               vector<cl_MI>::iterator i1 = m.begin() + row1*c;
-               vector<cl_MI>::iterator i2 = m.begin() + row2*c;
                 for ( size_t cc=0; cc<c; ++cc ) {
-                       *i1 = *i1 - *i2 * fac;
-                       ++i1;
-                       ++i2;
+                       std::size_t i1 = row1*c + cc;
+                       std::size_t i2 = row2*c + cc;
+                       m[i1] = m[i1] - m[i2]*fac;
                 }
         }
         void switch_row(size_t row1, size_t row2)
         {
-               cl_MI buf;
-               vector<cl_MI>::iterator i1 = m.begin() + row1*c;
-               vector<cl_MI>::iterator i2 = m.begin() + row2*c;
                 for ( size_t cc=0; cc<c; ++cc ) {
-                       buf = *i1; *i1 = *i2; *i2 = buf;
-                       ++i1;
-                       ++i2;
+                       std::size_t i1 = row1*c + cc;
+                       std::size_t i2 = row2*c + cc;
+                       std::swap(m[i1], m[i2]);
                 }
         }
         bool is_col_zero(size_t col) const
         {
-               mvec::const_iterator i = m.begin() + col;
                 for ( size_t rr=0; rr<r; ++rr ) {
-                       if ( !zerop(*i) ) {
+                       std::size_t i = col + rr*c;
+                       if ( !zerop(m[i]) ) {
                                 return false;
                         }
-                       i += c;
                 }
                 return true;
         }
         bool is_row_zero(size_t row) const
         {
-               mvec::const_iterator i = m.begin() + row*c;
                 for ( size_t cc=0; cc<c; ++cc ) {
-                       if ( !zerop(*i) ) {
+                       std::size_t i = row*c + cc;
+                       if ( !zerop(m[i]) ) {
                                 return false;
                         }
-                       ++i;
                 }
                 return true;
         }
         void set_row(size_t row, const vector<cl_MI>& newrow)
         {
-               mvec::iterator i1 = m.begin() + row*c;
-               mvec::const_iterator i2 = newrow.begin(), end = newrow.end();
-               for ( ; i2 != end; ++i1, ++i2 ) {
-                       *i1 = *i2;
+               for (std::size_t i2 = 0; i2 < newrow.size(); ++i2) {
+                       std::size_t i1 = row*c + i2;
+                       m[i1] = newrow[i2];
                 }
         }
         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
@@ -768,6 +800,13 @@ ostream& operator<<(ostream& o, const modular_matrix& m)
  // END modular matrix
  ////////////////////////////////////////////////////////////////////////////////
  
+/** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
+ *
+ *  The implementation follows algorithm 8.5 of [GCL].
+ *
+ *  @param[in]  a_  modular polynomial
+ *  @param[out] Q   Q matrix
+ */
  static void q_matrix(const umodpoly& a_, modular_matrix& Q)
  {
         umodpoly a = a_;
@@ -791,6 +830,11 @@ static void q_matrix(const umodpoly& a_, modular_matrix& Q)
         }
  }
  
+/** Determine the nullspace of a matrix M-1.
+ *
+ *  @param[in,out] M      matrix, will be modified
+ *  @param[out]    basis  calculated nullspace of M-1
+ */
  static void nullspace(modular_matrix& M, vector<mvec>& basis)
  {
         const size_t n = M.rowsize();
@@ -835,11 +879,20 @@ static void nullspace(modular_matrix& M, vector<mvec>& basis)
         }
  }
  
+/** Berlekamp's modular factorization.
+ *  
+ *  The implementation follows algorithm 8.4 of [GCL].
+ *
+ *  @param[in]  a    modular polynomial
+ *  @param[out] upv  vector containing modular factors. If upv was not empty, the
+ *                   new elements are added at the end
+ */
  static void berlekamp(const umodpoly& a, upvec& upv)
  {
         cl_modint_ring R = a[0].ring();
         umodpoly one(1, R->one());
  
+       // find nullspace of Q matrix
         modular_matrix Q(degree(a), degree(a), R->zero());
         q_matrix(a, Q);
         vector<mvec> nu;
@@ -847,17 +900,18 @@ static void berlekamp(const umodpoly& a, upvec& upv)
  
         const unsigned int k = nu.size();
         if ( k == 1 ) {
+               // irreducible
                 return;
         }
  
-       list<umodpoly> factors;
-       factors.push_back(a);
+       list<umodpoly> factors = {a};
         unsigned int size = 1;
         unsigned int r = 1;
         unsigned int q = cl_I_to_uint(R->modulus);
  
         list<umodpoly>::iterator u = factors.begin();
  
+       // calculate all gcd's
         while ( true ) {
                 for ( unsigned int s=0; s<q; ++s ) {
                         umodpoly nur = nu[r];
@@ -870,21 +924,18 @@ static void berlekamp(const umodpoly& a, upvec& upv)
                                 div(*u, g, uo);
                                 if ( equal_one(uo) ) {
                                         throw logic_error("berlekamp: unexpected divisor.");
-                               }
-                               else {
+                               } else {
                                         *u = uo;
                                 }
                                 factors.push_back(g);
                                 size = 0;
-                               list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
-                               while ( i != end ) {
-                                       if ( degree(*i) ) ++size; 
-                                       ++i;
+                               for (auto & i : factors) {
+                                       if (degree(i))
+                                               ++size;
                                 }
                                 if ( size == k ) {
-                                       list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
-                                       while ( i != end ) {
-                                               upv.push_back(*i++);
+                                       for (auto & i : factors) {
+                                               upv.push_back(i);
                                         }
                                         return;
                                 }
@@ -897,6 +948,16 @@ static void berlekamp(const umodpoly& a, upvec& upv)
         }
  }
  
+// modular square free factorization is not used at the moment so we deactivate
+// the code
+#if 0
+
+/** Calculates a^(1/prime).
+ *  
+ *  @param[in]  a      polynomial
+ *  @param[in]  prime  prime number -> exponent 1/prime
+ *  @param[out] ap     resulting polynomial
+ */
  static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
  {
         size_t newdeg = degree(a)/prime;
@@ -907,6 +968,12 @@ static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
         }
  }
  
+/** Modular square free factorization.
+ *
+ *  @param[in]  a        polynomial
+ *  @param[out] factors  modular factors
+ *  @param[out] mult     corresponding multiplicities (exponents)
+ */
  static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
  {
         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
@@ -940,8 +1007,7 @@ static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
                                 mult[i] *= prime;
                         }
                 }
-       }
-       else {
+       } else {
                 umodpoly ap;
                 expt_1_over_p(a, prime, ap);
                 size_t previ = mult.size();
@@ -952,6 +1018,18 @@ static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
         }
  }
  
+#endif // deactivation of square free factorization
+
+/** Distinct degree factorization (DDF).
+ *  
+ *  The implementation follows algorithm 8.8 of [GCL].
+ *
+ *  @param[in]  a_         modular polynomial
+ *  @param[out] degrees    vector containing the degrees of the factors of the
+ *                         corresponding polynomials in ddfactors.
+ *  @param[out] ddfactors  vector containing polynomials whose factors have the
+ *                         degree given in degrees.
+ */
  static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
  {
         umodpoly a = a_;
@@ -961,23 +1039,17 @@ static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upv
         int nhalf = degree(a)/2;
  
         int i = 1;
-       umodpoly w(2);
-       w[0] = R->zero();
-       w[1] = R->one();
+       umodpoly w = {R->zero(), R->one()};
         umodpoly x = w;
  
         while ( i <= nhalf ) {
-               expt_pos(w, q);
                 umodpoly buf;
-               rem(w, a, buf);
+               expt_pos_Q(w, a, q, buf);
                 w = buf;
-               umodpoly wx = w - x;
-               gcd(a, wx, buf);
+               gcd(a, w - x, buf);
                 if ( unequal_one(buf) ) {
                         degrees.push_back(i);
                         ddfactors.push_back(buf);
-               }
-               if ( unequal_one(buf) ) {
                         umodpoly buf2;
                         div(a, buf, buf2);
                         a = buf2;
@@ -993,6 +1065,16 @@ static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upv
         }
  }
  
+/** Modular same degree factorization.
+ *  Same degree factorization is a kind of misnomer. It performs distinct degree
+ *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
+ *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
+ *  degree.
+ *
+ *  @param[in]  a    modular polynomial
+ *  @param[out] upv  vector containing modular factors. If upv was not empty, the
+ *                   new elements are added at the end
+ */
  static void same_degree_factor(const umodpoly& a, upvec& upv)
  {
         cl_modint_ring R = a[0].ring();
@@ -1004,15 +1086,25 @@ static void same_degree_factor(const umodpoly& a, upvec& upv)
         for ( size_t i=0; i<degrees.size(); ++i ) {
                 if ( degrees[i] == degree(ddfactors[i]) ) {
                         upv.push_back(ddfactors[i]);
-               }
-               else {
+               } else {
                         berlekamp(ddfactors[i], upv);
                 }
         }
  }
  
+// Compile-time switch: when defined, factor_modular() uses same degree factorization.
  #define USE_SAME_DEGREE_FACTOR
  
+/** Modular univariate factorization.
+ *
+ *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
+ *  and same degree factorization (SDF). SDF seems to be slightly faster in
+ *  almost all cases so it is activated as default.
+ *
+ *  @param[in]  p    modular polynomial
+ *  @param[out] upv  vector containing modular factors. If upv was not empty, the
+ *                   new elements are added at the end
+ */
  static void factor_modular(const umodpoly& p, upvec& upv)
  {
  #ifdef USE_SAME_DEGREE_FACTOR
@@ -1022,7 +1114,7 @@ static void factor_modular(const umodpoly& p, upvec& upv)
  #endif
  }
  
-/** Calculates polynomials s and t such that a*s+b*t==1.
+/** Calculates modular polynomials s and t such that a*s+b*t==1.
   *  Assertion: a and b are relatively prime and not zero.
   *
   *  @param[in]  a  polynomial
@@ -1059,19 +1151,23 @@ static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpol
                 d2 = r2;
         }
         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
-       umodpoly::iterator i = s.begin(), end = s.end();
-       for ( ; i!=end; ++i ) {
-               *i = *i * fac;
+       for (auto & i : s) {
+               i = i * fac;
         }
         canonicalize(s);
         fac = recip(lcoeff(b) * lcoeff(c));
-       i = t.begin(), end = t.end();
-       for ( ; i!=end; ++i ) {
-               *i = *i * fac;
+       for (auto & i : t) {
+               i = i * fac;
         }
         canonicalize(t);
  }
  
+/** Replaces the leading coefficient in a polynomial by a given number.
+ *
+ *  @param[in] poly  polynomial to change
+ *  @param[in] lc    new leading coefficient
+ *  @return          changed polynomial
+ */
  static upoly replace_lc(const upoly& poly, const cl_I& lc)
  {
         if ( poly.empty() ) return poly;
@@ -1080,34 +1176,44 @@ static upoly replace_lc(const upoly& poly, const cl_I& lc)
         return r;
  }
  
-static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
+/** Calculates bound for the product of absolute values (modulus) of the roots.
+ *  Uses Landau's inequality, see [Mig].
+ */
+static inline cl_I calc_bound(const ex& a, const ex& x)
  {
-       cl_I maxcoeff = 0;
-       cl_R coeff = 0;
+       cl_R radicand = 0;
         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
-               if ( aa > maxcoeff ) maxcoeff = aa;
-               coeff = coeff + square(aa);
+               radicand = radicand + square(aa);
         }
-       cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
-       cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
-       return ( B > maxcoeff ) ? B : maxcoeff;
+       return ceiling1(the<cl_R>(cln::sqrt(radicand)));
  }
  
-static inline cl_I calc_bound(const upoly& a, int maxdeg)
+/** Calculates bound for the product of absolute values (modulus) of the roots.
+ *  Uses Landau's inequality, see [Mig].
+ */
+static inline cl_I calc_bound(const upoly& a)
  {
-       cl_I maxcoeff = 0;
-       cl_R coeff = 0;
+       cl_R radicand = 0;
         for ( int i=degree(a); i>=0; --i ) {
                 cl_I aa = abs(a[i]);
-               if ( aa > maxcoeff ) maxcoeff = aa;
-               coeff = coeff + square(aa);
+               radicand = radicand + square(aa);
         }
-       cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
-       cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
-       return ( B > maxcoeff ) ? B : maxcoeff;
+       return ceiling1(the<cl_R>(cln::sqrt(radicand)));
  }
  
+/** Hensel lifting as used by factor_univariate().
+ *
+ *  The implementation follows algorithm 6.1 of [GCL].
+ *
+ *  @param[in]  a_   primitive univariate polynomial
+ *  @param[in]  p    prime number that does not divide lcoeff(a)
+ *  @param[in]  u1_  modular factor of a (mod p)
+ *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
+ *                   fulfilling  u1_*w1_ == a mod p
+ *  @param[out] u    lifted factor
+ *  @param[out] w    lifted factor, u*w = a
+ */
  static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
  {
         upoly a = a_;
@@ -1115,7 +1221,7 @@ static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_,
  
         // calc bound B
         int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
-       cl_I maxmodulus = 2*calc_bound(a, maxdeg);
+       cl_I maxmodulus = ash(calc_bound(a), maxdeg+1);  // = 2 * calc_bound(a) * 2^maxdeg
  
         // step 1
         cl_I alpha = lcoeff(a);
@@ -1178,45 +1284,46 @@ static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_,
                 if ( alpha != 1 ) {
                         w = w / alpha;
                 }
-       }
-       else {
+       } else {
                 u.clear();
         }
  }
  
-static unsigned int next_prime(unsigned int p)
-{
-       static vector<unsigned int> primes;
-       if ( primes.size() == 0 ) {
-               primes.push_back(3); primes.push_back(5); primes.push_back(7);
-       }
-       vector<unsigned int>::const_iterator it = primes.begin();
-       if ( p >= primes.back() ) {
-               unsigned int candidate = primes.back() + 2;
-               while ( true ) {
-                       size_t n = primes.size()/2;
-                       for ( size_t i=0; i<n; ++i ) {
-                               if ( candidate % primes[i] ) continue;
-                               candidate += 2;
-                               i=-1;
+/** Returns the next prime number after a given integer.
+ *
+ *  @param[in] n  an integer
+ *  @return       smallest prime greater than n
+ */
+static unsigned int next_prime(unsigned int n)
+{
+       static vector<unsigned int> primes = {2, 3, 5, 7};
+       unsigned int candidate = primes.back();
+       while (primes.back() <= n) {
+               candidate += 2;
+               bool is_prime = true;
+               for (size_t i=1; primes[i]*primes[i]<=candidate; ++i) {
+                       if (candidate % primes[i] == 0) {
+                               is_prime = false;
+                               break;
                         }
-                       primes.push_back(candidate);
-                       if ( candidate > p ) break;
                 }
-               return candidate;
+               if (is_prime)
+                       primes.push_back(candidate);
         }
-       vector<unsigned int>::const_iterator end = primes.end();
-       for ( ; it!=end; ++it ) {
-               if ( *it > p ) {
-                       return *it;
+       for (auto & it : primes) {
+               if ( it > n ) {
+                       return it;
                 }
         }
         throw logic_error("next_prime: should not reach this point!");
  }
  
+/** Manages the splitting of a vector of modular factors into two partitions.
+ */
  class factor_partition
  {
  public:
+       /** Takes the vector of modular factors and initializes the first partition */
         factor_partition(const upvec& factors_) : factors(factors_)
         {
                 n = factors.size();
@@ -1232,9 +1339,8 @@ public:
         size_t size() const { return n; }
         size_t size_left() const { return n-len; }
         size_t size_right() const { return len; }
-#ifdef DEBUGFACTOR
-       void get() const { DCOUTVAR(k); }
-#endif
+       /** Initializes the next partition.
+           Returns true, if there is one, false otherwise. */
         bool next()
         {
                 if ( last == n-1 ) {
@@ -1264,15 +1370,16 @@ public:
                         if ( len > n/2 ) return false;
                         fill(k.begin(), k.begin()+len, 1);
                         fill(k.begin()+len+1, k.end(), 0);
-               }
-               else {
+               } else {
                         k[last++] = 0;
                         k[last] = 1;
                 }
                 split();
                 return true;
         }
+       /** Get first partition */
         umodpoly& left() { return lr[0]; }
+       /** Get second partition */
         umodpoly& right() { return lr[1]; }
  private:
         void split_cached()
@@ -1286,8 +1393,7 @@ private:
                         if ( d ) {
                                 if ( cache[pos].size() >= d ) {
                                         lr[group] = lr[group] * cache[pos][d-1];
-                               }
-                               else {
+                               } else {
                                         if ( cache[pos].size() == 0 ) {
                                                 cache[pos].push_back(factors[pos] * factors[pos+1]);
                                         }
@@ -1301,8 +1407,7 @@ private:
                                         }
                                         lr[group] = lr[group] * cache[pos].back();
                                 }
-                       }
-                       else {
+                       } else {
                                 lr[group] = lr[group] * factors[pos];
                         }
                 } while ( i < n );
@@ -1313,8 +1418,7 @@ private:
                 lr[1] = one;
                 if ( n > 6 ) {
                         split_cached();
-               }
-               else {
+               } else {
                         for ( size_t i=0; i<n; ++i ) {
                                 lr[k[i]] = lr[k[i]] * factors[i];
                         }
@@ -1322,7 +1426,7 @@ private:
         }
  private:
         umodpoly lr[2];
-       vector< vector<umodpoly> > cache;
+       vector<vector<umodpoly>> cache;
         upvec factors;
         umodpoly one;
         size_t n;
@@ -1331,32 +1435,59 @@ private:
         vector<int> k;
  };
  
+/** Holds a univariate polynomial together with its modular factors.
+ *  Used by factor_univariate().
+ */
  struct ModFactors
  {
         upoly poly;
         upvec factors;
  };
  
-static ex factor_univariate(const ex& poly, const ex& x)
+/** Univariate polynomial factorization.
+ *
+ *  Modular factorization is tried for several primes to minimize the number of
+ *  modular factors. Then, Hensel lifting is performed.
+ *
+ *  @param[in]     poly   expanded square free univariate polynomial
+ *  @param[in]     x      symbol
+ *  @param[in,out] prime  the incoming value is ignored (the search always starts above 3); on return
+ *                        it holds the prime number actually used (untouched if the primitive part is 1)
+ */
+static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
  {
         ex unit, cont, prim_ex;
         poly.unitcontprim(x, unit, cont, prim_ex);
         upoly prim;
         upoly_from_ex(prim, prim_ex, x);
+       if (prim_ex.is_equal(1)) {
+               return poly;
+       }
  
         // determine proper prime and minimize number of modular factors
-       unsigned int p = 3, lastp = 3;
+       prime = 3;
+       unsigned int lastp = prime;
         cl_modint_ring R;
         unsigned int trials = 0;
         unsigned int minfactors = 0;
-       cl_I lc = lcoeff(prim);
+
+       const numeric& cont_n = ex_to<numeric>(cont);
+       cl_I i_cont;
+       if (cont_n.is_integer()) {
+               i_cont = the<cl_I>(cont_n.to_cl_N());
+       } else {
+               // poly \in Q[x] => poly = q ipoly, ipoly \in Z[x], q \in Q
+               // factor(poly) \equiv q factor(ipoly)
+               i_cont = cl_I(1);
+       }
+       cl_I lc = lcoeff(prim)*i_cont;
         upvec factors;
         while ( trials < 2 ) {
                 umodpoly modpoly;
                 while ( true ) {
-                       p = next_prime(p);
-                       if ( !zerop(rem(lc, p)) ) {
-                               R = find_modint_ring(p);
+                       prime = next_prime(prime);
+                       if ( !zerop(rem(lc, prime)) ) {
+                               R = find_modint_ring(prime);
                                 umodpoly_from_upoly(modpoly, prim, R);
                                 if ( squarefree(modpoly) ) break;
                         }
@@ -1373,15 +1504,14 @@ static ex factor_univariate(const ex& poly, const ex& x)
                 if ( minfactors == 0 || trialfactors.size() < minfactors ) {
                         factors = trialfactors;
                         minfactors = trialfactors.size();
-                       lastp = p;
+                       lastp = prime;
                         trials = 1;
-               }
-               else {
+               } else {
                         ++trials;
                 }
         }
-       p = lastp;
-       R = find_modint_ring(p);
+       prime = lastp;
+       R = find_modint_ring(prime);
  
         // lift all factor combinations
         stack<ModFactors> tocheck;
@@ -1395,8 +1525,10 @@ static ex factor_univariate(const ex& poly, const ex& x)
                 const size_t n = tocheck.top().factors.size();
                 factor_partition part(tocheck.top().factors);
                 while ( true ) {
-                       hensel_univar(tocheck.top().poly, p, part.left(), part.right(), f1, f2);
+                       // call Hensel lifting
+                       hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
                         if ( !f1.empty() ) {
+                               // successful, update the stack and the result
                                 if ( part.size_left() == 1 ) {
                                         if ( part.size_right() == 1 ) {
                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
@@ -1428,15 +1560,13 @@ static ex factor_univariate(const ex& poly, const ex& x)
                                                 }
                                         }
                                         break;
-                               }
-                               else {
+                               } else {
                                         upvec newfactors1(part.size_left()), newfactors2(part.size_right());
-                                       upvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
+                                       auto i1 = newfactors1.begin(), i2 = newfactors2.begin();
                                         for ( size_t i=0; i<n; ++i ) {
                                                 if ( part[i] ) {
                                                         *i2++ = tocheck.top().factors[i];
-                                               }
-                                               else {
+                                               } else {
                                                         *i1++ = tocheck.top().factors[i];
                                                 }
                                         }
@@ -1448,9 +1578,11 @@ static ex factor_univariate(const ex& poly, const ex& x)
                                         tocheck.push(mf);
                                         break;
                                 }
-                       }
-                       else {
+                       } else {
+                               // not successful
                                 if ( !part.next() ) {
+                                       // if no more combinations left, return polynomial as
+                                       // irreducible
                                         result *= upoly_to_ex(tocheck.top().poly, x);
                                         tocheck.pop();
                                         break;
@@ -1462,16 +1594,52 @@ static ex factor_univariate(const ex& poly, const ex& x)
         return unit * cont * result;
  }
  
+/** Second interface to factor_univariate() to be used if the information about
+ *  the prime is not needed.
+ */
+static inline ex factor_univariate(const ex& poly, const ex& x)
+{
+       unsigned int prime;
+       return factor_univariate(poly, x, prime);
+}
+
+/** Represents an evaluation point (<symbol>==<integer>).
+ */
  struct EvalPoint
  {
         ex x;
         int evalpoint;
  };
  
+#ifdef DEBUGFACTOR
+ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
+{
+       for ( size_t i=0; i<v.size(); ++i ) {
+               o << "(" << v[i].x << "==" << v[i].evalpoint << ") ";
+       }
+       return o;
+}
+#endif // def DEBUGFACTOR
+
  // forward declaration
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
  
-upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Solves the equation
+ *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
+ *  with deg(s_i) < deg(a_i)
+ *  and with given b_i = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
+ *
+ *  The implementation follows algorithm 6.3 of [GCL].
+ *
+ *  @param[in]  a   vector of modular univariate polynomials
+ *  @param[in]  x   symbol
+ *  @param[in]  p   prime number
+ *  @param[in]  k   p^k is modulus
+ *  @return         vector of polynomials (s_i)
+ */
+static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
  {
         const size_t r = a.size();
         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
@@ -1499,21 +1667,36 @@ upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned i
         return s;
  }
  
-/**
- *  Assert: a not empty.
+/** Changes the modulus of a modular polynomial. Used by eea_lift().
+ *
+ *  @param[in]     R  new modular ring
+ *  @param[in,out] a  polynomial to change (in situ)
   */
-void change_modulus(const cl_modint_ring& R, umodpoly& a)
+static void change_modulus(const cl_modint_ring& R, umodpoly& a)
  {
         if ( a.empty() ) return;
         cl_modint_ring oldR = a[0].ring();
-       umodpoly::iterator i = a.begin(), end = a.end();
-       for ( ; i!=end; ++i ) {
-               *i = R->canonhom(oldR->retract(*i));
+       for (auto & i : a) {
+               i = R->canonhom(oldR->retract(i));
         }
         canonicalize(a);
  }
  
-void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Solves  s*a + t*b == 1 mod p^k  given a,b.
+ *
+ *  The implementation follows algorithm 6.3 of [GCL].
+ *
+ *  @param[in]  a   polynomial
+ *  @param[in]  b   polynomial
+ *  @param[in]  x   symbol
+ *  @param[in]  p   prime number
+ *  @param[in]  k   p^k is modulus
+ *  @param[out] s_  output polynomial
+ *  @param[out] t_  output polynomial
+ */
+static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
  {
         cl_modint_ring R = find_modint_ring(p);
         umodpoly amod = a;
@@ -1556,7 +1739,22 @@ void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p,
         s_ = s; t_ = t;
  }
  
-upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Solves the equation
+ *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
+ *  with given b_i = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
+ *
+ *  The implementation follows algorithm 6.3 of [GCL].
+ *
+ *  @param a  vector with univariate polynomials mod p^k
+ *  @param x  symbol
+ *  @param m  exponent of x^m in the equation to solve
+ *  @param p  prime number
+ *  @param k  p^k is modulus
+ *  @return   vector of polynomials (s_i)
+ */
+static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
  {
         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
  
@@ -1570,8 +1768,7 @@ upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int
                         rem(bmod, a[j], buf);
                         result.push_back(buf);
                 }
-       }
-       else {
+       } else {
                 umodpoly s, t;
                 eea_lift(a[1], a[0], x, p, k, s, t);
                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
@@ -1586,10 +1783,14 @@ upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int
         return result;
  }
  
+/** Map used by function make_modular().
+ *  Finds every coefficient in a polynomial and replaces it by its value in the
+ *  given modular ring R (symmetric representation).
+ */
  struct make_modular_map : public map_function {
         cl_modint_ring R;
         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
-       ex operator()(const ex& e)
+       ex operator()(const ex& e) override
         {
                 if ( is_a<add>(e) || is_a<mul>(e) ) {
                         return e.map(*this);
@@ -1601,8 +1802,7 @@ struct make_modular_map : public map_function {
                         numeric n(R->retract(emod));
                         if ( n > halfmod ) {
                                 return n-mod;
-                       }
-                       else {
+                       } else {
                                 return n;
                         }
                 }
@@ -1610,17 +1810,43 @@ struct make_modular_map : public map_function {
         }
  };
  
+/** Helps mimicking modular multivariate polynomial arithmetic.
+ *
+ *  @param e  expression of which to make the coefficients equal to their value
+ *            in the modular ring R (symmetric representation)
+ *  @param R  modular ring
+ *  @return   resulting expression
+ */
  static ex make_modular(const ex& e, const cl_modint_ring& R)
  {
         make_modular_map map(R);
         return map(e.expand());
  }
  
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Returns the polynomials s_i that fulfill
+ *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
+ *  with given b_i = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
+ *
+ *  The implementation follows algorithm 6.2 of [GCL].
+ *
+ *  @param a_  vector of multivariate factors mod p^k
+ *  @param x   symbol (equiv. x_1 in [GCL])
+ *  @param c   polynomial mod p^k
+ *  @param I   vector of evaluation points
+ *  @param d   maximum total degree of result
+ *  @param p   prime number
+ *  @param k   p^k is modulus
+ *  @return    vector of polynomials (s_i)
+ */
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
+                                    unsigned int d, unsigned int p, unsigned int k)
  {
         vector<ex> a = a_;
  
-       const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
+       const cl_I modulus = expt_pos(cl_I(p),k);
+       const cl_modint_ring R = find_modint_ring(modulus);
         const size_t r = a.size();
         const size_t nu = I.size() + 1;
  
@@ -1670,8 +1896,7 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
                                 e = make_modular(buf, R);
                         }
                 }
-       }
-       else {
+       } else {
                 upvec amod;
                 for ( size_t i=0; i<a.size(); ++i ) {
                         umodpoly up;
@@ -1685,8 +1910,7 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
                 if ( is_a<add>(c) ) {
                         nterms = c.nops();
                         z = c.op(0);
-               }
-               else {
+               } else {
                         nterms = 1;
                         z = c;
                 }
@@ -1695,16 +1919,13 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
                         upvec delta_s = univar_diophant(amod, x, m, p, k);
                         cl_MI modcm;
-                       cl_I poscm = cm;
-                       while ( poscm < 0 ) {
-                               poscm = poscm + expt_pos(cl_I(p),k);
-                       }
+                       cl_I poscm = plusp(cm) ? cm : mod(cm, modulus);
                         modcm = cl_MI(R, poscm);
                         for ( size_t j=0; j<delta_s.size(); ++j ) {
                                 delta_s[j] = delta_s[j] * modcm;
                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
                         }
-                       if ( nterms > 1 ) {
+                       if ( nterms > 1 && i+1 != nterms ) {
                                 z = c.op(i+1);
                         }
                 }
@@ -1717,17 +1938,24 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
         return sigma;
  }
  
-#ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
-{
-       for ( size_t i=0; i<v.size(); ++i ) {
-               o << "(" << v[i].x << "==" << v[i].evalpoint << ") ";
-       }
-       return o;
-}
-#endif // def DEBUGFACTOR
-
-ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
+/** Multivariate Hensel lifting.
+ *  The implementation follows algorithm 6.4 of [GCL].
+ *  Since we don't have a data type for modular multivariate polynomials, the
+ *  respective operations are done in a GiNaC::ex and the function
+ *  make_modular() is then called to reduce the coefficients modulo p^l.
+ *
+ *  @param a    multivariate polynomial primitive in x
+ *  @param x    symbol (equiv. x_1 in [GCL])
+ *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
+ *  @param p    prime number (should not divide lcoeff(a mod I))
+ *  @param l    p^l is the modulus of the lifted univariate field
+ *  @param u    vector of modular (mod p^l) factors of a mod I
+ *  @param lcU  correct leading coefficient of the univariate factors of a mod I
+ *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
+ *              empty if Hensel lifting did not succeed
+ */
+static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
+                          unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
  {
         const size_t nu = I.size() + 1;
         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
@@ -1811,80 +2039,69 @@ ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigne
                 acand *= U[i];
         }
         if ( expand(a-acand).is_zero() ) {
-               lst res;
-               for ( size_t i=0; i<U.size(); ++i ) {
-                       res.append(U[i]);
-               }
-               return res;
-       }
-       else {
-               lst res;
-               return lst();
+               return lst(U.begin(), U.end());
+       } else {
+               return lst{};
         }
  }
  
-static ex put_factors_into_lst(const ex& e)
+/** Takes a factorized expression and puts the factors in a vector. The exponents
+ *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
+ *  element of the result is always the numeric coefficient.
+ */
+static exvector put_factors_into_vec(const ex& e)
  {
-       lst result;
-
+       exvector result;
         if ( is_a<numeric>(e) ) {
-               result.append(e);
+               result.push_back(e);
                 return result;
         }
         if ( is_a<power>(e) ) {
-               result.append(1);
-               result.append(e.op(0));
-               result.append(e.op(1));
+               result.push_back(1);
+               result.push_back(e.op(0));
                 return result;
         }
         if ( is_a<symbol>(e) || is_a<add>(e) ) {
-               result.append(1);
-               result.append(e);
-               result.append(1);
+               ex icont(e.integer_content());
+               result.push_back(icont);
+               result.push_back(e/icont);
                 return result;
         }
         if ( is_a<mul>(e) ) {
                 ex nfac = 1;
+               result.push_back(nfac);
                 for ( size_t i=0; i<e.nops(); ++i ) {
                         ex op = e.op(i);
                         if ( is_a<numeric>(op) ) {
                                 nfac = op;
                         }
                         if ( is_a<power>(op) ) {
-                               result.append(op.op(0));
-                               result.append(op.op(1));
+                               result.push_back(op.op(0));
                         }
                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
-                               result.append(op);
-                               result.append(1);
+                               result.push_back(op);
                         }
                 }
-               result.prepend(nfac);
+               result[0] = nfac;
                 return result;
         }
-       throw runtime_error("put_factors_into_lst: bad term.");
+       throw runtime_error("put_factors_into_vec: bad term.");
  }
  
-#ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const vector<numeric>& v)
-{
-       for ( size_t i=0; i<v.size(); ++i ) {
-               o << v[i] << " ";
-       }
-       return o;
-}
-#endif // def DEBUGFACTOR
-
-static bool checkdivisors(const lst& f, vector<numeric>& d)
+/** Checks a set of numbers for whether each number has a unique prime factor.
+ *
+ *  @param[in]  f  numbers to check
+ *  @return        true: if number set is bad, false: if set is okay (has unique
+ *                 prime factors)
+ */
+static bool checkdivisors(const exvector& f)
  {
-       const int k = f.nops()-2;
+       const int k = f.size();
         numeric q, r;
-       d[0] = ex_to<numeric>(f.op(0) * f.op(f.nops()-1));
-       if ( d[0] == 1 && k == 1 && abs(f.op(1)) != 1 ) {
-               return false;
-       }
-       for ( int i=1; i<=k; ++i ) {
-               q = ex_to<numeric>(abs(f.op(i)));
+       vector<numeric> d(k);
+       d[0] = ex_to<numeric>(abs(f[0]));
+       for ( int i=1; i<k; ++i ) {
+               q = ex_to<numeric>(abs(f[i]));
                 for ( int j=i-1; j>=0; --j ) {
                         r = d[j];
                         do {
@@ -1900,307 +2117,226 @@ static bool checkdivisors(const lst& f, vector<numeric>& d)
         return false;
  }
  
-static bool generate_set(const ex& u, const ex& vn, const exset& syms, const ex& f, const numeric& modulus, vector<numeric>& a, vector<numeric>& d)
+/** Generates a set of evaluation points for a multivariate polynomial.
+ *  The set fulfills the following conditions:
+ *  1. lcoeff(evaluated_polynomial) does not vanish
+ *  2. the factors of lcoeff(evaluated_polynomial) each have a unique prime factor
+ *  3. evaluated_polynomial is square free
+ *  See [Wan] for more details.
+ *
+ *  @param[in]     u        multivariate polynomial to be factored
+ *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
+ *  @param[in]     x        first symbol that appears in u
+ *  @param[in]     syms_wox remaining symbols that appear in u
+ *  @param[in]     f        vector containing the factors of the leading coefficient vn
+ *  @param[in,out] modulus  bound for the random evaluation points (-modulus <= a_i < modulus), incremented on every attempt
+ *  @param[out]    u0       returns the evaluated (univariate) polynomial
+ *  @param[out]    a        returns the valid evaluation points; must be sized to the number
+ *                          of symbols minus one (one entry per symbol other than x) before the call
+ */
+static void generate_set(const ex& u, const ex& vn, const ex& x, const exset& syms_wox, const exvector& f,
+                         numeric& modulus, ex& u0, vector<numeric>& a)
  {
-       // computation of d is actually not necessary
-       const ex& x = *syms.begin();
-       bool trying = true;
-       do {
-               ex u0 = u;
+       while ( true ) {
+               ++modulus;
+               // generate a set of integers ...
+               u0 = u;
                 ex vna = vn;
                 ex vnatry;
-               exset::const_iterator s = syms.begin();
-               ++s;
+               auto s = syms_wox.begin();
                 for ( size_t i=0; i<a.size(); ++i ) {
                         do {
                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
                                 vnatry = vna.subs(*s == a[i]);
+                               // ... for which the leading coefficient doesn't vanish ...
                         } while ( vnatry == 0 );
                         vna = vnatry;
                         u0 = u0.subs(*s == a[i]);
                         ++s;
                 }
-               if ( gcd(u0,u0.diff(ex_to<symbol>(x))) != 1 ) {
+               // ... for which u0 is square free ...
+               ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
+               if ( !is_a<numeric>(g) ) {
                         continue;
                 }
-               if ( is_a<numeric>(vn) ) {
-                       trying = false;
-               }
-               else {
-                       lst fnum;
-                       lst::const_iterator i = ex_to<lst>(f).begin();
-                       fnum.append(*i++);
-                       bool problem = false;
-                       while ( i!=ex_to<lst>(f).end() ) {
-                               ex fs = *i;
-                               if ( !is_a<numeric>(fs) ) {
-                                       s = syms.begin();
-                                       ++s;
-                                       for ( size_t j=0; j<a.size(); ++j ) {
-                                               fs = fs.subs(*s == a[j]);
-                                               ++s;
-                                       }
-                                       if ( abs(fs) == 1 ) {
-                                               problem = true;
-                                               break;
+               if ( !is_a<numeric>(vn) ) {
+                       // ... and for which the evaluated factors each have a unique prime factor
+                       exvector fnum = f;
+                       fnum[0] = fnum[0] * u0.content(x);
+                       for ( size_t i=1; i<fnum.size(); ++i ) {
+                               if ( !is_a<numeric>(fnum[i]) ) {
+                                       s = syms_wox.begin();
+                                       for ( size_t j=0; j<a.size(); ++j, ++s ) {
+                                               fnum[i] = fnum[i].subs(*s == a[j]);
                                         }
                                 }
-                               fnum.append(fs);
-                               ++i; ++i;
                         }
-                       if ( problem ) {
-                               return true;
+                       if ( checkdivisors(fnum) ) {
+                               continue;
                         }
-                       ex con = u0.content(x);
-                       fnum.append(con);
-                       trying = checkdivisors(fnum, d);
                 }
-       } while ( trying );
-       return false;
-}
-
-static ex factor_multivariate(const ex& poly, const exset& syms)
-{
-       exset::const_iterator s;
-       const ex& x = *syms.begin();
-
-       /* make polynomial primitive */
-       ex p = poly.expand().collect(x);
-       ex cont = p.lcoeff(x);
-       for ( numeric i=p.degree(x)-1; i>=p.ldegree(x); --i ) {
-               cont = gcd(cont, p.coeff(x,ex_to<numeric>(i).to_int()));
-               if ( cont == 1 ) break;
-       }
-       ex pp = expand(normal(p / cont));
-       if ( !is_a<numeric>(cont) ) {
-               return factor(cont) * factor(pp);
+               // ok, we have a valid set now
+               return;
         }
+}
  
-       /* factor leading coefficient */
-       pp = pp.collect(x);
-       ex vn = pp.lcoeff(x);
-       pp = pp.expand();
-       ex vnlst;
-       if ( is_a<numeric>(vn) ) {
-               vnlst = lst(vn);
-       }
-       else {
-               ex vnfactors = factor(vn);
-               vnlst = put_factors_into_lst(vnfactors);
-       }
+// forward declaration
+static ex factor_sqrfree(const ex& poly);
  
-       const numeric maxtrials = 3;
-       numeric modulus = (vnlst.nops()-1 > 3) ? vnlst.nops()-1 : 3;
-       numeric minimalr = -1;
-       vector<numeric> a(syms.size()-1, 0);
-       vector<numeric> d((vnlst.nops()-1)/2+1, 0);
+/** Used by factor_multivariate().
+ */
+struct factorization_ctx {
+       const ex poly, x;         // polynomial, first symbol x...
+       const exset syms_wox;     // ...remaining symbols w/o x
+       ex unit, cont, pp;        // unit * cont * pp == poly
+       ex vn; exvector vnlst;    // leading coeff, factors of leading coeff
+       numeric modulus;          // incremented each time we try
+       /** returns factors or empty if it did not succeed */
+       ex try_next_evaluation_homomorphism()
+       {
+               constexpr unsigned maxtrials = 3;
+               vector<numeric> a(syms_wox.size(), 0);
  
-       while ( true ) {
-               numeric trialcount = 0;
+               unsigned int trialcount = 0;
+               unsigned int prime;
+               int factor_count = 0;
+               int min_factor_count = -1;
                 ex u, delta;
-               unsigned int prime = 3;
-               size_t factor_count = 0;
                 ex ufac;
-               ex ufaclst;
+               exvector ufaclst;
+
+               // try several evaluation points to reduce the number of factors
                 while ( trialcount < maxtrials ) {
-                       bool problem = generate_set(pp, vn, syms, vnlst, modulus, a, d);
-                       if ( problem ) {
-                               ++modulus;
-                               continue;
-                       }
-                       u = pp;
-                       s = syms.begin();
-                       ++s;
-                       for ( size_t i=0; i<a.size(); ++i ) {
-                               u = u.subs(*s == a[i]);
-                               ++s;
-                       }
-                       delta = u.content(x);
-
-                       // determine proper prime
-                       prime = 3;
-                       cl_modint_ring R = find_modint_ring(prime);
-                       while ( true ) {
-                               if ( irem(ex_to<numeric>(u.lcoeff(x)), prime) != 0 ) {
-                                       umodpoly modpoly;
-                                       umodpoly_from_ex(modpoly, u, x, R);
-                                       if ( squarefree(modpoly) ) break;
-                               }
-                               prime = next_prime(prime);
-                               R = find_modint_ring(prime);
-                       }
  
-                       ufac = factor(u);
-                       ufaclst = put_factors_into_lst(ufac);
-                       factor_count = (ufaclst.nops()-1)/2;
+                       // generate a set of valid evaluation points
+                       generate_set(pp, vn, x, syms_wox, vnlst, modulus, u, a);
  
-                       // veto factorization for which gcd(u_i, u_j) != 1 for all i,j
-                       upvec tryu;
-                       for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-                               umodpoly newu;
-                               umodpoly_from_ex(newu, ufaclst.op(i*2+1), x, R);
-                               tryu.push_back(newu);
-                       }
-                       bool veto = false;
-                       for ( size_t i=0; i<tryu.size()-1; ++i ) {
-                               for ( size_t j=i+1; j<tryu.size(); ++j ) {
-                                       umodpoly tryg;
-                                       gcd(tryu[i], tryu[j], tryg);
-                                       if ( unequal_one(tryg) ) {
-                                               veto = true;
-                                               goto escape_quickly;
-                                       }
-                               }
-                       }
-                       escape_quickly: ;
-                       if ( veto ) {
-                               continue;
-                       }
+                       ufac = factor_univariate(u, x, prime);
+                       ufaclst = put_factors_into_vec(ufac);
+                       factor_count = ufaclst.size()-1;
+                       delta = ufaclst[0];
  
                         if ( factor_count <= 1 ) {
-                               return poly;
+                               // irreducible
+                               return lst{pp};
                         }
-
-                       if ( minimalr < 0 ) {
-                               minimalr = factor_count;
+                       if ( min_factor_count < 0 ) {
+                               // first time here
+                               min_factor_count = factor_count;
                         }
-                       else if ( minimalr == factor_count ) {
+                       else if ( min_factor_count == factor_count ) {
+                               // one less to try
                                 ++trialcount;
-                               ++modulus;
                         }
-                       else if ( minimalr > factor_count ) {
-                               minimalr = factor_count;
+                       else if ( min_factor_count > factor_count ) {
+                               // new minimum, reset trial counter
+                               min_factor_count = factor_count;
                                 trialcount = 0;
                         }
-                       if ( minimalr <= 1 ) {
-                               return poly;
-                       }
-               }
-
-               vector<numeric> ftilde((vnlst.nops()-1)/2+1);
-               ftilde[0] = ex_to<numeric>(vnlst.op(0));
-               for ( size_t i=1; i<ftilde.size(); ++i ) {
-                       ex ft = vnlst.op((i-1)*2+1);
-                       s = syms.begin();
-                       ++s;
-                       for ( size_t j=0; j<a.size(); ++j ) {
-                               ft = ft.subs(*s == a[j]);
-                               ++s;
-                       }
-                       ftilde[i] = ex_to<numeric>(ft);
                 }
  
-               vector<bool> used_flag((vnlst.nops()-1)/2+1, false);
-               vector<ex> D(factor_count, 1);
-               for ( size_t i=0; i<=factor_count; ++i ) {
-                       numeric prefac;
-                       if ( i == 0 ) {
-                               prefac = ex_to<numeric>(ufaclst.op(0));
-                               ftilde[0] = ftilde[0] / prefac;
-                               vnlst.let_op(0) = vnlst.op(0) / prefac;
-                               continue;
-                       }
-                       else {
-                               prefac = ex_to<numeric>(ufaclst.op(2*(i-1)+1).lcoeff(x));
+               // determine true leading coefficients for the Hensel lifting
+               vector<ex> C(factor_count);
+               if ( is_a<numeric>(vn) ) {
+                       // easy case
+                       for ( size_t i=1; i<ufaclst.size(); ++i ) {
+                               C[i-1] = ufaclst[i].lcoeff(x);
                         }
-                       for ( size_t j=(vnlst.nops()-1)/2+1; j>0; --j ) {
-                               if ( abs(ftilde[j-1]) == 1 ) {
-                                       used_flag[j-1] = true;
-                                       continue;
+               } else {
+                       // difficult case.
+                       // we use the property of the ftilde having a unique prime factor.
+                       // details can be found in [Wan].
+                       // calculate ftilde
+                       vector<numeric> ftilde(vnlst.size()-1);
+                       for ( size_t i=0; i<ftilde.size(); ++i ) {
+                               ex ft = vnlst[i+1];
+                               auto s = syms_wox.begin();
+                               for ( size_t j=0; j<a.size(); ++j ) {
+                                       ft = ft.subs(*s == a[j]);
+                                       ++s;
                                 }
-                               numeric g = gcd(prefac, ftilde[j-1]);
-                               if ( g != 1 ) {
-                                       prefac = prefac / g;
-                                       numeric count = abs(iquo(g, ftilde[j-1]));
-                                       used_flag[j-1] = true;
-                                       if ( i > 0 ) {
-                                               if ( j == 1 ) {
-                                                       D[i-1] = D[i-1] * pow(vnlst.op(0), count);
+                               ftilde[i] = ex_to<numeric>(ft);
+                       }
+                       // calculate D and C
+                       vector<bool> used_flag(ftilde.size(), false);
+                       vector<ex> D(factor_count, 1);
+                       if ( delta == 1 ) {
+                               for ( int i=0; i<factor_count; ++i ) {
+                                       numeric prefac = ex_to<numeric>(ufaclst[i+1].lcoeff(x));
+                                       for ( int j=ftilde.size()-1; j>=0; --j ) {
+                                               int count = 0;
+                                               numeric q;
+                                               while ( irem(prefac, ftilde[j], q) == 0 ) {
+                                                       prefac = q;
+                                                       ++count;
                                                 }
-                                               else {
-                                                       D[i-1] = D[i-1] * pow(vnlst.op(2*(j-2)+1), count);
+                                               if ( count ) {
+                                                       used_flag[j] = true;
+                                                       D[i] = D[i] * pow(vnlst[j+1], count);
                                                 }
                                         }
-                                       else {
-                                               ftilde[j-1] = ftilde[j-1] / prefac;
-                                               break;
+                                       C[i] = D[i] * prefac;
+                               }
+                       } else {
+                               for ( int i=0; i<factor_count; ++i ) {
+                                       numeric prefac = ex_to<numeric>(ufaclst[i+1].lcoeff(x));
+                                       for ( int j=ftilde.size()-1; j>=0; --j ) {
+                                               int count = 0;
+                                               numeric q;
+                                               while ( irem(prefac, ftilde[j], q) == 0 ) {
+                                                       prefac = q;
+                                                       ++count;
+                                               }
+                                               while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
+                                                       numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
+                                                       prefac = iquo(prefac, g);
+                                                       delta = delta / (ftilde[j]/g);
+                                                       ufaclst[i+1] = ufaclst[i+1] * (ftilde[j]/g);
+                                                       ++count;
+                                               }
+                                               if ( count ) {
+                                                       used_flag[j] = true;
+                                                       D[i] = D[i] * pow(vnlst[j+1], count);
+                                               }
                                         }
-                                       ++j;
+                                       C[i] = D[i] * prefac;
                                 }
                         }
-               }
-
-               bool some_factor_unused = false;
-               for ( size_t i=0; i<used_flag.size(); ++i ) {
-                       if ( !used_flag[i] ) {
-                               some_factor_unused = true;
-                               break;
-                       }
-               }
-               if ( some_factor_unused ) {
-                       continue;
-               }
-
-               vector<ex> C(factor_count);
-               if ( delta == 1 ) {
-                       for ( size_t i=0; i<D.size(); ++i ) {
-                               ex Dtilde = D[i];
-                               s = syms.begin();
-                               ++s;
-                               for ( size_t j=0; j<a.size(); ++j ) {
-                                       Dtilde = Dtilde.subs(*s == a[j]);
-                                       ++s;
+                       // check if something went wrong
+                       bool some_factor_unused = false;
+                       for ( size_t i=0; i<used_flag.size(); ++i ) {
+                               if ( !used_flag[i] ) {
+                                       some_factor_unused = true;
+                                       break;
                                 }
-                               C[i] = D[i] * (ufaclst.op(2*i+1).lcoeff(x) / Dtilde);
                         }
-               }
-               else {
-                       for ( size_t i=0; i<D.size(); ++i ) {
-                               ex Dtilde = D[i];
-                               s = syms.begin();
-                               ++s;
-                               for ( size_t j=0; j<a.size(); ++j ) {
-                                       Dtilde = Dtilde.subs(*s == a[j]);
-                                       ++s;
-                               }
-                               ex ui;
-                               if ( i == 0 ) {
-                                       ui = ufaclst.op(0);
-                               }
-                               else {
-                                       ui = ufaclst.op(2*(i-1)+1);
-                               }
-                               while ( true ) {
-                                       ex d = gcd(ui.lcoeff(x), Dtilde);
-                                       C[i] = D[i] * ( ui.lcoeff(x) / d );
-                                       ui = ui * ( Dtilde[i] / d );
-                                       delta = delta / ( Dtilde[i] / d );
-                                       if ( delta == 1 ) break;
-                                       ui = delta * ui;
-                                       C[i] = delta * C[i];
-                                       pp = pp * pow(delta, D.size()-1);
-                               }
+                       if ( some_factor_unused ) {
+                               return lst{};  // next try
                         }
                 }
  
-               EvalPoint ep;
+               // multiply the remaining content of the univariate polynomial into the
+               // first factor
+               if ( delta != 1 ) {
+                       C[0] = C[0] * delta;
+                       ufaclst[1] = ufaclst[1] * delta;
+               }
+
+               // set up evaluation points
                 vector<EvalPoint> epv;
-               s = syms.begin();
-               ++s;
+               auto s = syms_wox.begin();
                 for ( size_t i=0; i<a.size(); ++i ) {
-                       ep.x = *s++;
-                       ep.evalpoint = a[i].to_int();
-                       epv.push_back(ep);
+                       epv.emplace_back(EvalPoint{*s++, a[i].to_int()});
                 }
  
                 // calc bound p^l
                 int maxdeg = 0;
-               for ( size_t i=0; i<factor_count; ++i ) {
-                       if ( ufaclst[2*i+1].degree(x) > maxdeg ) {
-                               maxdeg = ufaclst[2*i+1].degree(x);
+               for ( int i=1; i<=factor_count; ++i ) {
+                       if ( ufaclst[i].degree(x) > maxdeg ) {
+                               maxdeg = ufaclst[i].degree(x);
                         }
                 }
-               cl_I B = 2*calc_bound(u, x, maxdeg);
+               cl_I B = ash(calc_bound(u, x), maxdeg+1);  // = 2 * calc_bound(u,x) * 2^maxdeg
                 cl_I l = 1;
                 cl_I pl = prime;
                 while ( pl < B ) {
@@ -2208,29 +2344,89 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
                         pl = pl * prime;
                 }
  
-               upvec uvec;
-               cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
-               for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-                       umodpoly newu;
-                       umodpoly_from_ex(newu, ufaclst.op(i*2+1), x, R);
-                       uvec.push_back(newu);
+               // set up modular factors (mod p^l)
+               cl_modint_ring R = find_modint_ring(pl);
+               upvec modfactors(ufaclst.size()-1);
+               for ( size_t i=1; i<ufaclst.size(); ++i ) {
+                       umodpoly_from_ex(modfactors[i-1], ufaclst[i], x, R);
+               }
+
+               // try Hensel lifting
+               return hensel_multivar(pp, x, epv, prime, l, modfactors, C);
+       }
+};
+
+/** Multivariate factorization.
+ *
+ *  The implementation is based on the algorithm described in [Wan].
+ *  An evaluation homomorphism (a set of integers) is determined that fulfills
+ *  certain criteria. The evaluated polynomial is univariate and is factorized
+ *  by factor_univariate(). The main work then is to find the correct leading
+ *  coefficients of the univariate factors. They have to correspond to the
+ *  factors of the (multivariate) leading coefficient of the input polynomial
+ *  (as defined for a specific variable x). After that the Hensel lifting can be
+ *  performed. This is done in round-robin for each x in syms until success.
+ *
+ *  @param[in] poly  expanded, square free polynomial
+ *  @param[in] syms  contains the symbols in the polynomial
+ *  @return          factorized polynomial
+ */
+static ex factor_multivariate(const ex& poly, const exset& syms)
+{
+       // set up one factorization context for each symbol
+       vector<factorization_ctx> ctx_in_x;
+       for (auto x : syms) {
+               exset syms_wox;  // remaining syms w/o x
+               copy_if(syms.begin(), syms.end(),
+                       inserter(syms_wox, syms_wox.end()), [x](const ex& y){ return y != x; });
+
+               factorization_ctx ctx{poly, x, syms_wox};
+
+               // make polynomial primitive
+               poly.unitcontprim(x, ctx.unit, ctx.cont, ctx.pp);
+               if ( !is_a<numeric>(ctx.cont) ) {
+                       // content is a polynomial in one or more of the remaining syms, let's start over
+                       return ctx.unit * factor_sqrfree(ctx.cont) * factor_sqrfree(ctx.pp);
                 }
  
-               ex res = hensel_multivar(ufaclst.op(0)*pp, x, epv, prime, l, uvec, C);
-               if ( res != lst() ) {
-                       ex result = cont * ufaclst.op(0);
+               // find factors of leading coefficient
+               ctx.vn = ctx.pp.collect(x).lcoeff(x);
+               ctx.vnlst = put_factors_into_vec(factor(ctx.vn));
+
+               ctx.modulus = (ctx.vnlst.size() > 3) ? ctx.vnlst.size() : numeric(3);
+
+               ctx_in_x.push_back(ctx);
+       }
+
+       // try an evaluation homomorphism for each context in round-robin
+       auto ctx = ctx_in_x.begin();
+       while ( true ) {
+
+               ex res = ctx->try_next_evaluation_homomorphism();
+
+               if ( res != lst{} ) {
+                       // found the factors
+                       ex result = ctx->cont * ctx->unit;
                         for ( size_t i=0; i<res.nops(); ++i ) {
-                               result *= res.op(i).content(x) * res.op(i).unit(x);
-                               result *= res.op(i).primpart(x);
+                               ex unit, cont, pp;
+                               res.op(i).unitcontprim(ctx->x, unit, cont, pp);
+                               result *= unit * cont * pp;
                         }
                         return result;
                 }
+
+               // switch context for next symbol
+               if (++ctx == ctx_in_x.end()) {
+                       ctx = ctx_in_x.begin();
+               }
         }
  }
  
+/** Finds all symbols in an expression. Used by factor_sqrfree() and factor1().
+ */
  struct find_symbols_map : public map_function {
         exset syms;
-       ex operator()(const ex& e)
+       ex operator()(const ex& e) override
         {
                 if ( is_a<symbol>(e) ) {
                         syms.insert(e);
@@ -2240,6 +2436,9 @@ struct find_symbols_map : public map_function {
         }
  };
  
+/** Factorizes a polynomial that is square free. It calls either the univariate
+ *  or the multivariate factorization functions.
+ */
  static ex factor_sqrfree(const ex& poly)
  {
         // determine all symbols in poly
@@ -2252,12 +2451,12 @@ static ex factor_sqrfree(const ex& poly)
         if ( findsymbols.syms.size() == 1 ) {
                 // univariate case
                 const ex& x = *(findsymbols.syms.begin());
-               if ( poly.ldegree(x) > 0 ) {
-                       int ld = poly.ldegree(x);
+               int ld = poly.ldegree(x);
+               if ( ld > 0 ) {
+                       // pull out the monomial factor x^ld before factorizing
                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
                         return res * pow(x,ld);
-               }
-               else {
+               } else {
                         ex res = factor_univariate(poly, x);
                         return res;
                 }
@@ -2268,10 +2467,13 @@ static ex factor_sqrfree(const ex& poly)
         return res;
  }
  
+/** Map used by factor() when factor_options::all is given to access all
+ *  subexpressions and to call factor() on them.
+ */
  struct apply_factor_map : public map_function {
         unsigned options;
         apply_factor_map(unsigned options_) : options(options_) { }
-       ex operator()(const ex& e)
+       ex operator()(const ex& e) override
         {
                 if ( e.info(info_flags::polynomial) ) {
                         return factor(e, options);
@@ -2281,22 +2483,51 @@ struct apply_factor_map : public map_function {
                         for ( size_t i=0; i<e.nops(); ++i ) {
                                 if ( e.op(i).info(info_flags::polynomial) ) {
                                         s1 += e.op(i);
-                               }
-                               else {
+                               } else {
                                         s2 += e.op(i);
                                 }
                         }
-                       s1 = s1.eval();
-                       s2 = s2.eval();
                         return factor(s1, options) + s2.map(*this);
                 }
                 return e.map(*this);
         }
  };
  
-} // anonymous namespace
+/** Iterate through explicit factors of e, call yield(f, k) for
+ *  each factor of the form f^k.
+ *
+ *  Note that this function doesn't factor e itself, it only
+ *  iterates through the factors already explicitly present.
+ */
+template <typename F> void
+factor_iter(const ex &e, F yield)
+{
+       if (is_a<mul>(e)) {
+               for (const auto &f : e) {
+                       if (is_a<power>(f)) {
+                               yield(f.op(0), f.op(1));
+                       } else {
+                               yield(f, ex(1));
+                       }
+               }
+       } else {
+               if (is_a<power>(e)) {
+                       yield(e.op(0), e.op(1));
+               } else {
+                       yield(e, ex(1));
+               }
+       }
+}
  
-ex factor(const ex& poly, unsigned options)
+/** This function factorizes a polynomial. It checks the arguments,
+ *  tries a square free factorization, and then calls factor_sqrfree
+ *  to do the hard work.
+ *
+ *  This function expands its argument, so for polynomials with
+ *  explicit factors it's better to call it on each one separately
+ *  (or use factor() which does just that).
+ */
+static ex factor1(const ex& poly, unsigned options)
  {
         // check arguments
         if ( !poly.info(info_flags::polynomial) ) {
@@ -2315,60 +2546,43 @@ ex factor(const ex& poly, unsigned options)
                 return poly;
         }
         lst syms;
-       exset::const_iterator i=findsymbols.syms.begin(), end=findsymbols.syms.end();
-       for ( ; i!=end; ++i ) {
-               syms.append(*i);
+       for (auto & i : findsymbols.syms ) {
+               syms.append(i);
         }
  
         // make poly square free
-       ex sfpoly = sqrfree(poly, syms);
+       ex sfpoly = sqrfree(poly.expand(), syms);
  
         // factorize the square free components
-       if ( is_a<power>(sfpoly) ) {
-               // case: (polynomial)^exponent
-               const ex& base = sfpoly.op(0);
-               if ( !is_a<add>(base) ) {
-                       // simple case: (monomial)^exponent
-                       return sfpoly;
-               }
-               ex f = factor_sqrfree(base);
-               return pow(f, sfpoly.op(1));
-       }
-       if ( is_a<mul>(sfpoly) ) {
-               // case: multiple factors
-               ex res = 1;
-               for ( size_t i=0; i<sfpoly.nops(); ++i ) {
-                       const ex& t = sfpoly.op(i);
-                       if ( is_a<power>(t) ) {
-                               const ex& base = t.op(0);
-                               if ( !is_a<add>(base) ) {
-                                       res *= t;
-                               }
-                               else {
-                                       ex f = factor_sqrfree(base);
-                                       res *= pow(f, t.op(1));
-                               }
+       ex res = 1;
+       factor_iter(sfpoly,
+               [&](const ex &f, const ex &k) {
+                       if ( is_a<add>(f) ) {
+                               res *= pow(factor_sqrfree(f), k);
+                       } else {
+                               // simple case: (monomial)^exponent
+                               res *= pow(f, k);
                         }
-                       else if ( is_a<add>(t) ) {
-                               ex f = factor_sqrfree(t);
-                               res *= f;
-                       }
-                       else {
-                               res *= t;
-                       }
-               }
-               return res;
-       }
-       if ( is_a<symbol>(sfpoly) ) {
-               return poly;
-       }
-       // case: (polynomial)
-       ex f = factor_sqrfree(sfpoly);
-       return f;
+               });
+       return res;
  }
  
-} // namespace GiNaC
+} // anonymous namespace
  
-#ifdef DEBUGFACTOR
-#include "test.h"
-#endif
+/** Interface function to the outside world. It uses factor1()
+ *  on each of the explicitly present factors of poly.
+ */
+ex factor(const ex& poly, unsigned options)
+{
+       ex result = 1;
+       factor_iter(poly,
+               [&](const ex &f1, const ex &k1) {
+                       factor_iter(factor1(f1, options),
+                               [&](const ex &f2, const ex &k2) {
+                                       result *= pow(f2, k1*k2);
+                               });
+               });
+       return result;
+}
+
+} // namespace GiNaC