X-Git-Url: https://www.ginac.de/ginac.git//ginac.git?p=ginac.git;a=blobdiff_plain;f=ginac%2Ffactor.cpp;h=2f98f9db5d0ee442ad54ad03fe6b8b18bf503bd6;hp=204010be3750294640434dc28da698f9f452db69;hb=f3b92d92e9b8ee71b189ddb2c7e27ed72b577732;hpb=edc92b7a463993da62357fb4afad053e8c6d0771

diff --git a/ginac/factor.cpp b/ginac/factor.cpp
index 204010be..2f98f9db 100644
--- a/ginac/factor.cpp
+++ b/ginac/factor.cpp
@@ -1,16 +1,39 @@
 /** @file factor.cpp
  *
- *  Polynomial factorization code (implementation).
+ *  Polynomial factorization (implementation).
+ *
+ *  The interface function factor() at the end of this file is defined in the
+ *  GiNaC namespace. All other utility functions and classes are defined in an
+ *  additional anonymous namespace.
+ *
+ *  Factorization starts by doing a square free factorization and making the
+ *  coefficients integer. Then, depending on the number of free variables it
+ *  proceeds either in dedicated univariate or multivariate factorization code.
+ *
+ *  Univariate factorization does a modular factorization via Berlekamp's
+ *  algorithm and distinct degree factorization. Hensel lifting is used at the
+ *  end.
+ *  
+ *  Multivariate factorization uses the univariate factorization (applying a
+ *  evaluation homomorphism first) and Hensel lifting raises the answer to the
+ *  multivariate domain. The Hensel lifting code is completely distinct from the
+ *  code used by the univariate factorization.
  *
  *  Algorithms used can be found in
- *    [W1]  An Improved Multivariate Polynomial Factoring Algorithm,
- *          P.S.Wang, Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
+ *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
+ *          P.S.Wang,
+ *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  *    [GCL] Algorithms for Computer Algebra,
- *          K.O.Geddes, S.R.Czapor, G.Labahn, Springer Verlag, 1992.
+ *          K.O.Geddes, S.R.Czapor, G.Labahn,
+ *          Springer Verlag, 1992.
+ *    [Mig] Some Useful Bounds,
+ *          M.Mignotte, 
+ *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
+ *          pp. 259-263, Springer-Verlag, New York, 1982.
  */
 
 /*
- *  GiNaC Copyright (C) 1999-2008 Johannes Gutenberg University Mainz, Germany
+ *  GiNaC Copyright (C) 1999-2011 Johannes Gutenberg University Mainz, Germany
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -61,17 +84,6 @@ namespace GiNaC {
 #define DCOUT(str) cout << #str << endl
 #define DCOUTVAR(var) cout << #var << ": " << var << endl
 #define DCOUT2(str,var) cout << #str << ": " << var << endl
-#else
-#define DCOUT(str)
-#define DCOUTVAR(var)
-#define DCOUT2(str,var)
-#endif
-
-// anonymous namespace to hide all utility functions
-namespace {
-
-typedef vector<cl_MI> mvec;
-#ifdef DEBUGFACTOR
 ostream& operator<<(ostream& o, const vector<int>& v)
 {
 	vector<int>::const_iterator i = v.begin(), end = v.end();
@@ -80,7 +92,7 @@ ostream& operator<<(ostream& o, const vector<int>& v)
 	}
 	return o;
 }
-ostream& operator<<(ostream& o, const vector<cl_I>& v)
+static ostream& operator<<(ostream& o, const vector<cl_I>& v)
 {
 	vector<cl_I>::const_iterator i = v.begin(), end = v.end();
 	while ( i != end ) {
@@ -89,7 +101,7 @@ ostream& operator<<(ostream& o, const vector<cl_I>& v)
 	}
 	return o;
 }
-ostream& operator<<(ostream& o, const vector<cl_MI>& v)
+static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 {
 	vector<cl_MI>::const_iterator i = v.begin(), end = v.end();
 	while ( i != end ) {
@@ -98,6 +110,13 @@ ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 	}
 	return o;
 }
+ostream& operator<<(ostream& o, const vector<numeric>& v)
+{
+	for ( size_t i=0; i<v.size(); ++i ) {
+		o << v[i] << " ";
+	}
+	return o;
+}
 ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
 {
 	vector< vector<cl_MI> >::const_iterator i = v.begin(), end = v.end();
@@ -107,7 +126,14 @@ ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
 	}
 	return o;
 }
-#endif
+#else
+#define DCOUT(str)
+#define DCOUTVAR(var)
+#define DCOUT2(str,var)
+#endif // def DEBUGFACTOR
+
+// anonymous namespace to hide all utility functions
+namespace {
 
 ////////////////////////////////////////////////////////////////////////////////
 // modular univariate polynomial code
@@ -192,8 +218,32 @@ static void expt_pos(umodpoly& a, unsigned int q)
 	}
 }
 
+template<bool COND, typename T = void> struct enable_if
+{
+	typedef T type;
+};
+
+template<typename T> struct enable_if<false, T> { /* empty */ };
+
+template<typename T> struct uvar_poly_p
+{
+	static const bool value = false;
+};
+
+template<> struct uvar_poly_p<upoly>
+{
+	static const bool value = true;
+};
+
+template<> struct uvar_poly_p<umodpoly>
+{
+	static const bool value = true;
+};
+
 template<typename T>
-static T operator+(const T& a, const T& b)
+// Don't define this for anything but univariate polynomials.
+static typename enable_if<uvar_poly_p<T>::value, T>::type
+operator+(const T& a, const T& b)
 {
 	int sa = a.size();
 	int sb = b.size();
@@ -224,7 +274,11 @@ static T operator+(const T& a, const T& b)
 }
 
 template<typename T>
-static T operator-(const T& a, const T& b)
+// Don't define this for anything but univariate polynomials. Otherwise
+// overload resolution might fail (this actually happens when compiling
+// GiNaC with g++ 3.4).
+static typename enable_if<uvar_poly_p<T>::value, T>::type
+operator-(const T& a, const T& b)
 {
 	int sa = a.size();
 	int sb = b.size();
@@ -365,10 +419,12 @@ static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_m
 	canonicalize(ump);
 }
 
+#ifdef DEBUGFACTOR
 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 {
 	umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 }
+#endif
 
 static ex upoly_to_ex(const upoly& a, const ex& x)
 {
@@ -423,7 +479,7 @@ static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R,
 	cl_modint_ring oldR = a[0].ring();
 	size_t sa = a.size();
 	e.resize(sa+m, R->zero());
-	for ( int i=0; i<sa; ++i ) {
+	for ( size_t i=0; i<sa; ++i ) {
 		e[i+m] = R->canonhom(oldR->retract(a[i]));
 	}
 	canonicalize(e);
@@ -606,7 +662,7 @@ static bool squarefree(const umodpoly& a)
 	umodpoly b;
 	deriv(a, b);
 	if ( b.empty() ) {
-		return true;
+		return false;
 	}
 	umodpoly c;
 	gcd(a, b, c);
@@ -619,6 +675,8 @@ static bool squarefree(const umodpoly& a)
 ////////////////////////////////////////////////////////////////////////////////
 // modular matrix
 
+typedef vector<cl_MI> mvec;
+
 class modular_matrix
 {
 	friend ostream& operator<<(ostream& o, const modular_matrix& m);
@@ -633,90 +691,76 @@ public:
 	cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 	void mul_col(size_t col, const cl_MI x)
 	{
-		mvec::iterator i = m.begin() + col;
 		for ( size_t rc=0; rc<r; ++rc ) {
-			*i = *i * x;
-			i += c;
+			std::size_t i = c*rc + col;
+			m[i] = m[i] * x;
 		}
 	}
 	void sub_col(size_t col1, size_t col2, const cl_MI fac)
 	{
-		mvec::iterator i1 = m.begin() + col1;
-		mvec::iterator i2 = m.begin() + col2;
 		for ( size_t rc=0; rc<r; ++rc ) {
-			*i1 = *i1 - *i2 * fac;
-			i1 += c;
-			i2 += c;
+			std::size_t i1 = col1 + c*rc;
+			std::size_t i2 = col2 + c*rc;
+			m[i1] = m[i1] - m[i2]*fac;
 		}
 	}
 	void switch_col(size_t col1, size_t col2)
 	{
-		cl_MI buf;
-		mvec::iterator i1 = m.begin() + col1;
-		mvec::iterator i2 = m.begin() + col2;
 		for ( size_t rc=0; rc<r; ++rc ) {
-			buf = *i1; *i1 = *i2; *i2 = buf;
-			i1 += c;
-			i2 += c;
+			std::size_t i1 = col1 + rc*c;
+			std::size_t i2 = col2 + rc*c;
+			std::swap(m[i1], m[i2]);
 		}
 	}
 	void mul_row(size_t row, const cl_MI x)
 	{
-		vector<cl_MI>::iterator i = m.begin() + row*c;
 		for ( size_t cc=0; cc<c; ++cc ) {
-			*i = *i * x;
-			++i;
+			std::size_t i = row*c + cc; 
+			m[i] = m[i] * x;
 		}
 	}
 	void sub_row(size_t row1, size_t row2, const cl_MI fac)
 	{
-		vector<cl_MI>::iterator i1 = m.begin() + row1*c;
-		vector<cl_MI>::iterator i2 = m.begin() + row2*c;
 		for ( size_t cc=0; cc<c; ++cc ) {
-			*i1 = *i1 - *i2 * fac;
-			++i1;
-			++i2;
+			std::size_t i1 = row1*c + cc;
+			std::size_t i2 = row2*c + cc;
+			m[i1] = m[i1] - m[i2]*fac;
 		}
 	}
 	void switch_row(size_t row1, size_t row2)
 	{
-		cl_MI buf;
-		vector<cl_MI>::iterator i1 = m.begin() + row1*c;
-		vector<cl_MI>::iterator i2 = m.begin() + row2*c;
 		for ( size_t cc=0; cc<c; ++cc ) {
-			buf = *i1; *i1 = *i2; *i2 = buf;
-			++i1;
-			++i2;
+			std::size_t i1 = row1*c + cc;
+			std::size_t i2 = row2*c + cc;
+			std::swap(m[i1], m[i2]);
 		}
 	}
 	bool is_col_zero(size_t col) const
 	{
 		mvec::const_iterator i = m.begin() + col;
 		for ( size_t rr=0; rr<r; ++rr ) {
-			if ( !zerop(*i) ) {
+			std::size_t i = col + rr*c;
+			if ( !zerop(m[i]) ) {
 				return false;
 			}
-			i += c;
 		}
 		return true;
 	}
 	bool is_row_zero(size_t row) const
 	{
-		mvec::const_iterator i = m.begin() + row*c;
 		for ( size_t cc=0; cc<c; ++cc ) {
-			if ( !zerop(*i) ) {
+			std::size_t i = row*c + cc;
+			if ( !zerop(m[i]) ) {
 				return false;
 			}
-			++i;
 		}
 		return true;
 	}
 	void set_row(size_t row, const vector<cl_MI>& newrow)
 	{
-		mvec::iterator i1 = m.begin() + row*c;
-		mvec::const_iterator i2 = newrow.begin(), end = newrow.end();
-		for ( ; i2 != end; ++i1, ++i2 ) {
-			*i1 = *i2;
+		for (std::size_t i2 = 0; i2 < newrow.size(); ++i2) {
+			std::size_t i1 = row*c + i2;
+			m[i1] = newrow[i2];
 		}
 	}
 	mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
@@ -768,6 +812,11 @@ ostream& operator<<(ostream& o, const modular_matrix& m)
 // END modular matrix
 ////////////////////////////////////////////////////////////////////////////////
 
+/** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
+ *
+ *  @param[in]  a_  modular polynomial
+ *  @param[out] Q   Q matrix
+ */
 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 {
 	umodpoly a = a_;
@@ -791,6 +840,11 @@ static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 	}
 }
 
+/** Determine the nullspace of a matrix M-1.
+ *
+ *  @param[in,out] M      matrix, will be modified
+ *  @param[out]    basis  calculated nullspace of M-1
+ */
 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 {
 	const size_t n = M.rowsize();
@@ -835,11 +889,20 @@ static void nullspace(modular_matrix& M, vector<mvec>& basis)
 	}
 }
 
+/** Berlekamp's modular factorization.
+ *  
+ *  The implementation follows the algorithm in chapter 8 of [GCL].
+ *
+ *  @param[in]  a    modular polynomial
+ *  @param[out] upv  vector containing modular factors. if upv was not empty the
+ *                   new elements are added at the end
+ */
 static void berlekamp(const umodpoly& a, upvec& upv)
 {
 	cl_modint_ring R = a[0].ring();
 	umodpoly one(1, R->one());
 
+	// find nullspace of Q matrix
 	modular_matrix Q(degree(a), degree(a), R->zero());
 	q_matrix(a, Q);
 	vector<mvec> nu;
@@ -847,6 +910,7 @@ static void berlekamp(const umodpoly& a, upvec& upv)
 
 	const unsigned int k = nu.size();
 	if ( k == 1 ) {
+		// irreducible
 		return;
 	}
 
@@ -858,6 +922,7 @@ static void berlekamp(const umodpoly& a, upvec& upv)
 
 	list<umodpoly>::iterator u = factors.begin();
 
+	// calculate all gcd's
 	while ( true ) {
 		for ( unsigned int s=0; s<q; ++s ) {
 			umodpoly nur = nu[r];
@@ -897,6 +962,16 @@ static void berlekamp(const umodpoly& a, upvec& upv)
 	}
 }
 
+// modular square free factorization is not used at the moment so we deactivate
+// the code
+#if 0
+
+/** Calculates a^(1/prime).
+ *  
+ *  @param[in] a      polynomial
+ *  @param[in] prime  prime number -> exponent 1/prime
+ *  @param[in] ap     resulting polynomial
+ */
 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 {
 	size_t newdeg = degree(a)/prime;
@@ -907,6 +982,12 @@ static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 	}
 }
 
+/** Modular square free factorization.
+ *
+ *  @param[in]  a        polynomial
+ *  @param[out] factors  modular factors
+ *  @param[out] mult     corresponding multiplicities (exponents)
+ */
 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 {
 	const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
@@ -952,6 +1033,18 @@ static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 	}
 }
 
+#endif // deactivation of square free factorization
+
+/** Distinct degree factorization (DDF).
+ *  
+ *  The implementation follows the algorithm in chapter 8 of [GCL].
+ *
+ *  @param[in]  a_         modular polynomial
+ *  @param[out] degrees    vector containing the degrees of the factors of the
+ *                         corresponding polynomials in ddfactors.
+ *  @param[out] ddfactors  vector containing polynomials which factors have the
+ *                         degree given in degrees.
+ */
 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
 {
 	umodpoly a = a_;
@@ -966,7 +1059,6 @@ static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upv
 	w[1] = R->one();
 	umodpoly x = w;
 
-	bool nontrivial = false;
 	while ( i <= nhalf ) {
 		expt_pos(w, q);
 		umodpoly buf;
@@ -994,10 +1086,19 @@ static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upv
 	}
 }
 
+/** Modular same degree factorization.
+ *  Same degree factorization is a kind of misnomer. It performs distinct degree
+ *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
+ *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
+ *  degree.
+ *
+ *  @param[in]  a    modular polynomial
+ *  @param[out] upv  vector containing modular factors. if upv was not empty the
+ *                   new elements are added at the end
+ */
 static void same_degree_factor(const umodpoly& a, upvec& upv)
 {
 	cl_modint_ring R = a[0].ring();
-	int deg = degree(a);
 
 	vector<int> degrees;
 	upvec ddfactors;
@@ -1013,40 +1114,29 @@ static void same_degree_factor(const umodpoly& a, upvec& upv)
 	}
 }
 
+// Yes, we can (choose).
+#define USE_SAME_DEGREE_FACTOR
+
+/** Modular univariate factorization.
+ *
+ *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
+ *  and same degree factorization (SDF). SDF seems to be slightly faster in
+ *  almost all cases so it is activated as default.
+ *
+ *  @param[in]  p    modular polynomial
+ *  @param[out] upv  vector containing modular factors. if upv was not empty the
+ *                   new elements are added at the end
+ */
 static void factor_modular(const umodpoly& p, upvec& upv)
 {
-	upvec factors;
-	vector<int> mult;
-	modsqrfree(p, factors, mult);
-
-#define USE_SAME_DEGREE_FACTOR
 #ifdef USE_SAME_DEGREE_FACTOR
-	for ( size_t i=0; i<factors.size(); ++i ) {
-		upvec upvbuf;
-		same_degree_factor(factors[i], upvbuf);
-		for ( int j=mult[i]; j>0; --j ) {
-			upv.insert(upv.end(), upvbuf.begin(), upvbuf.end());
-		}
-	}
+	same_degree_factor(p, upv);
 #else
-	for ( size_t i=0; i<factors.size(); ++i ) {
-		upvec upvbuf;
-		berlekamp(factors[i], upvbuf);
-		if ( upvbuf.size() ) {
-			for ( size_t j=0; j<upvbuf.size(); ++j ) {
-				upv.insert(upv.end(), mult[i], upvbuf[j]);
-			}
-		}
-		else {
-			for ( int j=mult[i]; j>0; --j ) {
-				upv.push_back(factors[i]);
-			}
-		}
-	}
+	berlekamp(p, upv);
 #endif
 }
 
-/** Calculates polynomials s and t such that a*s+b*t==1.
+/** Calculates modular polynomials s and t such that a*s+b*t==1.
  *  Assertion: a and b are relatively prime and not zero.
  *
  *  @param[in]  a  polynomial
@@ -1096,6 +1186,12 @@ static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpol
 	canonicalize(t);
 }
 
+/** Replaces the leading coefficient in a polynomial by a given number.
+ *
+ *  @param[in] poly  polynomial to change
+ *  @param[in] lc    new leading coefficient
+ *  @return          changed polynomial
+ */
 static upoly replace_lc(const upoly& poly, const cl_I& lc)
 {
 	if ( poly.empty() ) return poly;
@@ -1104,19 +1200,60 @@ static upoly replace_lc(const upoly& poly, const cl_I& lc)
 	return r;
 }
 
+/** Calculates the bound for the modulus.
+ *  See [Mig].
+ */
+static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
+{
+	cl_I maxcoeff = 0;
+	cl_R coeff = 0;
+	for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
+		cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
+		if ( aa > maxcoeff ) maxcoeff = aa;
+		coeff = coeff + square(aa);
+	}
+	cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
+	cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
+	return ( B > maxcoeff ) ? B : maxcoeff;
+}
+
+/** Calculates the bound for the modulus.
+ *  See [Mig].
+ */
+static inline cl_I calc_bound(const upoly& a, int maxdeg)
+{
+	cl_I maxcoeff = 0;
+	cl_R coeff = 0;
+	for ( int i=degree(a); i>=0; --i ) {
+		cl_I aa = abs(a[i]);
+		if ( aa > maxcoeff ) maxcoeff = aa;
+		coeff = coeff + square(aa);
+	}
+	cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
+	cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
+	return ( B > maxcoeff ) ? B : maxcoeff;
+}
+
+/** Hensel lifting as used by factor_univariate().
+ *
+ *  The implementation follows the algorithm in chapter 6 of [GCL].
+ *
+ *  @param[in]  a_   primitive univariate polynomials
+ *  @param[in]  p    prime number that does not divide lcoeff(a)
+ *  @param[in]  u1_  modular factor of a (mod p)
+ *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
+ *                   fulfilling  u1_*w1_ == a mod p
+ *  @param[out] u    lifted factor
+ *  @param[out] w    lifted factor, u*w = a
+ */
 static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
 {
 	upoly a = a_;
 	const cl_modint_ring& R = u1_[0].ring();
 
 	// calc bound B
-	cl_R maxcoeff = 0;
-	for ( int i=degree(a); i>=0; --i ) {
-		maxcoeff = maxcoeff + square(abs(a[i]));
-	}
-	cl_I normmc = ceiling1(the<cl_R>(cln::sqrt(maxcoeff)));
-	cl_I maxdegree = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
-	cl_I B = normmc * expt_pos(cl_I(2), maxdegree);
+	int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
+	cl_I maxmodulus = 2*calc_bound(a, maxdeg);
 
 	// step 1
 	cl_I alpha = lcoeff(a);
@@ -1143,16 +1280,9 @@ static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_,
 	w = replace_lc(umodpoly_to_upoly(w1), alpha);
 	upoly e = a - u * w;
 	cl_I modulus = p;
-	const cl_I maxmodulus = 2*B*abs(alpha);
 
 	// step 4
 	while ( !e.empty() && modulus < maxmodulus ) {
-		// ad-hoc divisablity check
-		for ( size_t k=0; k<e.size(); ++k ) {
-			if ( !zerop(mod(e[k], modulus)) ) {
-				goto quickexit;
-			}
-		}
 		upoly c = e / modulus;
 		phi = umodpoly_to_upoly(s) * c;
 		umodpoly sigmatilde;
@@ -1171,7 +1301,6 @@ static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_,
 		e = a - u * w;
 		modulus = modulus * p;
 	}
-quickexit: ;
 
 	// step 5
 	if ( e.empty() ) {
@@ -1193,6 +1322,11 @@ quickexit: ;
 	}
 }
 
+/** Returns a new prime number.
+ *
+ *  @param[in] p  prime number
+ *  @return       next prime number after p
+ */
 static unsigned int next_prime(unsigned int p)
 {
 	static vector<unsigned int> primes;
@@ -1223,73 +1357,147 @@ static unsigned int next_prime(unsigned int p)
 	throw logic_error("next_prime: should not reach this point!");
 }
 
+/** Manages the splitting a vector of of modular factors into two partitions.
+ */
 class factor_partition
 {
 public:
+	/** Takes the vector of modular factors and initializes the first partition */
 	factor_partition(const upvec& factors_) : factors(factors_)
 	{
 		n = factors.size();
-		k.resize(n, 1);
-		k[0] = 0;
-		sum = n-1;
+		k.resize(n, 0);
+		k[0] = 1;
+		cache.resize(n-1);
 		one.resize(1, factors.front()[0].ring()->one());
+		len = 1;
+		last = 0;
 		split();
 	}
 	int operator[](size_t i) const { return k[i]; }
 	size_t size() const { return n; }
-	size_t size_first() const { return n-sum; }
-	size_t size_second() const { return sum; }
-#ifdef DEBUGFACTOR
-	void get() const { DCOUTVAR(k); }
-#endif
+	size_t size_left() const { return n-len; }
+	size_t size_right() const { return len; }
+	/** Initializes the next partition.
+	    Returns true, if there is one, false otherwise. */
 	bool next()
 	{
-		for ( size_t i=n-1; i>=1; --i ) {
-			if ( k[i] ) {
-				--k[i];
-				--sum;
-				if ( sum > 0 ) {
-					split();
-					return true;
+		if ( last == n-1 ) {
+			int rem = len - 1;
+			int p = last - 1;
+			while ( rem ) {
+				if ( k[p] ) {
+					--rem;
+					--p;
+					continue;
 				}
-				else {
-					return false;
+				last = p - 1;
+				while ( k[last] == 0 ) { --last; }
+				if ( last == 0 && n == 2*len ) return false;
+				k[last++] = 0;
+				for ( size_t i=0; i<=len-rem; ++i ) {
+					k[last] = 1;
+					++last;
 				}
+				fill(k.begin()+last, k.end(), 0);
+				--last;
+				split();
+				return true;
 			}
-			++k[i];
-			++sum;
+			last = len;
+			++len;
+			if ( len > n/2 ) return false;
+			fill(k.begin(), k.begin()+len, 1);
+			fill(k.begin()+len+1, k.end(), 0);
 		}
-		return false;
+		else {
+			k[last++] = 0;
+			k[last] = 1;
+		}
+		split();
+		return true;
 	}
-	void split()
+	/** Get first partition */
+	umodpoly& left() { return lr[0]; }
+	/** Get second partition */
+	umodpoly& right() { return lr[1]; }
+private:
+	void split_cached()
 	{
-		left = one;
-		right = one;
-		for ( size_t i=0; i<n; ++i ) {
-			if ( k[i] ) {
-				right = right * factors[i];
+		size_t i = 0;
+		do {
+			size_t pos = i;
+			int group = k[i++];
+			size_t d = 0;
+			while ( i < n && k[i] == group ) { ++d; ++i; }
+			if ( d ) {
+				if ( cache[pos].size() >= d ) {
+					lr[group] = lr[group] * cache[pos][d-1];
+				}
+				else {
+					if ( cache[pos].size() == 0 ) {
+						cache[pos].push_back(factors[pos] * factors[pos+1]);
+					}
+					size_t j = pos + cache[pos].size() + 1;
+					d -= cache[pos].size();
+					while ( d ) {
+						umodpoly buf = cache[pos].back() * factors[j];
+						cache[pos].push_back(buf);
+						--d;
+						++j;
+					}
+					lr[group] = lr[group] * cache[pos].back();
+				}
 			}
 			else {
-				left = left * factors[i];
+				lr[group] = lr[group] * factors[pos];
+			}
+		} while ( i < n );
+	}
+	void split()
+	{
+		lr[0] = one;
+		lr[1] = one;
+		if ( n > 6 ) {
+			split_cached();
+		}
+		else {
+			for ( size_t i=0; i<n; ++i ) {
+				lr[k[i]] = lr[k[i]] * factors[i];
 			}
 		}
 	}
-public:
-	umodpoly left, right;
 private:
+	umodpoly lr[2];
+	vector< vector<umodpoly> > cache;
 	upvec factors;
 	umodpoly one;
-	size_t n, sum;
+	size_t n;
+	size_t len;
+	size_t last;
 	vector<int> k;
 };
 
+/** Contains a pair of univariate polynomial and its modular factors.
+ *  Used by factor_univariate().
+ */
 struct ModFactors
 {
 	upoly poly;
 	upvec factors;
 };
 
-static ex factor_univariate(const ex& poly, const ex& x)
+/** Univariate polynomial factorization.
+ *
+ *  Modular factorization is tried for several primes to minimize the number of
+ *  modular factors. Then, Hensel lifting is performed.
+ *
+ *  @param[in]     poly   expanded square free univariate polynomial
+ *  @param[in]     x      symbol
+ *  @param[in,out] prime  prime number to start trying modular factorization with,
+ *                        output value is the prime number actually used
+ */
+static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
 {
 	ex unit, cont, prim_ex;
 	poly.unitcontprim(x, unit, cont, prim_ex);
@@ -1297,18 +1505,19 @@ static ex factor_univariate(const ex& poly, const ex& x)
 	upoly_from_ex(prim, prim_ex, x);
 
 	// determine proper prime and minimize number of modular factors
-	unsigned int p = 3, lastp = 3;
+	prime = 3;
+	unsigned int lastp = prime;
 	cl_modint_ring R;
 	unsigned int trials = 0;
 	unsigned int minfactors = 0;
-	cl_I lc = lcoeff(prim);
+	cl_I lc = lcoeff(prim) * the<cl_I>(ex_to<numeric>(cont).to_cl_N());
 	upvec factors;
 	while ( trials < 2 ) {
 		umodpoly modpoly;
 		while ( true ) {
-			p = next_prime(p);
-			if ( !zerop(rem(lc, p)) ) {
-				R = find_modint_ring(p);
+			prime = next_prime(prime);
+			if ( !zerop(rem(lc, prime)) ) {
+				R = find_modint_ring(prime);
 				umodpoly_from_upoly(modpoly, prim, R);
 				if ( squarefree(modpoly) ) break;
 			}
@@ -1324,16 +1533,16 @@ static ex factor_univariate(const ex& poly, const ex& x)
 
 		if ( minfactors == 0 || trialfactors.size() < minfactors ) {
 			factors = trialfactors;
-			minfactors = factors.size();
-			lastp = p;
+			minfactors = trialfactors.size();
+			lastp = prime;
 			trials = 1;
 		}
 		else {
 			++trials;
 		}
 	}
-	p = lastp;
-	R = find_modint_ring(p);
+	prime = lastp;
+	R = find_modint_ring(prime);
 
 	// lift all factor combinations
 	stack<ModFactors> tocheck;
@@ -1347,10 +1556,12 @@ static ex factor_univariate(const ex& poly, const ex& x)
 		const size_t n = tocheck.top().factors.size();
 		factor_partition part(tocheck.top().factors);
 		while ( true ) {
-			hensel_univar(tocheck.top().poly, p, part.left, part.right, f1, f2);
+			// call Hensel lifting
+			hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
 			if ( !f1.empty() ) {
-				if ( part.size_first() == 1 ) {
-					if ( part.size_second() == 1 ) {
+				// successful, update the stack and the result
+				if ( part.size_left() == 1 ) {
+					if ( part.size_right() == 1 ) {
 						result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
 						tocheck.pop();
 						break;
@@ -1365,8 +1576,8 @@ static ex factor_univariate(const ex& poly, const ex& x)
 					}
 					break;
 				}
-				else if ( part.size_second() == 1 ) {
-					if ( part.size_first() == 1 ) {
+				else if ( part.size_right() == 1 ) {
+					if ( part.size_left() == 1 ) {
 						result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
 						tocheck.pop();
 						break;
@@ -1382,7 +1593,7 @@ static ex factor_univariate(const ex& poly, const ex& x)
 					break;
 				}
 				else {
-					upvec newfactors1(part.size_first()), newfactors2(part.size_second());
+					upvec newfactors1(part.size_left()), newfactors2(part.size_right());
 					upvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
 					for ( size_t i=0; i<n; ++i ) {
 						if ( part[i] ) {
@@ -1402,7 +1613,10 @@ static ex factor_univariate(const ex& poly, const ex& x)
 				}
 			}
 			else {
+				// not successful
 				if ( !part.next() ) {
+					// if no more combinations left, return polynomial as
+					// irreducible
 					result *= upoly_to_ex(tocheck.top().poly, x);
 					tocheck.pop();
 					break;
@@ -1414,16 +1628,52 @@ static ex factor_univariate(const ex& poly, const ex& x)
 	return unit * cont * result;
 }
 
+/** Second interface to factor_univariate() to be used if the information about
+ *  the prime is not needed.
+ */
+static inline ex factor_univariate(const ex& poly, const ex& x)
+{
+	unsigned int prime;
+	return factor_univariate(poly, x, prime);
+}
+
+/** Represents an evaluation point (<symbol>==<integer>).
+ */
 struct EvalPoint
 {
 	ex x;
 	int evalpoint;
 };
 
+#ifdef DEBUGFACTOR
+ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
+{
+	for ( size_t i=0; i<v.size(); ++i ) {
+		o << "(" << v[i].x << "==" << v[i].evalpoint << ") ";
+	}
+	return o;
+}
+#endif // def DEBUGFACTOR
+
 // forward declaration
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
 
-upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Solves the equation
+ *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
+ *  with deg(s_i) < deg(a_i)
+ *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
+ *
+ *  The implementation follows the algorithm in chapter 6 of [GCL].
+ *
+ *  @param[in]  a   vector of modular univariate polynomials
+ *  @param[in]  x   symbol
+ *  @param[in]  p   prime number
+ *  @param[in]  k   p^k is modulus
+ *  @return         vector of polynomials (s_i)
+ */
+static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
 {
 	const size_t r = a.size();
 	cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
@@ -1451,10 +1701,12 @@ upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned i
 	return s;
 }
 
-/**
- *  Assert: a not empty.
+/** Changes the modulus of a modular polynomial. Used by eea_lift().
+ *
+ *  @param[in]     R  new modular ring
+ *  @param[in,out] a  polynomial to change (in situ)
  */
-void change_modulus(const cl_modint_ring& R, umodpoly& a)
+static void change_modulus(const cl_modint_ring& R, umodpoly& a)
 {
 	if ( a.empty() ) return;
 	cl_modint_ring oldR = a[0].ring();
@@ -1465,7 +1717,21 @@ void change_modulus(const cl_modint_ring& R, umodpoly& a)
 	canonicalize(a);
 }
 
-void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Solves  s*a + t*b == 1 mod p^k  given a,b.
+ *
+ *  The implementation follows the algorithm in chapter 6 of [GCL].
+ *
+ *  @param[in]  a   polynomial
+ *  @param[in]  b   polynomial
+ *  @param[in]  x   symbol
+ *  @param[in]  p   prime number
+ *  @param[in]  k   p^k is modulus
+ *  @param[out] s_  output polynomial
+ *  @param[out] t_  output polynomial
+ */
+static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
 {
 	cl_modint_ring R = find_modint_ring(p);
 	umodpoly amod = a;
@@ -1508,7 +1774,22 @@ void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p,
 	s_ = s; t_ = t;
 }
 
-upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Solves the equation
+ *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
+ *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
+ *
+ *  The implementation follows the algorithm in chapter 6 of [GCL].
+ *
+ *  @param a  vector with univariate polynomials mod p^k
+ *  @param x  symbol
+ *  @param m  exponent of x^m in the equation to solve
+ *  @param p  prime number
+ *  @param k  p^k is modulus
+ *  @return   vector of polynomials (s_i)
+ */
+static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
 {
 	cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
 
@@ -1538,6 +1819,10 @@ upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int
 	return result;
 }
 
+/** Map used by function make_modular().
+ *  Finds every coefficient in a polynomial and replaces it by is value in the
+ *  given modular ring R (symmetric representation).
+ */
 struct make_modular_map : public map_function {
 	cl_modint_ring R;
 	make_modular_map(const cl_modint_ring& R_) : R(R_) { }
@@ -1562,13 +1847,38 @@ struct make_modular_map : public map_function {
 	}
 };
 
+/** Helps mimicking modular multivariate polynomial arithmetic.
+ *
+ *  @param e  expression of which to make the coefficients equal to their value
+ *            in the modular ring R (symmetric representation)
+ *  @param R  modular ring
+ *  @return   resulting expression
+ */
 static ex make_modular(const ex& e, const cl_modint_ring& R)
 {
 	make_modular_map map(R);
 	return map(e.expand());
 }
 
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k)
+/** Utility function for multivariate Hensel lifting.
+ *
+ *  Returns the polynomials s_i that fulfill
+ *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
+ *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
+ *
+ *  The implementation follows the algorithm in chapter 6 of [GCL].
+ *
+ *  @param a_  vector of multivariate factors mod p^k
+ *  @param x   symbol (equiv. x_1 in [GCL])
+ *  @param c   polynomial mod p^k
+ *  @param I   vector of evaluation points
+ *  @param d   maximum total degree of result
+ *  @param p   prime number
+ *  @param k   p^k is modulus
+ *  @return    vector of polynomials (s_i)
+ */
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
+                                    unsigned int d, unsigned int p, unsigned int k)
 {
 	vector<ex> a = a_;
 
@@ -1606,22 +1916,20 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
 		ex e = make_modular(buf, R);
 
 		ex monomial = 1;
-		for ( size_t m=1; m<=d; ++m ) {
-			while ( !e.is_zero() && e.has(xnu) ) {
-				monomial *= (xnu - alphanu);
-				monomial = expand(monomial);
-				ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
-				cm = make_modular(cm, R);
-				if ( !cm.is_zero() ) {
-					vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
-					ex buf = e;
-					for ( size_t j=0; j<delta_s.size(); ++j ) {
-						delta_s[j] *= monomial;
-						sigma[j] += delta_s[j];
-						buf -= delta_s[j] * b[j];
-					}
-					e = make_modular(buf, R);
+		for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
+			monomial *= (xnu - alphanu);
+			monomial = expand(monomial);
+			ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
+			cm = make_modular(cm, R);
+			if ( !cm.is_zero() ) {
+				vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
+				ex buf = e;
+				for ( size_t j=0; j<delta_s.size(); ++j ) {
+					delta_s[j] *= monomial;
+					sigma[j] += delta_s[j];
+					buf -= delta_s[j] * b[j];
 				}
+				e = make_modular(buf, R);
 			}
 		}
 	}
@@ -1671,17 +1979,24 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
 	return sigma;
 }
 
-#ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
-{
-	for ( size_t i=0; i<v.size(); ++i ) {
-		o << "(" << v[i].x << "==" << v[i].evalpoint << ") ";
-	}
-	return o;
-}
-#endif // def DEBUGFACTOR
-
-ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
+/** Multivariate Hensel lifting.
+ *  The implementation follows the algorithm in chapter 6 of [GCL].
+ *  Since we don't have a data type for modular multivariate polynomials, the
+ *  respective operations are done in a GiNaC::ex and the function
+ *  make_modular() is then called to make the coefficient modular p^l.
+ *
+ *  @param a    multivariate polynomial primitive in x
+ *  @param x    symbol (equiv. x_1 in [GCL])
+ *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
+ *  @param p    prime number (should not divide lcoeff(a mod I))
+ *  @param l    p^l is the modulus of the lifted univariate field
+ *  @param u    vector of modular (mod p^l) factors of a mod I
+ *  @param lcU  correct leading coefficient of the univariate factors of a mod I
+ *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
+ *              empty if Hensel lifting did not succeed
+ */
+static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
+                          unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
 {
 	const size_t nu = I.size() + 1;
 	const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
@@ -1777,10 +2092,13 @@ ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigne
 	}
 }
 
+/** Takes a factorized expression and puts the factors in a lst. The exponents
+ *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
+ *  element of the list is always the numeric coefficient.
+ */
 static ex put_factors_into_lst(const ex& e)
 {
 	lst result;
-
 	if ( is_a<numeric>(e) ) {
 		result.append(e);
 		return result;
@@ -1788,13 +2106,11 @@ static ex put_factors_into_lst(const ex& e)
 	if ( is_a<power>(e) ) {
 		result.append(1);
 		result.append(e.op(0));
-		result.append(e.op(1));
 		return result;
 	}
 	if ( is_a<symbol>(e) || is_a<add>(e) ) {
 		result.append(1);
 		result.append(e);
-		result.append(1);
 		return result;
 	}
 	if ( is_a<mul>(e) ) {
@@ -1806,11 +2122,9 @@ static ex put_factors_into_lst(const ex& e)
 			}
 			if ( is_a<power>(op) ) {
 				result.append(op.op(0));
-				result.append(op.op(1));
 			}
 			if ( is_a<symbol>(op) || is_a<add>(op) ) {
 				result.append(op);
-				result.append(1);
 			}
 		}
 		result.prepend(nfac);
@@ -1819,25 +2133,19 @@ static ex put_factors_into_lst(const ex& e)
 	throw runtime_error("put_factors_into_lst: bad term.");
 }
 
-#ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const vector<numeric>& v)
-{
-	for ( size_t i=0; i<v.size(); ++i ) {
-		o << v[i] << " ";
-	}
-	return o;
-}
-#endif // def DEBUGFACTOR
-
-static bool checkdivisors(const lst& f, vector<numeric>& d)
+/** Checks a set of numbers for whether each number has a unique prime factor.
+ *
+ *  @param[in]  f  list of numbers to check
+ *  @return        true: if number set is bad, false: if set is okay (has unique
+ *                 prime factors)
+ */
+static bool checkdivisors(const lst& f)
 {
-	const int k = f.nops()-2;
+	const int k = f.nops();
 	numeric q, r;
-	d[0] = ex_to<numeric>(f.op(0) * f.op(f.nops()-1));
-	if ( d[0] == 1 && k == 1 && abs(f.op(1)) != 1 ) {
-		return false;
-	}
-	for ( int i=1; i<=k; ++i ) {
+	vector<numeric> d(k);
+	d[0] = ex_to<numeric>(abs(f.op(0)));
+	for ( int i=1; i<k; ++i ) {
 		q = ex_to<numeric>(abs(f.op(i)));
 		for ( int j=i-1; j>=0; --j ) {
 			r = d[j];
@@ -1854,13 +2162,30 @@ static bool checkdivisors(const lst& f, vector<numeric>& d)
 	return false;
 }
 
-static bool generate_set(const ex& u, const ex& vn, const exset& syms, const ex& f, const numeric& modulus, vector<numeric>& a, vector<numeric>& d)
+/** Generates a set of evaluation points for a multivariate polynomial.
+ *  The set fulfills the following conditions:
+ *  1. lcoeff(evaluated_polynomial) does not vanish
+ *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
+ *  3. evaluated_polynomial is square free
+ *  See [Wan] for more details.
+ *
+ *  @param[in]     u        multivariate polynomial to be factored
+ *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
+ *  @param[in]     syms     set of symbols that appear in u
+ *  @param[in]     f        lst containing the factors of the leading coefficient vn
+ *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
+ *  @param[out]    u0       returns the evaluated (univariate) polynomial
+ *  @param[out]    a        returns the valid evaluation points. must have initial size equal
+ *                          number of symbols-1 before calling generate_set
+ */
+static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
+                         numeric& modulus, ex& u0, vector<numeric>& a)
 {
-	// computation of d is actually not necessary
 	const ex& x = *syms.begin();
-	bool trying = true;
-	do {
-		ex u0 = u;
+	while ( true ) {
+		++modulus;
+		// generate a set of integers ...
+		u0 = u;
 		ex vna = vn;
 		ex vnatry;
 		exset::const_iterator s = syms.begin();
@@ -1869,71 +2194,71 @@ static bool generate_set(const ex& u, const ex& vn, const exset& syms, const ex&
 			do {
 				a[i] = mod(numeric(rand()), 2*modulus) - modulus;
 				vnatry = vna.subs(*s == a[i]);
+				// ... for which the leading coefficient doesn't vanish ...
 			} while ( vnatry == 0 );
 			vna = vnatry;
 			u0 = u0.subs(*s == a[i]);
 			++s;
 		}
-		if ( gcd(u0,u0.diff(ex_to<symbol>(x))) != 1 ) {
+		// ... for which u0 is square free ...
+		ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
+		if ( !is_a<numeric>(g) ) {
 			continue;
 		}
-		if ( is_a<numeric>(vn) ) {
-			trying = false;
-		}
-		else {
-			lst fnum;
-			lst::const_iterator i = ex_to<lst>(f).begin();
-			fnum.append(*i++);
-			bool problem = false;
-			while ( i!=ex_to<lst>(f).end() ) {
-				ex fs = *i;
-				if ( !is_a<numeric>(fs) ) {
+		if ( !is_a<numeric>(vn) ) {
+			// ... and for which the evaluated factors have each an unique prime factor
+			lst fnum = f;
+			fnum.let_op(0) = fnum.op(0) * u0.content(x);
+			for ( size_t i=1; i<fnum.nops(); ++i ) {
+				if ( !is_a<numeric>(fnum.op(i)) ) {
 					s = syms.begin();
 					++s;
-					for ( size_t j=0; j<a.size(); ++j ) {
-						fs = fs.subs(*s == a[j]);
-						++s;
-					}
-					if ( abs(fs) == 1 ) {
-						problem = true;
-						break;
+					for ( size_t j=0; j<a.size(); ++j, ++s ) {
+						fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
 					}
 				}
-				fnum.append(fs);
-				++i; ++i;
 			}
-			if ( problem ) {
-				return true;
+			if ( checkdivisors(fnum) ) {
+				continue;
 			}
-			ex con = u0.content(x);
-			fnum.append(con);
-			trying = checkdivisors(fnum, d);
 		}
-	} while ( trying );
-	return false;
+		// ok, we have a valid set now
+		return;
+	}
 }
 
+// forward declaration
+static ex factor_sqrfree(const ex& poly);
+
+/** Multivariate factorization.
+ *  
+ *  The implementation is based on the algorithm described in [Wan].
+ *  An evaluation homomorphism (a set of integers) is determined that fulfills
+ *  certain criteria. The evaluated polynomial is univariate and is factorized
+ *  by factor_univariate(). The main work then is to find the correct leading
+ *  coefficients of the univariate factors. They have to correspond to the
+ *  factors of the (multivariate) leading coefficient of the input polynomial
+ *  (as defined for a specific variable x). After that the Hensel lifting can be
+ *  performed.
+ *
+ *  @param[in] poly  expanded, square free polynomial
+ *  @param[in] syms  contains the symbols in the polynomial
+ *  @return          factorized polynomial
+ */
 static ex factor_multivariate(const ex& poly, const exset& syms)
 {
 	exset::const_iterator s;
 	const ex& x = *syms.begin();
 
-	/* make polynomial primitive */
-	ex p = poly.expand().collect(x);
-	ex cont = p.lcoeff(x);
-	for ( numeric i=p.degree(x)-1; i>=p.ldegree(x); --i ) {
-		cont = gcd(cont, p.coeff(x,ex_to<numeric>(i).to_int()));
-		if ( cont == 1 ) break;
-	}
-	ex pp = expand(normal(p / cont));
+	// make polynomial primitive
+	ex unit, cont, pp;
+	poly.unitcontprim(x, unit, cont, pp);
 	if ( !is_a<numeric>(cont) ) {
-		return factor(cont) * factor(pp);
+		return factor_sqrfree(cont) * factor_sqrfree(pp);
 	}
 
-	/* factor leading coefficient */
-	pp = pp.collect(x);
-	ex vn = pp.lcoeff(x);
-	pp = pp.expand();
+	// factor leading coefficient
+	ex vn = pp.collect(x).lcoeff(x);
 	ex vnlst;
 	if ( is_a<numeric>(vn) ) {
 		vnlst = lst(vn);
@@ -1943,200 +2268,139 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
 		vnlst = put_factors_into_lst(vnfactors);
 	}
 
-	const numeric maxtrials = 3;
-	numeric modulus = (vnlst.nops()-1 > 3) ? vnlst.nops()-1 : 3;
-	numeric minimalr = -1;
+	const unsigned int maxtrials = 3;
+	numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
 	vector<numeric> a(syms.size()-1, 0);
-	vector<numeric> d((vnlst.nops()-1)/2+1, 0);
 
+	// try now to factorize until we are successful
 	while ( true ) {
-		numeric trialcount = 0;
+
+		unsigned int trialcount = 0;
+		unsigned int prime;
+		int factor_count = 0;
+		int min_factor_count = -1;
 		ex u, delta;
-		unsigned int prime = 3;
-		size_t factor_count = 0;
-		ex ufac;
-		ex ufaclst;
+		ex ufac, ufaclst;
+
+		// try several evaluation points to reduce the number of factors
 		while ( trialcount < maxtrials ) {
-			bool problem = generate_set(pp, vn, syms, vnlst, modulus, a, d);
-			if ( problem ) {
-				++modulus;
-				continue;
-			}
-			u = pp;
-			s = syms.begin();
-			++s;
-			for ( size_t i=0; i<a.size(); ++i ) {
-				u = u.subs(*s == a[i]);
-				++s;
-			}
-			delta = u.content(x);
-
-			// determine proper prime
-			prime = 3;
-			cl_modint_ring R = find_modint_ring(prime);
-			while ( true ) {
-				if ( irem(ex_to<numeric>(u.lcoeff(x)), prime) != 0 ) {
-					umodpoly modpoly;
-					umodpoly_from_ex(modpoly, u, x, R);
-					if ( squarefree(modpoly) ) break;
-				}
-				prime = next_prime(prime);
-				R = find_modint_ring(prime);
-			}
 
-			ufac = factor(u);
+			// generate a set of valid evaluation points
+			generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
+
+			ufac = factor_univariate(u, x, prime);
 			ufaclst = put_factors_into_lst(ufac);
-			factor_count = (ufaclst.nops()-1)/2;
-
-			// veto factorization for which gcd(u_i, u_j) != 1 for all i,j
-			upvec tryu;
-			for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-				umodpoly newu;
-				umodpoly_from_ex(newu, ufaclst.op(i*2+1), x, R);
-				tryu.push_back(newu);
-			}
-			bool veto = false;
-			for ( size_t i=0; i<tryu.size()-1; ++i ) {
-				for ( size_t j=i+1; j<tryu.size(); ++j ) {
-					umodpoly tryg;
-					gcd(tryu[i], tryu[j], tryg);
-					if ( unequal_one(tryg) ) {
-						veto = true;
-						goto escape_quickly;
-					}
-				}
-			}
-			escape_quickly: ;
-			if ( veto ) {
-				continue;
-			}
+			factor_count = ufaclst.nops()-1;
+			delta = ufaclst.op(0);
 
 			if ( factor_count <= 1 ) {
+				// irreducible
 				return poly;
 			}
-
-			if ( minimalr < 0 ) {
-				minimalr = factor_count;
+			if ( min_factor_count < 0 ) {
+				// first time here
+				min_factor_count = factor_count;
 			}
-			else if ( minimalr == factor_count ) {
+			else if ( min_factor_count == factor_count ) {
+				// one less to try
 				++trialcount;
-				++modulus;
 			}
-			else if ( minimalr > factor_count ) {
-				minimalr = factor_count;
+			else if ( min_factor_count > factor_count ) {
+				// new minimum, reset trial counter
+				min_factor_count = factor_count;
 				trialcount = 0;
 			}
-			if ( minimalr <= 1 ) {
-				return poly;
-			}
-		}
-
-		vector<numeric> ftilde((vnlst.nops()-1)/2+1);
-		ftilde[0] = ex_to<numeric>(vnlst.op(0));
-		for ( size_t i=1; i<ftilde.size(); ++i ) {
-			ex ft = vnlst.op((i-1)*2+1);
-			s = syms.begin();
-			++s;
-			for ( size_t j=0; j<a.size(); ++j ) {
-				ft = ft.subs(*s == a[j]);
-				++s;
-			}
-			ftilde[i] = ex_to<numeric>(ft);
-		}
-
-		vector<bool> used_flag((vnlst.nops()-1)/2+1, false);
-		vector<ex> D(factor_count, 1);
-		for ( size_t i=0; i<=factor_count; ++i ) {
-			numeric prefac;
-			if ( i == 0 ) {
-				prefac = ex_to<numeric>(ufaclst.op(0));
-				ftilde[0] = ftilde[0] / prefac;
-				vnlst.let_op(0) = vnlst.op(0) / prefac;
-				continue;
-			}
-			else {
-				prefac = ex_to<numeric>(ufaclst.op(2*(i-1)+1).lcoeff(x));
-			}
-			for ( size_t j=(vnlst.nops()-1)/2+1; j>0; --j ) {
-				if ( abs(ftilde[j-1]) == 1 ) {
-					used_flag[j-1] = true;
-					continue;
-				}
-				numeric g = gcd(prefac, ftilde[j-1]);
-				if ( g != 1 ) {
-					prefac = prefac / g;
-					numeric count = abs(iquo(g, ftilde[j-1]));
-					used_flag[j-1] = true;
-					if ( i > 0 ) {
-						if ( j == 1 ) {
-							D[i-1] = D[i-1] * pow(vnlst.op(0), count);
-						}
-						else {
-							D[i-1] = D[i-1] * pow(vnlst.op(2*(j-2)+1), count);
-						}
-					}
-					else {
-						ftilde[j-1] = ftilde[j-1] / prefac;
-						break;
-					}
-					++j;
-				}
-			}
-		}
-
-		bool some_factor_unused = false;
-		for ( size_t i=0; i<used_flag.size(); ++i ) {
-			if ( !used_flag[i] ) {
-				some_factor_unused = true;
-				break;
-			}
-		}
-		if ( some_factor_unused ) {
-			continue;
 		}
 
+		// determine true leading coefficients for the Hensel lifting
 		vector<ex> C(factor_count);
-		if ( delta == 1 ) {
-			for ( size_t i=0; i<D.size(); ++i ) {
-				ex Dtilde = D[i];
-				s = syms.begin();
-				++s;
-				for ( size_t j=0; j<a.size(); ++j ) {
-					Dtilde = Dtilde.subs(*s == a[j]);
-					++s;
-				}
-				C[i] = D[i] * (ufaclst.op(2*i+1).lcoeff(x) / Dtilde);
+		if ( is_a<numeric>(vn) ) {
+			// easy case
+			for ( size_t i=1; i<ufaclst.nops(); ++i ) {
+				C[i-1] = ufaclst.op(i).lcoeff(x);
 			}
 		}
 		else {
-			for ( size_t i=0; i<D.size(); ++i ) {
-				ex Dtilde = D[i];
+			// difficult case.
+			// we use the property of the ftilde having a unique prime factor.
+			// details can be found in [Wan].
+			// calculate ftilde
+			vector<numeric> ftilde(vnlst.nops()-1);
+			for ( size_t i=0; i<ftilde.size(); ++i ) {
+				ex ft = vnlst.op(i+1);
 				s = syms.begin();
 				++s;
 				for ( size_t j=0; j<a.size(); ++j ) {
-					Dtilde = Dtilde.subs(*s == a[j]);
+					ft = ft.subs(*s == a[j]);
 					++s;
 				}
-				ex ui;
-				if ( i == 0 ) {
-					ui = ufaclst.op(0);
+				ftilde[i] = ex_to<numeric>(ft);
+			}
+			// calculate D and C
+			vector<bool> used_flag(ftilde.size(), false);
+			vector<ex> D(factor_count, 1);
+			if ( delta == 1 ) {
+				for ( int i=0; i<factor_count; ++i ) {
+					numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
+					for ( int j=ftilde.size()-1; j>=0; --j ) {
+						int count = 0;
+						while ( irem(prefac, ftilde[j]) == 0 ) {
+							prefac = iquo(prefac, ftilde[j]);
+							++count;
+						}
+						if ( count ) {
+							used_flag[j] = true;
+							D[i] = D[i] * pow(vnlst.op(j+1), count);
+						}
+					}
+					C[i] = D[i] * prefac;
 				}
-				else {
-					ui = ufaclst.op(2*(i-1)+1);
+			}
+			else {
+				for ( int i=0; i<factor_count; ++i ) {
+					numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
+					for ( int j=ftilde.size()-1; j>=0; --j ) {
+						int count = 0;
+						while ( irem(prefac, ftilde[j]) == 0 ) {
+							prefac = iquo(prefac, ftilde[j]);
+							++count;
+						}
+						while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
+							numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
+							prefac = iquo(prefac, g);
+							delta = delta / (ftilde[j]/g);
+							ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
+							++count;
+						}
+						if ( count ) {
+							used_flag[j] = true;
+							D[i] = D[i] * pow(vnlst.op(j+1), count);
+						}
+					}
+					C[i] = D[i] * prefac;
 				}
-				while ( true ) {
-					ex d = gcd(ui.lcoeff(x), Dtilde);
-					C[i] = D[i] * ( ui.lcoeff(x) / d );
-					ui = ui * ( Dtilde[i] / d );
-					delta = delta / ( Dtilde[i] / d );
-					if ( delta == 1 ) break;
-					ui = delta * ui;
-					C[i] = delta * C[i];
-					pp = pp * pow(delta, D.size()-1);
+			}
+			// check if something went wrong
+			bool some_factor_unused = false;
+			for ( size_t i=0; i<used_flag.size(); ++i ) {
+				if ( !used_flag[i] ) {
+					some_factor_unused = true;
+					break;
 				}
 			}
+			if ( some_factor_unused ) {
+				continue;
+			}
+		}
+		
+		// multiply the remaining content of the univariate polynomial into the
+		// first factor
+		if ( delta != 1 ) {
+			C[0] = C[0] * delta;
+			ufaclst.let_op(1) = ufaclst.op(1) * delta;
 		}
 
+		// set up evaluation points
 		EvalPoint ep;
 		vector<EvalPoint> epv;
 		s = syms.begin();
@@ -2147,37 +2411,32 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
 			epv.push_back(ep);
 		}
 
-		// calc bound B
-		ex maxcoeff;
-		for ( int i=u.degree(x); i>=u.ldegree(x); --i ) {
-			maxcoeff += pow(abs(u.coeff(x, i)),2);
-		}
-		cl_I normmc = ceiling1(the<cl_R>(cln::sqrt(ex_to<numeric>(maxcoeff).to_cl_N())));
-		unsigned int maxdegree = 0;
-		for ( size_t i=0; i<factor_count; ++i ) {
-			if ( ufaclst[2*i+1].degree(x) > (int)maxdegree ) {
-				maxdegree = ufaclst[2*i+1].degree(x);
+		// calc bound p^l
+		int maxdeg = 0;
+		for ( int i=1; i<=factor_count; ++i ) {
+			if ( ufaclst.op(i).degree(x) > maxdeg ) {
+				maxdeg = ufaclst[i].degree(x);
 			}
 		}
-		cl_I B = normmc * expt_pos(cl_I(2), maxdegree);
+		cl_I B = 2*calc_bound(u, x, maxdeg);
 		cl_I l = 1;
 		cl_I pl = prime;
 		while ( pl < B ) {
 			l = l + 1;
 			pl = pl * prime;
 		}
-
-		upvec uvec;
+		
+		// set up modular factors (mod p^l)
 		cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
-		for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-			umodpoly newu;
-			umodpoly_from_ex(newu, ufaclst.op(i*2+1), x, R);
-			uvec.push_back(newu);
+		upvec modfactors(ufaclst.nops()-1);
+		for ( size_t i=1; i<ufaclst.nops(); ++i ) {
+			umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
 		}
 
-		ex res = hensel_multivar(ufaclst.op(0)*pp, x, epv, prime, l, uvec, C);
+		// try Hensel lifting
+		ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
 		if ( res != lst() ) {
-			ex result = cont * ufaclst.op(0);
+			ex result = cont * unit;
 			for ( size_t i=0; i<res.nops(); ++i ) {
 				result *= res.op(i).content(x) * res.op(i).unit(x);
 				result *= res.op(i).primpart(x);
@@ -2187,6 +2446,8 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
 	}
 }
 
+/** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
+ */
 struct find_symbols_map : public map_function {
 	exset syms;
 	ex operator()(const ex& e)
@@ -2199,6 +2460,9 @@ struct find_symbols_map : public map_function {
 	}
 };
 
+/** Factorizes a polynomial that is square free. It calls either the univariate
+ *  or the multivariate factorization functions.
+ */
 static ex factor_sqrfree(const ex& poly)
 {
 	// determine all symbols in poly
@@ -2212,6 +2476,7 @@ static ex factor_sqrfree(const ex& poly)
 		// univariate case
 		const ex& x = *(findsymbols.syms.begin());
 		if ( poly.ldegree(x) > 0 ) {
+			// pull out direct factors
 			int ld = poly.ldegree(x);
 			ex res = factor_univariate(expand(poly/pow(x, ld)), x);
 			return res * pow(x,ld);
@@ -2227,6 +2492,9 @@ static ex factor_sqrfree(const ex& poly)
 	return res;
 }
 
+/** Map used by factor() when factor_options::all is given to access all
+ *  subexpressions and to call factor() on them.
+ */
 struct apply_factor_map : public map_function {
 	unsigned options;
 	apply_factor_map(unsigned options_) : options(options_) { }
@@ -2255,6 +2523,10 @@ struct apply_factor_map : public map_function {
 
 } // anonymous namespace
 
+/** Interface function to the outside world. It checks the arguments, tries a
+ *  square free factorization, and then calls factor_sqrfree to do the hard
+ *  work.
+ */
 ex factor(const ex& poly, unsigned options)
 {
 	// check arguments
@@ -2280,7 +2552,7 @@ ex factor(const ex& poly, unsigned options)
 	}
 
 	// make poly square free
-	ex sfpoly = sqrfree(poly, syms);
+	ex sfpoly = sqrfree(poly.expand(), syms);
 
 	// factorize the square free components
 	if ( is_a<power>(sfpoly) ) {