X-Git-Url: https://www.ginac.de/ginac.git//ginac.git?p=ginac.git;a=blobdiff_plain;f=ginac%2Ffactor.cpp;h=fb73897218db06ed31b40f32bf52aabca7e33acf;hp=f7dded5e1566e1bebe33470b6800c0e37583fc88;hb=2b0ad5c381dc081cc4066b0c5f939f5169ad9ab3;hpb=2a5d912dc9407c6bd1dbee6cb99cfdc206c4e42c;ds=sidebyside

diff --git a/ginac/factor.cpp b/ginac/factor.cpp
index f7dded5e..fb738972 100644
--- a/ginac/factor.cpp
+++ b/ginac/factor.cpp
@@ -1,6 +1,6 @@
 /** @file factor.cpp
  *
- *  Polynomial factorization code (Implementation).
+ *  Polynomial factorization code (implementation).
  *
  *  Algorithms used can be found in
  *    [W1]  An Improved Multivariate Polynomial Factoring Algorithm,
@@ -29,11 +29,6 @@
 
 //#define DEBUGFACTOR
 
-#ifdef DEBUGFACTOR
-#include <ostream>
-#include <ginac/ginac.h>
-using namespace GiNaC;
-#else
 #include "factor.h"
 
 #include "ex.h"
@@ -46,21 +41,21 @@ using namespace GiNaC;
 #include "mul.h"
 #include "normal.h"
 #include "add.h"
-#endif
 
 #include <algorithm>
+#include <cmath>
+#include <limits>
 #include <list>
 #include <vector>
+#ifdef DEBUGFACTOR
+#include <ostream>
+#endif
 using namespace std;
 
 #include <cln/cln.h>
 using namespace cln;
 
-#ifdef DEBUGFACTOR
-namespace Factor {
-#else
 namespace GiNaC {
-#endif
 
 #ifdef DEBUGFACTOR
 #define DCOUT(str) cout << #str << endl
@@ -72,127 +67,330 @@ namespace GiNaC {
 #define DCOUT2(str,var)
 #endif
 
-// forward declaration
-ex factor(const ex& poly, unsigned options);
-
 // anonymous namespace to hide all utility functions
 namespace {
 
 typedef vector<cl_MI> mvec;
-
 #ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const mvec& v)
+ostream& operator<<(ostream& o, const vector<int>& v)
 {
-	mvec::const_iterator i = v.begin(), end = v.end();
+	vector<int>::const_iterator i = v.begin(), end = v.end();
 	while ( i != end ) {
 		o << *i++ << " ";
 	}
 	return o;
 }
-#endif // def DEBUGFACTOR
-
-#ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const vector<mvec>& v)
+ostream& operator<<(ostream& o, const vector<cl_I>& v)
 {
-	vector<mvec>::const_iterator i = v.begin(), end = v.end();
+	vector<cl_I>::const_iterator i = v.begin(), end = v.end();
 	while ( i != end ) {
-		o << *i++ << endl;
+		o << *i << "[" << i-v.begin() << "]" << " ";
+		++i;
 	}
 	return o;
 }
-#endif // def DEBUGFACTOR
+ostream& operator<<(ostream& o, const vector<cl_MI>& v)
+{
+	vector<cl_MI>::const_iterator i = v.begin(), end = v.end();
+	while ( i != end ) {
+		o << *i << "[" << i-v.begin() << "]" << " ";
+		++i;
+	}
+	return o;
+}
+ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
+{
+	vector< vector<cl_MI> >::const_iterator i = v.begin(), end = v.end();
+	while ( i != end ) {
+		o << i-v.begin() << ": " << *i << endl;
+		++i;
+	}
+	return o;
+}
+#endif
 
 ////////////////////////////////////////////////////////////////////////////////
 // modular univariate polynomial code
 
-typedef cl_UP_MI umod;
-typedef vector<umod> umodvec;
+typedef std::vector<cln::cl_MI> umodpoly;
+typedef std::vector<cln::cl_I> upoly;
+typedef vector<umodpoly> upvec;
 
-#define COPY(to,from) from.ring()->create(degree(from)); \
-	for ( int II=0; II<=degree(from); ++II ) to.set_coeff(II, coeff(from, II)); \
- 	to.finalize()
+// COPY FROM UPOLY.HPP
 
-#ifdef DEBUGFACTOR
-ostream& operator<<(ostream& o, const umodvec& v)
+// CHANGED size_t -> int !!!
+template<typename T> static int degree(const T& p)
 {
-	umodvec::const_iterator i = v.begin(), end = v.end();
-	while ( i != end ) {
-		o << *i++ << " , " << endl;
+	return p.size() - 1;
+}
+
+template<typename T> static typename T::value_type lcoeff(const T& p)
+{
+	return p[p.size() - 1];
+}
+
+static bool normalize_in_field(umodpoly& a)
+{
+	if (a.size() == 0)
+		return true;
+	if ( lcoeff(a) == a[0].ring()->one() ) {
+		return true;
 	}
-	return o;
+
+	const cln::cl_MI lc_1 = recip(lcoeff(a));
+	for (std::size_t k = a.size(); k-- != 0; )
+		a[k] = a[k]*lc_1;
+	return false;
+}
+
+template<typename T> static void
+canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
+{
+	if (p.empty())
+		return;
+
+	std::size_t i = p.size() - 1;
+	// Be fast if the polynomial is already canonicalized
+	if (!zerop(p[i]))
+		return;
+
+	if (hint < p.size())
+		i = hint;
+
+	bool is_zero = false;
+	do {
+		if (!zerop(p[i])) {
+			++i;
+			break;
+		}
+		if (i == 0) {
+			is_zero = true;
+			break;
+		}
+		--i;
+	} while (true);
+
+	if (is_zero) {
+		p.clear();
+		return;
+	}
+
+	p.erase(p.begin() + i, p.end());
+}
+
+// END COPY FROM UPOLY.HPP
+
+static void expt_pos(umodpoly& a, unsigned int q)
+{
+	if ( a.empty() ) return;
+	cl_MI zero = a[0].ring()->zero(); 
+	int deg = degree(a);
+	a.resize(degree(a)*q+1, zero);
+	for ( int i=deg; i>0; --i ) {
+		a[i*q] = a[i];
+		a[i] = zero;
+	}
+}
+
+template<typename T>
+static T operator+(const T& a, const T& b)
+{
+	int sa = a.size();
+	int sb = b.size();
+	if ( sa >= sb ) {
+		T r(sa);
+		int i = 0;
+		for ( ; i<sb; ++i ) {
+			r[i] = a[i] + b[i];
+		}
+		for ( ; i<sa; ++i ) {
+			r[i] = a[i];
+		}
+		canonicalize(r);
+		return r;
+	}
+	else {
+		T r(sb);
+		int i = 0;
+		for ( ; i<sa; ++i ) {
+			r[i] = a[i] + b[i];
+		}
+		for ( ; i<sb; ++i ) {
+			r[i] = b[i];
+		}
+		canonicalize(r);
+		return r;
+	}
+}
+
+template<typename T>
+static T operator-(const T& a, const T& b)
+{
+	int sa = a.size();
+	int sb = b.size();
+	if ( sa >= sb ) {
+		T r(sa);
+		int i = 0;
+		for ( ; i<sb; ++i ) {
+			r[i] = a[i] - b[i];
+		}
+		for ( ; i<sa; ++i ) {
+			r[i] = a[i];
+		}
+		canonicalize(r);
+		return r;
+	}
+	else {
+		T r(sb);
+		int i = 0;
+		for ( ; i<sa; ++i ) {
+			r[i] = a[i] - b[i];
+		}
+		for ( ; i<sb; ++i ) {
+			r[i] = -b[i];
+		}
+		canonicalize(r);
+		return r;
+	}
+}
+
+static upoly operator*(const upoly& a, const upoly& b)
+{
+	upoly c;
+	if ( a.empty() || b.empty() ) return c;
+
+	int n = degree(a) + degree(b);
+	c.resize(n+1, 0);
+	for ( int i=0 ; i<=n; ++i ) {
+		for ( int j=0 ; j<=i; ++j ) {
+			if ( j > degree(a) || (i-j) > degree(b) ) continue;
+			c[i] = c[i] + a[j] * b[i-j];
+		}
+	}
+	canonicalize(c);
+	return c;
+}
+
+static umodpoly operator*(const umodpoly& a, const umodpoly& b)
+{
+	umodpoly c;
+	if ( a.empty() || b.empty() ) return c;
+
+	int n = degree(a) + degree(b);
+	c.resize(n+1, a[0].ring()->zero());
+	for ( int i=0 ; i<=n; ++i ) {
+		for ( int j=0 ; j<=i; ++j ) {
+			if ( j > degree(a) || (i-j) > degree(b) ) continue;
+			c[i] = c[i] + a[j] * b[i-j];
+		}
+	}
+	canonicalize(c);
+	return c;
+}
+
+static upoly operator*(const upoly& a, const cl_I& x)
+{
+	if ( zerop(x) ) {
+		upoly r;
+		return r;
+	}
+	upoly r(a.size());
+	for ( size_t i=0; i<a.size(); ++i ) {
+		r[i] = a[i] * x;
+	}
+	return r;
+}
+
+static upoly operator/(const upoly& a, const cl_I& x)
+{
+	if ( zerop(x) ) {
+		upoly r;
+		return r;
+	}
+	upoly r(a.size());
+	for ( size_t i=0; i<a.size(); ++i ) {
+		r[i] = exquo(a[i],x);
+	}
+	return r;
+}
+
+static umodpoly operator*(const umodpoly& a, const cl_MI& x)
+{
+	umodpoly r(a.size());
+	for ( size_t i=0; i<a.size(); ++i ) {
+		r[i] = a[i] * x;
+	}
+	canonicalize(r);
+	return r;
 }
-#endif // def DEBUGFACTOR
 
-static umod umod_from_ex(const ex& e, const ex& x, const cl_univpoly_modint_ring& UPR)
+static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 {
 	// assert: e is in Z[x]
 	int deg = e.degree(x);
-	umod p = UPR->create(deg);
+	up.resize(deg+1);
 	int ldeg = e.ldegree(x);
 	for ( ; deg>=ldeg; --deg ) {
-		cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
-		p.set_coeff(deg, UPR->basering()->canonhom(coeff));
+		up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 	}
 	for ( ; deg>=0; --deg ) {
-		p.set_coeff(deg, UPR->basering()->zero());
+		up[deg] = 0;
 	}
-	p.finalize();
-	return p;
+	canonicalize(up);
 }
 
-static umod umod_from_ex(const ex& e, const ex& x, const cl_modint_ring& R)
+static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 {
-	return umod_from_ex(e, x, find_univpoly_ring(R));
+	int deg = degree(e);
+	ump.resize(deg+1);
+	for ( ; deg>=0; --deg ) {
+		ump[deg] = R->canonhom(e[deg]);
+	}
+	canonicalize(ump);
 }
 
-static umod umod_from_ex(const ex& e, const ex& x, const cl_I& modulus)
+static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 {
-	return umod_from_ex(e, x, find_modint_ring(modulus));
+	// assert: e is in Z[x]
+	int deg = e.degree(x);
+	ump.resize(deg+1);
+	int ldeg = e.ldegree(x);
+	for ( ; deg>=ldeg; --deg ) {
+		cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
+		ump[deg] = R->canonhom(coeff);
+	}
+	for ( ; deg>=0; --deg ) {
+		ump[deg] = R->zero();
+	}
+	canonicalize(ump);
 }
 
-static umod umod_from_modvec(const mvec& mv)
+#ifdef DEBUGFACTOR
+static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 {
-	size_t n = mv.size(); // assert: n>0
-	while ( n && zerop(mv[n-1]) ) --n;
-	cl_univpoly_modint_ring UPR = find_univpoly_ring(mv.front().ring());
-	if ( n == 0 ) {
-		umod p = UPR->create(-1);
-		p.finalize();
-		return p;
-	}
-	umod p = UPR->create(n-1);
-	for ( size_t i=0; i<n; ++i ) {
-		p.set_coeff(i, mv[i]);
-	}
-	p.finalize();
-	return p;
+	umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 }
+#endif
 
-static umod divide(const umod& a, const cl_I& x)
+static ex upoly_to_ex(const upoly& a, const ex& x)
 {
-	DCOUT(divide);
-	DCOUTVAR(a);
-	cl_univpoly_modint_ring UPR = a.ring();
-	cl_modint_ring R = UPR->basering();
-	int deg = degree(a);
-	umod newa = UPR->create(deg);
-	for ( int i=0; i<=deg; ++i ) {
-		cl_I c = R->retract(coeff(a, i));
-		newa.set_coeff(i, cl_MI(R, the<cl_I>(c / x)));
+	if ( a.empty() ) return 0;
+	ex e;
+	for ( int i=degree(a); i>=0; --i ) {
+		e += numeric(a[i]) * pow(x, i);
 	}
-	newa.finalize();
-	DCOUT(END divide);
-	return newa;
+	return e;
 }
 
-static ex umod_to_ex(const umod& a, const ex& x)
+static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 {
-	ex e;
-	cl_modint_ring R = a.ring()->basering();
+	if ( a.empty() ) return 0;
+	cl_modint_ring R = a[0].ring();
 	cl_I mod = R->modulus;
 	cl_I halfmod = (mod-1) >> 1;
+	ex e;
 	for ( int i=degree(a); i>=0; --i ) {
-		cl_I n = R->retract(coeff(a, i));
+		cl_I n = R->retract(a[i]);
 		if ( n > halfmod ) {
 			e += numeric(n-mod) * pow(x, i);
 		} else {
@@ -202,147 +400,219 @@ static ex umod_to_ex(const umod& a, const ex& x)
 	return e;
 }
 
-static void unit_normal(umod& a)
+static upoly umodpoly_to_upoly(const umodpoly& a)
 {
-	int deg = degree(a);
-	if ( deg >= 0 ) {
-		cl_MI lc = coeff(a, deg);
-		cl_MI one = a.ring()->basering()->one();
-		if ( lc != one ) {
-			umod newa = a.ring()->create(deg);
-			newa.set_coeff(deg, one);
-			for ( --deg; deg>=0; --deg ) {
-				cl_MI nc = div(coeff(a, deg), lc);
-				newa.set_coeff(deg, nc);
-			}
-			newa.finalize();
-			a = newa;
+	upoly e(a.size());
+	if ( a.empty() ) return e;
+	cl_modint_ring R = a[0].ring();
+	cl_I mod = R->modulus;
+	cl_I halfmod = (mod-1) >> 1;
+	for ( int i=degree(a); i>=0; --i ) {
+		cl_I n = R->retract(a[i]);
+		if ( n > halfmod ) {
+			e[i] = n-mod;
+		} else {
+			e[i] = n;
 		}
 	}
+	return e;
+}
+
+static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
+{
+	umodpoly e;
+	if ( a.empty() ) return e;
+	cl_modint_ring oldR = a[0].ring();
+	size_t sa = a.size();
+	e.resize(sa+m, R->zero());
+	for ( size_t i=0; i<sa; ++i ) {
+		e[i+m] = R->canonhom(oldR->retract(a[i]));
+	}
+	canonicalize(e);
+	return e;
+}
+
+/** Divides all coefficients of the polynomial a by the integer x.
+ *  All coefficients are supposed to be divisible by x. If they are not, the
+ *  the<cl_I> cast will raise an exception.
+ *
+ *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
+ *  @param[in]     x  integer that divides the coefficients
+ */
+static void reduce_coeff(umodpoly& a, const cl_I& x)
+{
+	if ( a.empty() ) return;
+
+	cl_modint_ring R = a[0].ring();
+	umodpoly::iterator i = a.begin(), end = a.end();
+	for ( ; i!=end; ++i ) {
+		// cln cannot perform this division in the modular field
+		cl_I c = R->retract(*i);
+		*i = cl_MI(R, the<cl_I>(c / x));
+	}
 }
 
-static umod rem(const umod& a, const umod& b)
+/** Calculates remainder of a/b.
+ *  Assertion: a and b not empty.
+ *
+ *  @param[in]  a  polynomial dividend
+ *  @param[in]  b  polynomial divisor
+ *  @param[out] r  polynomial remainder
+ */
+static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 {
 	int k, n;
 	n = degree(b);
 	k = degree(a) - n;
-	if ( k < 0 ) {
-		umod c = COPY(c, a);
-		return c;
-	}
+	r = a;
+	if ( k < 0 ) return;
 
-	umod c = COPY(c, a);
 	do {
-		cl_MI qk = div(coeff(c, n+k), coeff(b, n));
+		cl_MI qk = div(r[n+k], b[n]);
 		if ( !zerop(qk) ) {
-			unsigned int j;
 			for ( int i=0; i<n; ++i ) {
-				j = n + k - 1 - i;
-				c.set_coeff(j, coeff(c, j) - qk * coeff(b, j-k));
+				unsigned int j = n + k - 1 - i;
+				r[j] = r[j] - qk * b[j-k];
 			}
 		}
 	} while ( k-- );
 
-	cl_MI zero = a.ring()->basering()->zero();
-	for ( int i=degree(a); i>=n; --i ) {
-		c.set_coeff(i, zero);
-	}
-
-	c.finalize();
-	return c;
+	fill(r.begin()+n, r.end(), a[0].ring()->zero());
+	canonicalize(r);
 }
 
-static umod div(const umod& a, const umod& b)
+/** Calculates quotient of a/b.
+ *  Assertion: a and b not empty.
+ *
+ *  @param[in]  a  polynomial dividend
+ *  @param[in]  b  polynomial divisor
+ *  @param[out] q  polynomial quotient
+ */
+static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 {
 	int k, n;
 	n = degree(b);
 	k = degree(a) - n;
-	if ( k < 0 ) {
-		umod q = a.ring()->create(-1);
-		q.finalize();
-		return q;
-	}
+	q.clear();
+	if ( k < 0 ) return;
 
-	umod c = COPY(c, a);
-	umod q = a.ring()->create(k);
+	umodpoly r = a;
+	q.resize(k+1, a[0].ring()->zero());
 	do {
-		cl_MI qk = div(coeff(c, n+k), coeff(b, n));
+		cl_MI qk = div(r[n+k], b[n]);
 		if ( !zerop(qk) ) {
-			q.set_coeff(k, qk);
-			unsigned int j;
+			q[k] = qk;
 			for ( int i=0; i<n; ++i ) {
-				j = n + k - 1 - i;
-				c.set_coeff(j, coeff(c, j) - qk * coeff(b, j-k));
+				unsigned int j = n + k - 1 - i;
+				r[j] = r[j] - qk * b[j-k];
 			}
 		}
 	} while ( k-- );
 
-	q.finalize();
-	return q;
+	canonicalize(q);
 }
 
-static umod remdiv(const umod& a, const umod& b, umod& q)
+/** Calculates quotient and remainder of a/b.
+ *  Assertion: a and b not empty.
+ *
+ *  @param[in]  a  polynomial dividend
+ *  @param[in]  b  polynomial divisor
+ *  @param[out] r  polynomial remainder
+ *  @param[out] q  polynomial quotient
+ */
+static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 {
 	int k, n;
 	n = degree(b);
 	k = degree(a) - n;
-	if ( k < 0 ) {
-		q = a.ring()->create(-1);
-		q.finalize();
-		umod c = COPY(c, a);
-		return c;
-	}
+	q.clear();
+	r = a;
+	if ( k < 0 ) return;
 
-	umod c = COPY(c, a);
-	q = a.ring()->create(k);
+	q.resize(k+1, a[0].ring()->zero());
 	do {
-		cl_MI qk = div(coeff(c, n+k), coeff(b, n));
+		cl_MI qk = div(r[n+k], b[n]);
 		if ( !zerop(qk) ) {
-			q.set_coeff(k, qk);
-			unsigned int j;
+			q[k] = qk;
 			for ( int i=0; i<n; ++i ) {
-				j = n + k - 1 - i;
-				c.set_coeff(j, coeff(c, j) - qk * coeff(b, j-k));
+				unsigned int j = n + k - 1 - i;
+				r[j] = r[j] - qk * b[j-k];
 			}
 		}
 	} while ( k-- );
 
-	cl_MI zero = a.ring()->basering()->zero();
-	for ( int i=degree(a); i>=n; --i ) {
-		c.set_coeff(i, zero);
-	}
+	fill(r.begin()+n, r.end(), a[0].ring()->zero());
+	canonicalize(r);
+	canonicalize(q);
+}
 
-	q.finalize();
-	c.finalize();
-	return c;
+/** Calculates the GCD of polynomial a and b.
+ *
+ *  @param[in]  a  polynomial
+ *  @param[in]  b  polynomial
+ *  @param[out] c  GCD
+ */
+static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
+{
+	if ( degree(a) < degree(b) ) return gcd(b, a, c);
+
+	c = a;
+	normalize_in_field(c);
+	umodpoly d = b;
+	normalize_in_field(d);
+	umodpoly r;
+	while ( !d.empty() ) {
+		rem(c, d, r);
+		c = d;
+		d = r;
+	}
+	normalize_in_field(c);
 }
 
-static umod gcd(const umod& a, const umod& b)
+/** Calculates the derivative of the polynomial a.
+ *  
+ *  @param[in]  a  polynomial of which to take the derivative
+ *  @param[out] d  result/derivative
+ */
+static void deriv(const umodpoly& a, umodpoly& d)
 {
-	if ( degree(a) < degree(b) ) return gcd(b, a);
+	d.clear();
+	if ( a.size() <= 1 ) return;
 
-	umod c = COPY(c, a);
-	unit_normal(c);
-	umod d = COPY(d, b);
-	unit_normal(d);
-	while ( !zerop(d) ) {
-		umod r = rem(c, d);
-		c = COPY(c, d);
-		d = COPY(d, r);
+	d.insert(d.begin(), a.begin()+1, a.end());
+	int max = d.size();
+	for ( int i=1; i<max; ++i ) {
+		d[i] = d[i] * (i+1);
 	}
-	unit_normal(c);
-	return c;
+	canonicalize(d);
+}
+
+static bool unequal_one(const umodpoly& a)
+{
+	if ( a.empty() ) return true;
+	return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 }
 
-static bool squarefree(const umod& a)
+static bool equal_one(const umodpoly& a)
+{
+	return ( a.size() == 1 && a[0] == a[0].ring()->one() );
+}
+
+/** Returns true if polynomial a is square free.
+ *
+ *  @param[in] a  polynomial to check
+ *  @return       true if polynomial is square free, false otherwise
+ */
+static bool squarefree(const umodpoly& a)
 {
-	umod b = deriv(a);
-	if ( zerop(b) ) {
+	umodpoly b;
+	deriv(a, b);
+	if ( b.empty() ) {
 		return false;
 	}
-	umod one = a.ring()->one();
-	umod c = gcd(a, b);
-	return c == one;
+	umodpoly c;
+	gcd(a, b, c);
+	return equal_one(c);
 }
 
 // END modular univariate polynomial code
@@ -480,15 +750,19 @@ modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 
 ostream& operator<<(ostream& o, const modular_matrix& m)
 {
-	vector<cl_MI>::const_iterator i = m.m.begin(), end = m.m.end();
-	size_t wrap = 1;
-	for ( ; i != end; ++i ) {
-		o << *i << " ";
-		if ( !(wrap++ % m.c) ) {
-			o << endl;
+	cl_modint_ring R = m(0,0).ring();
+	o << "{";
+	for ( size_t i=0; i<m.rowsize(); ++i ) {
+		o << "{";
+		for ( size_t j=0; j<m.colsize()-1; ++j ) {
+			o << R->retract(m(i,j)) << ",";
+		}
+		o << R->retract(m(i,m.colsize()-1)) << "}";
+		if ( i != m.rowsize()-1 ) {
+			o << ",";
 		}
 	}
-	o << endl;
+	o << "}";
 	return o;
 }
 #endif // def DEBUGFACTOR
@@ -496,37 +770,26 @@ ostream& operator<<(ostream& o, const modular_matrix& m)
 // END modular matrix
 ////////////////////////////////////////////////////////////////////////////////
 
-static void q_matrix(const umod& a, modular_matrix& Q)
+static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 {
+	umodpoly a = a_;
+	normalize_in_field(a);
+
 	int n = degree(a);
-	unsigned int q = cl_I_to_uint(a.ring()->basering()->modulus);
-// fast and buggy
-//	vector<cl_MI> r(n, a.R->zero());
-//	r[0] = a.R->one();
-//	Q.set_row(0, r);
-//	unsigned int max = (n-1) * q;
-//	for ( size_t m=1; m<=max; ++m ) {
-//		cl_MI rn_1 = r.back();
-//		for ( size_t i=n-1; i>0; --i ) {
-//			r[i] = r[i-1] - rn_1 * a[i];
-//		}
-//		r[0] = -rn_1 * a[0];
-//		if ( (m % q) == 0 ) {
-//			Q.set_row(m/q, r);
-//		}
-//	}
-// slow and (hopefully) correct
-	cl_MI one = a.ring()->basering()->one();
-	for ( int i=0; i<n; ++i ) {
-		umod qk = a.ring()->create(i*q);
-		qk.set_coeff(i*q, one);
-		qk.finalize();
-		umod r = rem(qk, a);
-		mvec rvec;
-		for ( int j=0; j<n; ++j ) {
-			rvec.push_back(coeff(r, j));
-		}
-		Q.set_row(i, rvec);
+	unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
+	umodpoly r(n, a[0].ring()->zero());
+	r[0] = a[0].ring()->one();
+	Q.set_row(0, r);
+	unsigned int max = (n-1) * q;
+	for ( size_t m=1; m<=max; ++m ) {
+		cl_MI rn_1 = r.back();
+		for ( size_t i=n-1; i>0; --i ) {
+			r[i] = r[i-1] - (rn_1 * a[i]);
+		}
+		r[0] = -rn_1 * a[0];
+		if ( (m % q) == 0 ) {
+			Q.set_row(m/q, r);
+		}
 	}
 }
 
@@ -574,52 +837,54 @@ static void nullspace(modular_matrix& M, vector<mvec>& basis)
 	}
 }
 
-static void berlekamp(const umod& a, umodvec& upv)
+static void berlekamp(const umodpoly& a, upvec& upv)
 {
-	cl_modint_ring R = a.ring()->basering();
-	const umod one = a.ring()->one();
+	cl_modint_ring R = a[0].ring();
+	umodpoly one(1, R->one());
 
 	modular_matrix Q(degree(a), degree(a), R->zero());
 	q_matrix(a, Q);
 	vector<mvec> nu;
 	nullspace(Q, nu);
+
 	const unsigned int k = nu.size();
 	if ( k == 1 ) {
 		return;
 	}
 
-	list<umod> factors;
+	list<umodpoly> factors;
 	factors.push_back(a);
 	unsigned int size = 1;
 	unsigned int r = 1;
 	unsigned int q = cl_I_to_uint(R->modulus);
 
-	list<umod>::iterator u = factors.begin();
+	list<umodpoly>::iterator u = factors.begin();
 
 	while ( true ) {
 		for ( unsigned int s=0; s<q; ++s ) {
-			umod nur = umod_from_modvec(nu[r]);
-			cl_MI buf = coeff(nur, 0) - cl_MI(R, s);
-			nur.set_coeff(0, buf);
-			nur.finalize();
-			umod g = gcd(nur, *u);
-			if ( g != one && g != *u ) {
-				umod uo = div(*u, g);
-				if ( uo == one ) {
+			umodpoly nur = nu[r];
+			nur[0] = nur[0] - cl_MI(R, s);
+			canonicalize(nur);
+			umodpoly g;
+			gcd(nur, *u, g);
+			if ( unequal_one(g) && g != *u ) {
+				umodpoly uo;
+				div(*u, g, uo);
+				if ( equal_one(uo) ) {
 					throw logic_error("berlekamp: unexpected divisor.");
 				}
 				else {
-					*u = COPY((*u), uo);
+					*u = uo;
 				}
 				factors.push_back(g);
 				size = 0;
-				list<umod>::const_iterator i = factors.begin(), end = factors.end();
+				list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
 				while ( i != end ) {
 					if ( degree(*i) ) ++size; 
 					++i;
 				}
 				if ( size == k ) {
-					list<umod>::const_iterator i = factors.begin(), end = factors.end();
+					list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
 					while ( i != end ) {
 						upv.push_back(*i++);
 					}
@@ -634,128 +899,290 @@ static void berlekamp(const umod& a, umodvec& upv)
 	}
 }
 
-static void factor_modular(const umod& p, umodvec& upv)
+static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 {
+	size_t newdeg = degree(a)/prime;
+	ap.resize(newdeg+1);
+	ap[0] = a[0];
+	for ( size_t i=1; i<=newdeg; ++i ) {
+		ap[i] = a[i*prime];
+	}
+}
+
+static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
+{
+	const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
+	int i = 1;
+	umodpoly b;
+	deriv(a, b);
+	if ( b.size() ) {
+		umodpoly c;
+		gcd(a, b, c);
+		umodpoly w;
+		div(a, c, w);
+		while ( unequal_one(w) ) {
+			umodpoly y;
+			gcd(w, c, y);
+			umodpoly z;
+			div(w, y, z);
+			factors.push_back(z);
+			mult.push_back(i);
+			++i;
+			w = y;
+			umodpoly buf;
+			div(c, y, buf);
+			c = buf;
+		}
+		if ( unequal_one(c) ) {
+			umodpoly cp;
+			expt_1_over_p(c, prime, cp);
+			size_t previ = mult.size();
+			modsqrfree(cp, factors, mult);
+			for ( size_t i=previ; i<mult.size(); ++i ) {
+				mult[i] *= prime;
+			}
+		}
+	}
+	else {
+		umodpoly ap;
+		expt_1_over_p(a, prime, ap);
+		size_t previ = mult.size();
+		modsqrfree(ap, factors, mult);
+		for ( size_t i=previ; i<mult.size(); ++i ) {
+			mult[i] *= prime;
+		}
+	}
+}
+
+static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
+{
+	umodpoly a = a_;
+
+	cl_modint_ring R = a[0].ring();
+	int q = cl_I_to_int(R->modulus);
+	int nhalf = degree(a)/2;
+
+	int i = 1;
+	umodpoly w(2);
+	w[0] = R->zero();
+	w[1] = R->one();
+	umodpoly x = w;
+
+	while ( i <= nhalf ) {
+		expt_pos(w, q);
+		umodpoly buf;
+		rem(w, a, buf);
+		w = buf;
+		umodpoly wx = w - x;
+		gcd(a, wx, buf);
+		if ( unequal_one(buf) ) {
+			degrees.push_back(i);
+			ddfactors.push_back(buf);
+		}
+		if ( unequal_one(buf) ) {
+			umodpoly buf2;
+			div(a, buf, buf2);
+			a = buf2;
+			nhalf = degree(a)/2;
+			rem(w, a, buf);
+			w = buf;
+		}
+		++i;
+	}
+	if ( unequal_one(a) ) {
+		degrees.push_back(degree(a));
+		ddfactors.push_back(a);
+	}
+}
+
+static void same_degree_factor(const umodpoly& a, upvec& upv)
+{
+	cl_modint_ring R = a[0].ring();
+
+	vector<int> degrees;
+	upvec ddfactors;
+	distinct_degree_factor(a, degrees, ddfactors);
+
+	for ( size_t i=0; i<degrees.size(); ++i ) {
+		if ( degrees[i] == degree(ddfactors[i]) ) {
+			upv.push_back(ddfactors[i]);
+		}
+		else {
+			berlekamp(ddfactors[i], upv);
+		}
+	}
+}
+
+#define USE_SAME_DEGREE_FACTOR
+
+static void factor_modular(const umodpoly& p, upvec& upv)
+{
+#ifdef USE_SAME_DEGREE_FACTOR
+	same_degree_factor(p, upv);
+#else
 	berlekamp(p, upv);
-	return;
+#endif
 }
 
-static void exteuclid(const umod& a, const umod& b, umod& g, umod& s, umod& t)
+/** Calculates polynomials s and t such that a*s+b*t==1.
+ *  Assertion: a and b are relatively prime and not zero.
+ *
+ *  @param[in]  a  polynomial
+ *  @param[in]  b  polynomial
+ *  @param[out] s  polynomial
+ *  @param[out] t  polynomial
+ */
+static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
 {
 	if ( degree(a) < degree(b) ) {
-		exteuclid(b, a, g, t, s);
+		exteuclid(b, a, t, s);
 		return;
 	}
-	umod c = COPY(c, a); unit_normal(c);
-	umod d = COPY(d, b); unit_normal(d);
-	umod c1 = a.ring()->one();
-	umod c2 = a.ring()->create(-1);
-	umod d1 = a.ring()->create(-1);
-	umod d2 = a.ring()->one();
-	while ( !zerop(d) ) {
-		umod q = div(c, d);
-		umod r = c - q * d;
-		umod r1 = c1 - q * d1;
-		umod r2 = c2 - q * d2;
-		c = COPY(c, d);
-		c1 = COPY(c1, d1);
-		c2 = COPY(c2, d2);
-		d = COPY(d, r);
-		d1 = COPY(d1, r1);
-		d2 = COPY(d2, r2);
-	}
-	g = COPY(g, c); unit_normal(g);
-	s = COPY(s, c1);
-	for ( int i=0; i<=degree(s); ++i ) {
-		s.set_coeff(i, coeff(s, i) * recip(coeff(a, degree(a)) * coeff(c, degree(c))));
-	}
-	s.finalize();
-	t = COPY(t, c2);
-	for ( int i=0; i<=degree(t); ++i ) {
-		t.set_coeff(i, coeff(t, i) * recip(coeff(b, degree(b)) * coeff(c, degree(c))));
-	}
-	t.finalize();
-}
-
-static ex replace_lc(const ex& poly, const ex& x, const ex& lc)
-{
-	ex r = expand(poly + (lc - poly.lcoeff(x)) * pow(x, poly.degree(x)));
-	return r;
+
+	umodpoly one(1, a[0].ring()->one());
+	umodpoly c = a; normalize_in_field(c);
+	umodpoly d = b; normalize_in_field(d);
+	s = one;
+	t.clear();
+	umodpoly d1;
+	umodpoly d2 = one;
+	umodpoly q;
+	while ( true ) {
+		div(c, d, q);
+		umodpoly r = c - q * d;
+		umodpoly r1 = s - q * d1;
+		umodpoly r2 = t - q * d2;
+		c = d;
+		s = d1;
+		t = d2;
+		if ( r.empty() ) break;
+		d = r;
+		d1 = r1;
+		d2 = r2;
+	}
+	cl_MI fac = recip(lcoeff(a) * lcoeff(c));
+	umodpoly::iterator i = s.begin(), end = s.end();
+	for ( ; i!=end; ++i ) {
+		*i = *i * fac;
+	}
+	canonicalize(s);
+	fac = recip(lcoeff(b) * lcoeff(c));
+	i = t.begin(), end = t.end();
+	for ( ; i!=end; ++i ) {
+		*i = *i * fac;
+	}
+	canonicalize(t);
 }
 
-static ex hensel_univar(const ex& a_, const ex& x, unsigned int p, const umod& u1_, const umod& w1_, const ex& gamma_ = 0)
+static upoly replace_lc(const upoly& poly, const cl_I& lc)
 {
-	ex a = a_;
-	const cl_univpoly_modint_ring& UPR = u1_.ring();
-	const cl_modint_ring& R = UPR->basering();
+	if ( poly.empty() ) return poly;
+	upoly r = poly;
+	r.back() = lc;
+	return r;
+}
 
-	// calc bound B
-	ex maxcoeff;
+static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
+{
+	cl_I maxcoeff = 0;
+	cl_R coeff = 0;
 	for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
-		maxcoeff += pow(abs(a.coeff(x, i)),2);
+		cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
+		if ( aa > maxcoeff ) maxcoeff = aa;
+		coeff = coeff + square(aa);
 	}
-	cl_I normmc = ceiling1(the<cl_R>(cln::sqrt(ex_to<numeric>(maxcoeff).to_cl_N())));
-	cl_I maxdegree = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
-	cl_I B = normmc * expt_pos(cl_I(2), maxdegree);
+	cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
+	cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
+	return ( B > maxcoeff ) ? B : maxcoeff;
+}
+
+static inline cl_I calc_bound(const upoly& a, int maxdeg)
+{
+	cl_I maxcoeff = 0;
+	cl_R coeff = 0;
+	for ( int i=degree(a); i>=0; --i ) {
+		cl_I aa = abs(a[i]);
+		if ( aa > maxcoeff ) maxcoeff = aa;
+		coeff = coeff + square(aa);
+	}
+	cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
+	cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
+	return ( B > maxcoeff ) ? B : maxcoeff;
+}
+
+static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
+{
+	upoly a = a_;
+	const cl_modint_ring& R = u1_[0].ring();
+
+	// calc bound B
+	int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
+	cl_I maxmodulus = 2*calc_bound(a, maxdeg);
 
 	// step 1
-	ex alpha = a.lcoeff(x);
-	ex gamma = gamma_;
-	if ( gamma == 0 ) {
-		gamma = alpha;
-	}
-	numeric gamma_ui = ex_to<numeric>(abs(gamma));
-	a = a * gamma;
-	umod nu1 = COPY(nu1, u1_);
-	unit_normal(nu1);
-	umod nw1 = COPY(nw1, w1_);
-	unit_normal(nw1);
-	ex phi;
-	phi = gamma * umod_to_ex(nu1, x);
-	umod u1 = umod_from_ex(phi, x, R);
-	phi = alpha * umod_to_ex(nw1, x);
-	umod w1 = umod_from_ex(phi, x, R);
+	cl_I alpha = lcoeff(a);
+	a = a * alpha;
+	umodpoly nu1 = u1_;
+	normalize_in_field(nu1);
+	umodpoly nw1 = w1_;
+	normalize_in_field(nw1);
+	upoly phi;
+	phi = umodpoly_to_upoly(nu1) * alpha;
+	umodpoly u1;
+	umodpoly_from_upoly(u1, phi, R);
+	phi = umodpoly_to_upoly(nw1) * alpha;
+	umodpoly w1;
+	umodpoly_from_upoly(w1, phi, R);
 
 	// step 2
-	umod g = UPR->create(-1);
-	umod s = UPR->create(-1);
-	umod t = UPR->create(-1);
-	exteuclid(u1, w1, g, s, t);
+	umodpoly s;
+	umodpoly t;
+	exteuclid(u1, w1, s, t);
 
 	// step 3
-	ex u = replace_lc(umod_to_ex(u1, x), x, gamma);
-	ex w = replace_lc(umod_to_ex(w1, x), x, alpha);
-	ex e = expand(a - u * w);
-	numeric modulus = p;
-	const numeric maxmodulus = 2*numeric(B)*gamma_ui;
+	u = replace_lc(umodpoly_to_upoly(u1), alpha);
+	w = replace_lc(umodpoly_to_upoly(w1), alpha);
+	upoly e = a - u * w;
+	cl_I modulus = p;
 
 	// step 4
-	while ( !e.is_zero() && modulus < maxmodulus ) {
-		ex c = e / modulus;
-		phi = expand(umod_to_ex(s, x) * c);
-		umod sigmatilde = umod_from_ex(phi, x, R);
-		phi = expand(umod_to_ex(t, x) * c);
-		umod tautilde = umod_from_ex(phi, x, R);
-		umod q = div(sigmatilde, w1);
-		umod r = rem(sigmatilde, w1);
-		umod sigma = COPY(sigma, r);
-		phi = expand(umod_to_ex(tautilde, x) + umod_to_ex(q, x) * umod_to_ex(u1, x));
-		umod tau = umod_from_ex(phi, x, R);
-		u = expand(u + umod_to_ex(tau, x) * modulus);
-		w = expand(w + umod_to_ex(sigma, x) * modulus);
-		e = expand(a - u * w);
+	while ( !e.empty() && modulus < maxmodulus ) {
+		upoly c = e / modulus;
+		phi = umodpoly_to_upoly(s) * c;
+		umodpoly sigmatilde;
+		umodpoly_from_upoly(sigmatilde, phi, R);
+		phi = umodpoly_to_upoly(t) * c;
+		umodpoly tautilde;
+		umodpoly_from_upoly(tautilde, phi, R);
+		umodpoly r, q;
+		remdiv(sigmatilde, w1, r, q);
+		umodpoly sigma = r;
+		phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
+		umodpoly tau;
+		umodpoly_from_upoly(tau, phi, R);
+		u = u + umodpoly_to_upoly(tau) * modulus;
+		w = w + umodpoly_to_upoly(sigma) * modulus;
+		e = a - u * w;
 		modulus = modulus * p;
 	}
 
 	// step 5
-	if ( e.is_zero() ) {
-		ex delta = u.content(x);
-		u = u / delta;
-		w = w / gamma * delta;
-		return lst(u, w);
+	if ( e.empty() ) {
+		cl_I g = u[0];
+		for ( size_t i=1; i<u.size(); ++i ) {
+			g = gcd(g, u[i]);
+			if ( g == 1 ) break;
+		}
+		if ( g != 1 ) {
+			u = u / g;
+			w = w * g;
+		}
+		if ( alpha != 1 ) {
+			w = w / alpha;
+		}
 	}
 	else {
-		return lst();
+		u.clear();
 	}
 }
 
@@ -789,91 +1216,157 @@ static unsigned int next_prime(unsigned int p)
 	throw logic_error("next_prime: should not reach this point!");
 }
 
-class Partition
+class factor_partition
 {
 public:
-	Partition(size_t n_) : n(n_)
+	factor_partition(const upvec& factors_) : factors(factors_)
 	{
-		k.resize(n, 1);
-		k[0] = 0;
-		sum = n-1;
+		n = factors.size();
+		k.resize(n, 0);
+		k[0] = 1;
+		cache.resize(n-1);
+		one.resize(1, factors.front()[0].ring()->one());
+		len = 1;
+		last = 0;
+		split();
 	}
 	int operator[](size_t i) const { return k[i]; }
 	size_t size() const { return n; }
-	size_t size_first() const { return n-sum; }
-	size_t size_second() const { return sum; }
+	size_t size_left() const { return n-len; }
+	size_t size_right() const { return len; }
 #ifdef DEBUGFACTOR
-	void get() const
-	{
-		for ( size_t i=0; i<k.size(); ++i ) {
-			cout << k[i] << " ";
-		}
-		cout << endl;
-	}
+	void get() const { DCOUTVAR(k); }
 #endif
 	bool next()
 	{
-		for ( size_t i=n-1; i>=1; --i ) {
-			if ( k[i] ) {
-				--k[i];
-				--sum;
-				return sum > 0;
+		if ( last == n-1 ) {
+			int rem = len - 1;
+			int p = last - 1;
+			while ( rem ) {
+				if ( k[p] ) {
+					--rem;
+					--p;
+					continue;
+				}
+				last = p - 1;
+				while ( k[last] == 0 ) { --last; }
+				if ( last == 0 && n == 2*len ) return false;
+				k[last++] = 0;
+				for ( size_t i=0; i<=len-rem; ++i ) {
+					k[last] = 1;
+					++last;
+				}
+				fill(k.begin()+last, k.end(), 0);
+				--last;
+				split();
+				return true;
 			}
-			++k[i];
-			++sum;
+			last = len;
+			++len;
+			if ( len > n/2 ) return false;
+			fill(k.begin(), k.begin()+len, 1);
+			fill(k.begin()+len+1, k.end(), 0);
 		}
-		return false;
+		else {
+			k[last++] = 0;
+			k[last] = 1;
+		}
+		split();
+		return true;
 	}
+	umodpoly& left() { return lr[0]; }
+	umodpoly& right() { return lr[1]; }
 private:
-	size_t n, sum;
-	vector<int> k;
-};
-
-static void split(const umodvec& factors, const Partition& part, umod& a, umod& b)
-{
-	a = factors.front().ring()->one();
-	b = factors.front().ring()->one();
-	for ( size_t i=0; i<part.size(); ++i ) {
-		if ( part[i] ) {
-			b = b * factors[i];
+	void split_cached()
+	{
+		size_t i = 0;
+		do {
+			size_t pos = i;
+			int group = k[i++];
+			size_t d = 0;
+			while ( i < n && k[i] == group ) { ++d; ++i; }
+			if ( d ) {
+				if ( cache[pos].size() >= d ) {
+					lr[group] = lr[group] * cache[pos][d-1];
+				}
+				else {
+					if ( cache[pos].size() == 0 ) {
+						cache[pos].push_back(factors[pos] * factors[pos+1]);
+					}
+					size_t j = pos + cache[pos].size() + 1;
+					d -= cache[pos].size();
+					while ( d ) {
+						umodpoly buf = cache[pos].back() * factors[j];
+						cache[pos].push_back(buf);
+						--d;
+						++j;
+					}
+					lr[group] = lr[group] * cache[pos].back();
+				}
+			}
+			else {
+				lr[group] = lr[group] * factors[pos];
+			}
+		} while ( i < n );
+	}
+	void split()
+	{
+		lr[0] = one;
+		lr[1] = one;
+		if ( n > 6 ) {
+			split_cached();
 		}
 		else {
-			a = a * factors[i];
+			for ( size_t i=0; i<n; ++i ) {
+				lr[k[i]] = lr[k[i]] * factors[i];
+			}
 		}
 	}
-}
+private:
+	umodpoly lr[2];
+	vector< vector<umodpoly> > cache;
+	upvec factors;
+	umodpoly one;
+	size_t n;
+	size_t len;
+	size_t last;
+	vector<int> k;
+};
 
 struct ModFactors
 {
-	ex poly;
-	umodvec factors;
+	upoly poly;
+	upvec factors;
 };
 
-static ex factor_univariate(const ex& poly, const ex& x)
+static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
 {
-	ex unit, cont, prim;
-	poly.unitcontprim(x, unit, cont, prim);
+	ex unit, cont, prim_ex;
+	poly.unitcontprim(x, unit, cont, prim_ex);
+	upoly prim;
+	upoly_from_ex(prim, prim_ex, x);
 
 	// determine proper prime and minimize number of modular factors
-	unsigned int p = 3, lastp;
+	prime = 3;
+	unsigned int lastp = prime;
 	cl_modint_ring R;
 	unsigned int trials = 0;
 	unsigned int minfactors = 0;
-	numeric lcoeff = ex_to<numeric>(prim.lcoeff(x));
-	umodvec factors;
+	cl_I lc = lcoeff(prim) * the<cl_I>(ex_to<numeric>(cont).to_cl_N());
+	upvec factors;
 	while ( trials < 2 ) {
+		umodpoly modpoly;
 		while ( true ) {
-			p = next_prime(p);
-			if ( irem(lcoeff, p) != 0 ) {
-				R = find_modint_ring(p);
-				umod modpoly = umod_from_ex(prim, x, R);
+			prime = next_prime(prime);
+			if ( !zerop(rem(lc, prime)) ) {
+				R = find_modint_ring(prime);
+				umodpoly_from_upoly(modpoly, prim, R);
 				if ( squarefree(modpoly) ) break;
 			}
 		}
 
 		// do modular factorization
-		umod modpoly = umod_from_ex(prim, x, R);
-		umodvec trialfactors;
+		upvec trialfactors;
 		factor_modular(modpoly, trialfactors);
 		if ( trialfactors.size() <= 1 ) {
 			// irreducible for sure
@@ -882,17 +1375,16 @@ static ex factor_univariate(const ex& poly, const ex& x)
 
 		if ( minfactors == 0 || trialfactors.size() < minfactors ) {
 			factors = trialfactors;
-			minfactors = factors.size();
-			lastp = p;
+			minfactors = trialfactors.size();
+			lastp = prime;
 			trials = 1;
 		}
 		else {
 			++trials;
 		}
 	}
-	p = lastp;
-	R = find_modint_ring(p);
-	cl_univpoly_modint_ring UPR = find_univpoly_ring(R);
+	prime = lastp;
+	R = find_modint_ring(prime);
 
 	// lift all factor combinations
 	stack<ModFactors> tocheck;
@@ -900,25 +1392,22 @@ static ex factor_univariate(const ex& poly, const ex& x)
 	mf.poly = prim;
 	mf.factors = factors;
 	tocheck.push(mf);
+	upoly f1, f2;
 	ex result = 1;
 	while ( tocheck.size() ) {
 		const size_t n = tocheck.top().factors.size();
-		Partition part(n);
+		factor_partition part(tocheck.top().factors);
 		while ( true ) {
-			umod a = UPR->create(-1);
-			umod b = UPR->create(-1);
-			split(tocheck.top().factors, part, a, b);
-
-			ex answer = hensel_univar(tocheck.top().poly, x, p, a, b);
-			if ( answer != lst() ) {
-				if ( part.size_first() == 1 ) {
-					if ( part.size_second() == 1 ) {
-						result *= answer.op(0) * answer.op(1);
+			hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
+			if ( !f1.empty() ) {
+				if ( part.size_left() == 1 ) {
+					if ( part.size_right() == 1 ) {
+						result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
 						tocheck.pop();
 						break;
 					}
-					result *= answer.op(0);
-					tocheck.top().poly = answer.op(1);
+					result *= upoly_to_ex(f1, x);
+					tocheck.top().poly = f2;
 					for ( size_t i=0; i<n; ++i ) {
 						if ( part[i] == 0 ) {
 							tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
@@ -927,14 +1416,14 @@ static ex factor_univariate(const ex& poly, const ex& x)
 					}
 					break;
 				}
-				else if ( part.size_second() == 1 ) {
-					if ( part.size_first() == 1 ) {
-						result *= answer.op(0) * answer.op(1);
+				else if ( part.size_right() == 1 ) {
+					if ( part.size_left() == 1 ) {
+						result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
 						tocheck.pop();
 						break;
 					}
-					result *= answer.op(1);
-					tocheck.top().poly = answer.op(0);
+					result *= upoly_to_ex(f2, x);
+					tocheck.top().poly = f1;
 					for ( size_t i=0; i<n; ++i ) {
 						if ( part[i] == 1 ) {
 							tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
@@ -944,8 +1433,8 @@ static ex factor_univariate(const ex& poly, const ex& x)
 					break;
 				}
 				else {
-					umodvec newfactors1(part.size_first(), UPR->create(-1)), newfactors2(part.size_second(), UPR->create(-1));
-					umodvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
+					upvec newfactors1(part.size_left()), newfactors2(part.size_right());
+					upvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
 					for ( size_t i=0; i<n; ++i ) {
 						if ( part[i] ) {
 							*i2++ = tocheck.top().factors[i];
@@ -955,17 +1444,17 @@ static ex factor_univariate(const ex& poly, const ex& x)
 						}
 					}
 					tocheck.top().factors = newfactors1;
-					tocheck.top().poly = answer.op(0);
+					tocheck.top().poly = f1;
 					ModFactors mf;
 					mf.factors = newfactors2;
-					mf.poly = answer.op(1);
+					mf.poly = f2;
 					tocheck.push(mf);
 					break;
 				}
 			}
 			else {
 				if ( !part.next() ) {
-					result *= tocheck.top().poly;
+					result *= upoly_to_ex(tocheck.top().poly, x);
 					tocheck.pop();
 					break;
 				}
@@ -976,185 +1465,133 @@ static ex factor_univariate(const ex& poly, const ex& x)
 	return unit * cont * result;
 }
 
+static inline ex factor_univariate(const ex& poly, const ex& x)
+{
+	unsigned int prime;
+	return factor_univariate(poly, x, prime);
+}
+
 struct EvalPoint
 {
 	ex x;
 	int evalpoint;
 };
 
-// MARK
-
 // forward declaration
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
 
-umodvec multiterm_eea_lift(const umodvec& a, const ex& x, unsigned int p, unsigned int k)
+static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
 {
-	DCOUT(multiterm_eea_lift);
-	DCOUTVAR(a);
-	DCOUTVAR(p);
-	DCOUTVAR(k);
-
 	const size_t r = a.size();
-	DCOUTVAR(r);
 	cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
-	cl_univpoly_modint_ring UPR = find_univpoly_ring(R);
-	umodvec q(r-1, UPR->create(-1));
+	upvec q(r-1);
 	q[r-2] = a[r-1];
 	for ( size_t j=r-2; j>=1; --j ) {
 		q[j-1] = a[j] * q[j];
 	}
-	DCOUTVAR(q);
-	umod beta = UPR->one();
-	umodvec s;
+	umodpoly beta(1, R->one());
+	upvec s;
 	for ( size_t j=1; j<r; ++j ) {
-		DCOUTVAR(j);
-		DCOUTVAR(beta);
 		vector<ex> mdarg(2);
-		mdarg[0] = umod_to_ex(q[j-1], x);
-		mdarg[1] = umod_to_ex(a[j-1], x);
+		mdarg[0] = umodpoly_to_ex(q[j-1], x);
+		mdarg[1] = umodpoly_to_ex(a[j-1], x);
 		vector<EvalPoint> empty;
-		vector<ex> exsigma = multivar_diophant(mdarg, x, umod_to_ex(beta, x), empty, 0, p, k);
-		umod sigma1 = umod_from_ex(exsigma[0], x, R);
-		umod sigma2 = umod_from_ex(exsigma[1], x, R);
-		beta = COPY(beta, sigma1);
+		vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
+		umodpoly sigma1;
+		umodpoly_from_ex(sigma1, exsigma[0], x, R);
+		umodpoly sigma2;
+		umodpoly_from_ex(sigma2, exsigma[1], x, R);
+		beta = sigma1;
 		s.push_back(sigma2);
 	}
 	s.push_back(beta);
-
-	DCOUTVAR(s);
-	DCOUT(END multiterm_eea_lift);
 	return s;
 }
 
-void change_modulus(umod& out, const umod& in)
+/**
+ *  Assert: a not empty.
+ */
+static void change_modulus(const cl_modint_ring& R, umodpoly& a)
 {
-	// ASSERT: out and in have same degree
-	if ( out.ring() == in.ring() ) {
-		out = COPY(out, in);
-	}
-	else {
-		for ( int i=0; i<=degree(in); ++i ) {
-			out.set_coeff(i, out.ring()->basering()->canonhom(in.ring()->basering()->retract(coeff(in, i))));
-		}
-		out.finalize();
+	if ( a.empty() ) return;
+	cl_modint_ring oldR = a[0].ring();
+	umodpoly::iterator i = a.begin(), end = a.end();
+	for ( ; i!=end; ++i ) {
+		*i = R->canonhom(oldR->retract(*i));
 	}
+	canonicalize(a);
 }
 
-void eea_lift(const umod& a, const umod& b, const ex& x, unsigned int p, unsigned int k, umod& s_, umod& t_)
+static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
 {
-	DCOUT(eea_lift);
-	DCOUTVAR(a);
-	DCOUTVAR(b);
-	DCOUTVAR(x);
-	DCOUTVAR(p);
-	DCOUTVAR(k);
-
 	cl_modint_ring R = find_modint_ring(p);
-	cl_univpoly_modint_ring UPR = find_univpoly_ring(R);
-	umod amod = UPR->create(degree(a));
-	DCOUTVAR(a);
-	change_modulus(amod, a);
-	umod bmod = UPR->create(degree(b));
-	change_modulus(bmod, b);
-	DCOUTVAR(amod);
-	DCOUTVAR(bmod);
-
-	umod g = UPR->create(-1);
-	umod smod = UPR->create(-1);
-	umod tmod = UPR->create(-1);
-	exteuclid(amod, bmod, g, smod, tmod);
-	
-	DCOUTVAR(smod);
-	DCOUTVAR(tmod);
-	DCOUTVAR(g);
-	DCOUTVAR(a);
+	umodpoly amod = a;
+	change_modulus(R, amod);
+	umodpoly bmod = b;
+	change_modulus(R, bmod);
+
+	umodpoly smod;
+	umodpoly tmod;
+	exteuclid(amod, bmod, smod, tmod);
 
 	cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
-	cl_univpoly_modint_ring UPRpk = find_univpoly_ring(Rpk);
-	umod s = UPRpk->create(degree(smod));
-	change_modulus(s, smod);
-	umod t = UPRpk->create(degree(tmod));
-	change_modulus(t, tmod);
-	DCOUTVAR(s);
-	DCOUTVAR(t);
+	umodpoly s = smod;
+	change_modulus(Rpk, s);
+	umodpoly t = tmod;
+	change_modulus(Rpk, t);
 
 	cl_I modulus(p);
-	DCOUTVAR(a);
-
-	umod one = UPRpk->one();
+	umodpoly one(1, Rpk->one());
 	for ( size_t j=1; j<k; ++j ) {
-		DCOUTVAR(a);
-		umod e = one - a * s - b * t;
-		DCOUTVAR(one);
-		DCOUTVAR(a*s);
-		DCOUTVAR(b*t);
-		DCOUTVAR(e);
-		e = divide(e, modulus);
-		umod c = UPR->create(degree(e));
-		change_modulus(c, e);
-		umod sigmabar = smod * c;
-		umod taubar = tmod * c;
-		umod q = div(sigmabar, bmod);
-		umod sigma = rem(sigmabar, bmod);
-		umod tau = taubar + q * amod;
-		umod sadd = UPRpk->create(degree(sigma));
-		change_modulus(sadd, sigma);
+		umodpoly e = one - a * s - b * t;
+		reduce_coeff(e, modulus);
+		umodpoly c = e;
+		change_modulus(R, c);
+		umodpoly sigmabar = smod * c;
+		umodpoly taubar = tmod * c;
+		umodpoly sigma, q;
+		remdiv(sigmabar, bmod, sigma, q);
+		umodpoly tau = taubar + q * amod;
+		umodpoly sadd = sigma;
+		change_modulus(Rpk, sadd);
 		cl_MI modmodulus(Rpk, modulus);
 		s = s + sadd * modmodulus;
-		umod tadd = UPRpk->create(degree(tau));
-		change_modulus(tadd, tau);
+		umodpoly tadd = tau;
+		change_modulus(Rpk, tadd);
 		t = t + tadd * modmodulus;
 		modulus = modulus * p;
 	}
 
 	s_ = s; t_ = t;
-
-	DCOUTVAR(s);
-	DCOUTVAR(t);
-	DCOUT2(check, a*s + b*t);
-	DCOUT(END eea_lift);
 }
 
-umodvec univar_diophant(const umodvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
+static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
 {
-	DCOUT(univar_diophant);
-	DCOUTVAR(a);
-	DCOUTVAR(x);
-	DCOUTVAR(m);
-	DCOUTVAR(p);
-	DCOUTVAR(k);
-
 	cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
-	cl_univpoly_modint_ring UPR = find_univpoly_ring(R);
 
 	const size_t r = a.size();
-	umodvec result;
+	upvec result;
 	if ( r > 2 ) {
-		umodvec s = multiterm_eea_lift(a, x, p, k);
+		upvec s = multiterm_eea_lift(a, x, p, k);
 		for ( size_t j=0; j<r; ++j ) {
-			ex phi = expand(pow(x,m) * umod_to_ex(s[j], x));
-			umod bmod = umod_from_ex(phi, x, R);
-			umod buf = rem(bmod, a[j]);
+			umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
+			umodpoly buf;
+			rem(bmod, a[j], buf);
 			result.push_back(buf);
 		}
 	}
 	else {
-		umod s = UPR->create(-1);
-		umod t = UPR->create(-1);
+		umodpoly s, t;
 		eea_lift(a[1], a[0], x, p, k, s, t);
-		ex phi = expand(pow(x,m) * umod_to_ex(s, x));
-		umod bmod = umod_from_ex(phi, x, R);
-		umod buf = rem(bmod, a[0]);
+		umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
+		umodpoly buf, q;
+		remdiv(bmod, a[0], buf, q);
+		result.push_back(buf);
+		umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
+		buf = t1mod + q * a[1];
 		result.push_back(buf);
-		umod q = div(bmod, a[0]);
-		phi = expand(pow(x,m) * umod_to_ex(t, x));
-		umod t1mod = umod_from_ex(phi, x, R);
-		umod buf2 = t1mod + q * a[1];
-		result.push_back(buf2);
 	}
 
-	DCOUTVAR(result);
-	DCOUT(END univar_diophant);
 	return result;
 }
 
@@ -1185,39 +1622,17 @@ struct make_modular_map : public map_function {
 static ex make_modular(const ex& e, const cl_modint_ring& R)
 {
 	make_modular_map map(R);
-	return map(e);
+	return map(e.expand());
 }
 
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k)
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
+                                    unsigned int d, unsigned int p, unsigned int k)
 {
 	vector<ex> a = a_;
 
-	DCOUT(multivar_diophant);
-#ifdef DEBUGFACTOR
-	cout << "a ";
-	for ( size_t i=0; i<a.size(); ++i ) {
-		cout << a[i] << " ";
-	}
-	cout << endl;
-#endif
-	DCOUTVAR(x);
-	DCOUTVAR(c);
-#ifdef DEBUGFACTOR
-	cout << "I ";
-	for ( size_t i=0; i<I.size(); ++i ) {
-		cout << I[i].x << "=" << I[i].evalpoint << " ";
-	}
-	cout << endl;
-#endif
-	DCOUTVAR(d);
-	DCOUTVAR(p);
-	DCOUTVAR(k);
-
 	const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
 	const size_t r = a.size();
 	const size_t nu = I.size() + 1;
-	DCOUTVAR(r);
-	DCOUTVAR(nu);
 
 	vector<ex> sigma;
 	if ( nu > 1 ) {
@@ -1241,48 +1656,36 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
 		vector<EvalPoint> Inew = I;
 		Inew.pop_back();
 		sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
-		DCOUTVAR(sigma);
 
 		ex buf = c;
 		for ( size_t i=0; i<r; ++i ) {
 			buf -= sigma[i] * b[i];
 		}
-		ex e = buf;
-		e = make_modular(e, R);
+		ex e = make_modular(buf, R);
 
-		e = e.expand();
-		DCOUTVAR(e);
-		DCOUTVAR(d);
 		ex monomial = 1;
-		for ( size_t m=1; m<=d; ++m ) {
-			DCOUTVAR(m);
-			while ( !e.is_zero() && e.has(xnu) ) {
-				monomial *= (xnu - alphanu);
-				monomial = expand(monomial);
-				DCOUTVAR(xnu);
-				DCOUTVAR(alphanu);
-				ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
-				DCOUTVAR(cm);
-				if ( !cm.is_zero() ) {
-					vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
-					DCOUTVAR(delta_s);
-					ex buf = e;
-					for ( size_t j=0; j<delta_s.size(); ++j ) {
-						delta_s[j] *= monomial;
-						sigma[j] += delta_s[j];
-						buf -= delta_s[j] * b[j];
-					}
-					e = buf.expand();
-					e = make_modular(e, R);
+		for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
+			monomial *= (xnu - alphanu);
+			monomial = expand(monomial);
+			ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
+			cm = make_modular(cm, R);
+			if ( !cm.is_zero() ) {
+				vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
+				ex buf = e;
+				for ( size_t j=0; j<delta_s.size(); ++j ) {
+					delta_s[j] *= monomial;
+					sigma[j] += delta_s[j];
+					buf -= delta_s[j] * b[j];
 				}
+				e = make_modular(buf, R);
 			}
 		}
 	}
 	else {
-		DCOUT(uniterm left);
-		umodvec amod;
+		upvec amod;
 		for ( size_t i=0; i<a.size(); ++i ) {
-			umod up = umod_from_ex(a[i], x, R);
+			umodpoly up;
+			umodpoly_from_ex(up, a[i], x, R);
 			amod.push_back(up);
 		}
 
@@ -1297,58 +1700,30 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
 			nterms = 1;
 			z = c;
 		}
-		DCOUTVAR(nterms);
 		for ( size_t i=0; i<nterms; ++i ) {
-			DCOUTVAR(z);
 			int m = z.degree(x);
-			DCOUTVAR(m);
 			cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
-			DCOUTVAR(cm);
-			umodvec delta_s = univar_diophant(amod, x, m, p, k);
+			upvec delta_s = univar_diophant(amod, x, m, p, k);
 			cl_MI modcm;
 			cl_I poscm = cm;
 			while ( poscm < 0 ) {
 				poscm = poscm + expt_pos(cl_I(p),k);
 			}
 			modcm = cl_MI(R, poscm);
-			DCOUTVAR(modcm);
 			for ( size_t j=0; j<delta_s.size(); ++j ) {
 				delta_s[j] = delta_s[j] * modcm;
-				sigma[j] = sigma[j] + umod_to_ex(delta_s[j], x);
-			}
-			DCOUTVAR(delta_s);
-#ifdef DEBUGFACTOR
-			cout << "STEP " << i << " sigma ";
-			for ( size_t p=0; p<sigma.size(); ++p ) {
-				cout << sigma[p] << " ";
+				sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
 			}
-			cout << endl;
-#endif
 			if ( nterms > 1 ) {
 				z = c.op(i+1);
 			}
 		}
 	}
-#ifdef DEBUGFACTOR
-	cout << "sigma ";
-	for ( size_t i=0; i<sigma.size(); ++i ) {
-		cout << sigma[i] << " ";
-	}
-	cout << endl;
-#endif
 
 	for ( size_t i=0; i<sigma.size(); ++i ) {
 		sigma[i] = make_modular(sigma[i], R);
 	}
 
-#ifdef DEBUGFACTOR
-	cout << "sigma ";
-	for ( size_t i=0; i<sigma.size(); ++i ) {
-		cout << sigma[i] << " ";
-	}
-	cout << endl;
-#endif
-	DCOUT(END multivar_diophant);
 	return sigma;
 }
 
@@ -1362,22 +1737,11 @@ ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
 }
 #endif // def DEBUGFACTOR
 
-
-ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigned int p, const cl_I& l, const umodvec& u, const vector<ex>& lcU)
+static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
 {
-	DCOUT(hensel_multivar);
-	DCOUTVAR(a);
-	DCOUTVAR(x);
-	DCOUTVAR(I);
-	DCOUTVAR(p);
-	DCOUTVAR(l);
-	DCOUTVAR(u);
-	DCOUTVAR(lcU);
 	const size_t nu = I.size() + 1;
 	const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
 
-	DCOUTVAR(nu);
-	
 	vector<ex> A(nu);
 	A[nu-1] = a;
 
@@ -1388,101 +1752,65 @@ ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigne
 		A[j-2] = make_modular(A[j-2], R);
 	}
 
-#ifdef DEBUGFACTOR
-	cout << "A ";
-	for ( size_t i=0; i<A.size(); ++i) cout << A[i] << " ";
-	cout << endl;
-#endif
-
 	int maxdeg = a.degree(I.front().x);
 	for ( size_t i=1; i<I.size(); ++i ) {
 		int maxdeg2 = a.degree(I[i].x);
 		if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
 	}
-	DCOUTVAR(maxdeg);
 
 	const size_t n = u.size();
-	DCOUTVAR(n);
 	vector<ex> U(n);
 	for ( size_t i=0; i<n; ++i ) {
-		U[i] = umod_to_ex(u[i], x);
+		U[i] = umodpoly_to_ex(u[i], x);
 	}
-#ifdef DEBUGFACTOR
-	cout << "U ";
-	for ( size_t i=0; i<U.size(); ++i) cout << U[i] << " ";
-	cout << endl;
-#endif
 
 	for ( size_t j=2; j<=nu; ++j ) {
-		DCOUTVAR(j);
 		vector<ex> U1 = U;
 		ex monomial = 1;
-		DCOUTVAR(U);
 		for ( size_t m=0; m<n; ++m) {
 			if ( lcU[m] != 1 ) {
 				ex coef = lcU[m];
 				for ( size_t i=j-1; i<nu-1; ++i ) {
 					coef = coef.subs(I[i].x == I[i].evalpoint);
 				}
-				coef = expand(coef);
 				coef = make_modular(coef, R);
 				int deg = U[m].degree(x);
 				U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
 			}
 		}
-		DCOUTVAR(U);
 		ex Uprod = 1;
 		for ( size_t i=0; i<n; ++i ) {
 			Uprod *= U[i];
 		}
 		ex e = expand(A[j-1] - Uprod);
-		DCOUTVAR(e);
 
 		vector<EvalPoint> newI;
 		for ( size_t i=1; i<=j-2; ++i ) {
 			newI.push_back(I[i-1]);
 		}
-		DCOUTVAR(newI);
 
 		ex xj = I[j-2].x;
 		int alphaj = I[j-2].evalpoint;
 		size_t deg = A[j-1].degree(xj);
-		DCOUTVAR(deg);
 		for ( size_t k=1; k<=deg; ++k ) {
-			DCOUTVAR(k);
 			if ( !e.is_zero() ) {
-				DCOUTVAR(xj);
-				DCOUTVAR(alphaj);
 				monomial *= (xj - alphaj);
 				monomial = expand(monomial);
-				DCOUTVAR(monomial);
 				ex dif = e.diff(ex_to<symbol>(xj), k);
-				DCOUTVAR(dif);
 				ex c = dif.subs(xj==alphaj) / factorial(k);
-				DCOUTVAR(c);
 				if ( !c.is_zero() ) {
 					vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
 					for ( size_t i=0; i<n; ++i ) {
-						DCOUTVAR(i);
-						DCOUTVAR(deltaU[i]);
 						deltaU[i] *= monomial;
 						U[i] += deltaU[i];
 						U[i] = make_modular(U[i], R);
-						U[i] = U[i].expand();
-						DCOUTVAR(U[i]);
 					}
 					ex Uprod = 1;
 					for ( size_t i=0; i<n; ++i ) {
 						Uprod *= U[i];
 					}
-					DCOUTVAR(Uprod.expand());
-					DCOUTVAR(A[j-1]);
-					e = expand(A[j-1] - Uprod);
+					e = A[j-1] - Uprod;
 					e = make_modular(e, R);
-					DCOUTVAR(e);
-				}
-				else {
-					break;
 				}
 			}
 		}
@@ -1492,51 +1820,35 @@ ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigne
 	for ( size_t i=0; i<U.size(); ++i ) {
 		acand *= U[i];
 	}
-	DCOUTVAR(acand);
 	if ( expand(a-acand).is_zero() ) {
 		lst res;
 		for ( size_t i=0; i<U.size(); ++i ) {
 			res.append(U[i]);
 		}
-		DCOUTVAR(res);
-		DCOUT(END hensel_multivar);
 		return res;
 	}
 	else {
 		lst res;
-		DCOUTVAR(res);
-		DCOUT(END hensel_multivar);
 		return lst();
 	}
 }
 
 static ex put_factors_into_lst(const ex& e)
 {
-	DCOUT(put_factors_into_lst);
-	DCOUTVAR(e);
-
 	lst result;
 
 	if ( is_a<numeric>(e) ) {
 		result.append(e);
-		DCOUT(END put_factors_into_lst);
-		DCOUTVAR(result);
 		return result;
 	}
 	if ( is_a<power>(e) ) {
 		result.append(1);
 		result.append(e.op(0));
-		result.append(e.op(1));
-		DCOUT(END put_factors_into_lst);
-		DCOUTVAR(result);
 		return result;
 	}
 	if ( is_a<symbol>(e) || is_a<add>(e) ) {
 		result.append(1);
 		result.append(e);
-		result.append(1);
-		DCOUT(END put_factors_into_lst);
-		DCOUTVAR(result);
 		return result;
 	}
 	if ( is_a<mul>(e) ) {
@@ -1548,16 +1860,12 @@ static ex put_factors_into_lst(const ex& e)
 			}
 			if ( is_a<power>(op) ) {
 				result.append(op.op(0));
-				result.append(op.op(1));
 			}
 			if ( is_a<symbol>(op) || is_a<add>(op) ) {
 				result.append(op);
-				result.append(1);
 			}
 		}
 		result.prepend(nfac);
-		DCOUT(END put_factors_into_lst);
-		DCOUTVAR(result);
 		return result;
 	}
 	throw runtime_error("put_factors_into_lst: bad term.");
@@ -1573,370 +1881,257 @@ ostream& operator<<(ostream& o, const vector<numeric>& v)
 }
 #endif // def DEBUGFACTOR
 
-static bool checkdivisors(const lst& f, vector<numeric>& d)
+/** Checks whether in a set of numbers each has a unique prime factor.
+ *
+ *  @param[in]  f  list of numbers to check
+ *  @return        true: if number set is bad, false: otherwise
+ */
+static bool checkdivisors(const lst& f)
 {
-	DCOUT(checkdivisors);
-	const int k = f.nops()-2;
-	DCOUTVAR(k);
-	DCOUTVAR(d.size());
+	const int k = f.nops();
 	numeric q, r;
-	d[0] = ex_to<numeric>(f.op(0) * f.op(f.nops()-1));
-	if ( d[0] == 1 && k == 1 && abs(f.op(1)) != 1 ) {
-		DCOUT(false);
-		DCOUT(END checkdivisors);
-		return false;
-	}
-	DCOUTVAR(d[0]);
-	for ( int i=1; i<=k; ++i ) {
-		DCOUTVAR(i);
-		DCOUTVAR(abs(f.op(i)));
+	vector<numeric> d(k);
+	d[0] = ex_to<numeric>(abs(f.op(0)));
+	for ( int i=1; i<k; ++i ) {
 		q = ex_to<numeric>(abs(f.op(i)));
-		DCOUTVAR(q);
 		for ( int j=i-1; j>=0; --j ) {
 			r = d[j];
-			DCOUTVAR(r);
 			do {
 				r = gcd(r, q);
-				DCOUTVAR(r);
 				q = q/r;
-				DCOUTVAR(q);
 			} while ( r != 1 );
 			if ( q == 1 ) {
-				DCOUT(true);
-				DCOUT(END checkdivisors);
 				return true;
 			}
 		}
 		d[i] = q;
 	}
-	DCOUT(false);
-	DCOUT(END checkdivisors);
 	return false;
 }
 
-static bool generate_set(const ex& u, const ex& vn, const exset& syms, const ex& f, const numeric& modulus, vector<numeric>& a, vector<numeric>& d)
+/** Generates a set of evaluation points for a multivariate polynomial.
+ *  The set fulfills the following conditions:
+ *  1. lcoeff(evaluated_polynomial) does not vanish
+ *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
+ *  3. evaluated_polynomial is square free
+ *  See [W1] for more details.
+ *
+ *  @param[in]     u        multivariate polynomial to be factored
+ *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
+ *  @param[in]     syms     set of symbols that appear in u
+ *  @param[in]     f        lst containing the factors of the leading coefficient vn
+ *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
+ *  @param[out]    u0       returns the evaluated (univariate) polynomial
+ *  @param[out]    a        returns the valid evaluation points. must have initial size equal
+ *                          number of symbols-1 before calling generate_set
+ */
+static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
+                         numeric& modulus, ex& u0, vector<numeric>& a)
 {
-	// computation of d is actually not necessary
-	DCOUT(generate_set);
-	DCOUTVAR(u);
-	DCOUTVAR(vn);
-	DCOUTVAR(f);
-	DCOUTVAR(modulus);
 	const ex& x = *syms.begin();
-	bool trying = true;
-	do {
-		ex u0 = u;
+	while ( true ) {
+		++modulus;
+		/* generate a set of integers ... */
+		u0 = u;
 		ex vna = vn;
 		ex vnatry;
 		exset::const_iterator s = syms.begin();
 		++s;
 		for ( size_t i=0; i<a.size(); ++i ) {
-			DCOUTVAR(*s);
 			do {
 				a[i] = mod(numeric(rand()), 2*modulus) - modulus;
 				vnatry = vna.subs(*s == a[i]);
+				/* ... for which the leading coefficient doesn't vanish ... */
 			} while ( vnatry == 0 );
 			vna = vnatry;
 			u0 = u0.subs(*s == a[i]);
 			++s;
 		}
-		DCOUTVAR(a);
-		DCOUTVAR(u0);
-		if ( gcd(u0,u0.diff(ex_to<symbol>(x))) != 1 ) {
+		/* ... for which u0 is square free ... */
+		ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
+		if ( !is_a<numeric>(g) ) {
 			continue;
 		}
-		if ( is_a<numeric>(vn) ) {
-			trying = false;
-		}
-		else {
-			DCOUT(do substitution);
-			lst fnum;
-			lst::const_iterator i = ex_to<lst>(f).begin();
-			fnum.append(*i++);
-			bool problem = false;
-			while ( i!=ex_to<lst>(f).end() ) {
-				ex fs = *i;
-				if ( !is_a<numeric>(fs) ) {
+		if ( !is_a<numeric>(vn) ) {
+			/* ... and for which the evaluated factors have each an unique prime factor */
+			lst fnum = f;
+			fnum.let_op(0) = fnum.op(0) * u0.content(x);
+			for ( size_t i=1; i<fnum.nops(); ++i ) {
+				if ( !is_a<numeric>(fnum.op(i)) ) {
 					s = syms.begin();
 					++s;
-					for ( size_t j=0; j<a.size(); ++j ) {
-						fs = fs.subs(*s == a[j]);
-						++s;
-					}
-					if ( abs(fs) == 1 ) {
-						problem = true;
-						break;
+					for ( size_t j=0; j<a.size(); ++j, ++s ) {
+						fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
 					}
 				}
-				fnum.append(fs);
-				++i; ++i;
 			}
-			if ( problem ) {
-				return true;
+			if ( checkdivisors(fnum) ) {
+				continue;
 			}
-			ex con = u0.content(x);
-			fnum.append(con);
-			DCOUTVAR(fnum);
-			trying = checkdivisors(fnum, d);
 		}
-	} while ( trying );
-	DCOUT(END generate_set);
-	return false;
+		/* ok, we have a valid set now */
+		return;
+	}
 }
 
+// forward declaration
+static ex factor_sqrfree(const ex& poly);
+
+/**
+ *  ASSERT: poly is expanded
+ */
 static ex factor_multivariate(const ex& poly, const exset& syms)
 {
-	DCOUT(factor_multivariate);
-	DCOUTVAR(poly);
-
 	exset::const_iterator s;
 	const ex& x = *syms.begin();
-	DCOUTVAR(x);
 
 	/* make polynomial primitive */
-	ex p = poly.expand().collect(x);
-	DCOUTVAR(p);
+	ex p = poly.collect(x);
 	ex cont = p.lcoeff(x);
-	for ( numeric i=p.degree(x)-1; i>=p.ldegree(x); --i ) {
-		cont = gcd(cont, p.coeff(x,ex_to<numeric>(i).to_int()));
+	for ( int i=p.degree(x)-1; i>=p.ldegree(x); --i ) {
+		cont = gcd(cont, p.coeff(x,i));
 		if ( cont == 1 ) break;
 	}
-	DCOUTVAR(cont);
 	ex pp = expand(normal(p / cont));
-	DCOUTVAR(pp);
 	if ( !is_a<numeric>(cont) ) {
-#ifdef DEBUGFACTOR
-		return ::factor(cont) * ::factor(pp);
-#else
-		return factor(cont) * factor(pp);
-#endif
+		return factor_sqrfree(cont) * factor_sqrfree(pp);
 	}
 
 	/* factor leading coefficient */
-	pp = pp.collect(x);
-	ex vn = pp.lcoeff(x);
-	pp = pp.expand();
+	ex vn = pp.collect(x).lcoeff(x);
 	ex vnlst;
 	if ( is_a<numeric>(vn) ) {
 		vnlst = lst(vn);
 	}
 	else {
-#ifdef DEBUGFACTOR
-		ex vnfactors = ::factor(vn);
-#else
 		ex vnfactors = factor(vn);
-#endif
 		vnlst = put_factors_into_lst(vnfactors);
 	}
-	DCOUTVAR(vnlst);
 
-	const numeric maxtrials = 3;
-	numeric modulus = (vnlst.nops()-1 > 3) ? vnlst.nops()-1 : 3;
-	DCOUTVAR(modulus);
-	numeric minimalr = -1;
+	const unsigned int maxtrials = 3;
+	numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
 	vector<numeric> a(syms.size()-1, 0);
-	vector<numeric> d((vnlst.nops()-1)/2+1, 0);
 
+	/* try now to factorize until we are successful */
 	while ( true ) {
-		numeric trialcount = 0;
-		ex u, delta;
+
+		unsigned int trialcount = 0;
 		unsigned int prime;
-		size_t factor_count;
-		ex ufac;
-		ex ufaclst;
+		int factor_count = 0;
+		int min_factor_count = -1;
+		ex u, delta;
+		ex ufac, ufaclst;
+
+		/* try several evaluation points to reduce the number of modular factors */
 		while ( trialcount < maxtrials ) {
-			bool problem = generate_set(pp, vn, syms, vnlst, modulus, a, d);
-			DCOUTVAR(problem);
-			if ( problem ) {
-				++modulus;
-				continue;
-			}
-			DCOUTVAR(a);
-			DCOUTVAR(d);
-			u = pp;
-			s = syms.begin();
-			++s;
-			for ( size_t i=0; i<a.size(); ++i ) {
-				u = u.subs(*s == a[i]);
-				++s;
-			}
-			delta = u.content(x);
-			DCOUTVAR(u);
-
-			// determine proper prime
-			prime = 3;
-			DCOUTVAR(prime);
-			cl_modint_ring R = find_modint_ring(prime);
-			DCOUTVAR(u.lcoeff(x));
-			while ( true ) {
-				if ( irem(ex_to<numeric>(u.lcoeff(x)), prime) != 0 ) {
-					umod modpoly = umod_from_ex(u, x, R);
-					if ( squarefree(modpoly) ) break;
-				}
-				prime = next_prime(prime);
-				DCOUTVAR(prime);
-				R = find_modint_ring(prime);
-			}
 
-#ifdef DEBUGFACTOR
-			ufac = ::factor(u);
-#else
-			ufac = factor(u);
-#endif
-			DCOUTVAR(ufac);
+			/* generate a set of valid evaluation points */
+			generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
+
+			ufac = factor_univariate(u, x, prime);
 			ufaclst = put_factors_into_lst(ufac);
-			DCOUTVAR(ufaclst);
-			factor_count = (ufaclst.nops()-1)/2;
-			DCOUTVAR(factor_count);
+			factor_count = ufaclst.nops()-1;
+			delta = ufaclst.op(0);
 
 			if ( factor_count <= 1 ) {
-				DCOUTVAR(poly);
-				DCOUT(END factor_multivariate);
+				/* irreducible */
 				return poly;
 			}
-
-			if ( minimalr < 0 ) {
-				minimalr = factor_count;
+			if ( min_factor_count < 0 ) {
+				/* first time here */
+				min_factor_count = factor_count;
 			}
-			else if ( minimalr == factor_count ) {
+			else if ( min_factor_count == factor_count ) {
+				/* one less to try */
 				++trialcount;
-				++modulus;
 			}
-			else if ( minimalr > factor_count ) {
-				minimalr = factor_count;
+			else if ( min_factor_count > factor_count ) {
+				/* new minimum, reset trial counter */
+				min_factor_count = factor_count;
 				trialcount = 0;
 			}
-			DCOUTVAR(trialcount);
-			DCOUTVAR(minimalr);
-			if ( minimalr <= 1 ) {
-				DCOUTVAR(poly);
-				DCOUT(END factor_multivariate);
-				return poly;
-			}
-		}
-
-		vector<numeric> ftilde((vnlst.nops()-1)/2+1);
-		ftilde[0] = ex_to<numeric>(vnlst.op(0));
-		for ( size_t i=1; i<ftilde.size(); ++i ) {
-			ex ft = vnlst.op((i-1)*2+1);
-			s = syms.begin();
-			++s;
-			for ( size_t j=0; j<a.size(); ++j ) {
-				ft = ft.subs(*s == a[j]);
-				++s;
-			}
-			ftilde[i] = ex_to<numeric>(ft);
-		}
-		DCOUTVAR(ftilde);
-
-		vector<bool> used_flag((vnlst.nops()-1)/2+1, false);
-		vector<ex> D(factor_count, 1);
-		for ( size_t i=0; i<=factor_count; ++i ) {
-			DCOUTVAR(i);
-			numeric prefac;
-			if ( i == 0 ) {
-				prefac = ex_to<numeric>(ufaclst.op(0));
-				ftilde[0] = ftilde[0] / prefac;
-				vnlst.let_op(0) = vnlst.op(0) / prefac;
-				continue;
-			}
-			else {
-				prefac = ex_to<numeric>(ufaclst.op(2*(i-1)+1).lcoeff(x));
-			}
-			DCOUTVAR(prefac);
-			for ( size_t j=(vnlst.nops()-1)/2+1; j>0; --j ) {
-				DCOUTVAR(j);
-				DCOUTVAR(prefac);
-				DCOUTVAR(ftilde[j-1]);
-				if ( abs(ftilde[j-1]) == 1 ) {
-					used_flag[j-1] = true;
-					continue;
-				}
-				numeric g = gcd(prefac, ftilde[j-1]);
-				DCOUTVAR(g);
-				if ( g != 1 ) {
-					DCOUT(has_common_prime);
-					prefac = prefac / g;
-					numeric count = abs(iquo(g, ftilde[j-1]));
-					DCOUTVAR(count);
-					used_flag[j-1] = true;
-					if ( i > 0 ) {
-						if ( j == 1 ) {
-							D[i-1] = D[i-1] * pow(vnlst.op(0), count);
-						}
-						else {
-							D[i-1] = D[i-1] * pow(vnlst.op(2*(j-2)+1), count);
-						}
-					}
-					else {
-						ftilde[j-1] = ftilde[j-1] / prefac;
-						DCOUT(BREAK);
-						DCOUTVAR(ftilde[j-1]);
-						break;
-					}
-					++j;
-				}
-			}
-		}
-		DCOUTVAR(D);
-
-		bool some_factor_unused = false;
-		for ( size_t i=0; i<used_flag.size(); ++i ) {
-			if ( !used_flag[i] ) {
-				some_factor_unused = true;
-				break;
-			}
-		}
-		if ( some_factor_unused ) {
-			DCOUT(some factor unused!);
-			continue;
 		}
 
+		// determine true leading coefficients for the Hensel lifting
 		vector<ex> C(factor_count);
-		DCOUTVAR(C);
-		DCOUTVAR(delta);
-		if ( delta == 1 ) {
-			for ( size_t i=0; i<D.size(); ++i ) {
-				ex Dtilde = D[i];
-				s = syms.begin();
-				++s;
-				for ( size_t j=0; j<a.size(); ++j ) {
-					Dtilde = Dtilde.subs(*s == a[j]);
-					++s;
-				}
-				DCOUTVAR(Dtilde);
-				C[i] = D[i] * (ufaclst.op(2*i+1).lcoeff(x) / Dtilde);
+		if ( is_a<numeric>(vn) ) {
+			for ( size_t i=1; i<ufaclst.nops(); ++i ) {
+				C[i-1] = ufaclst.op(i).lcoeff(x);
 			}
 		}
 		else {
-			for ( size_t i=0; i<D.size(); ++i ) {
-				ex Dtilde = D[i];
+			vector<numeric> ftilde(vnlst.nops()-1);
+			for ( size_t i=0; i<ftilde.size(); ++i ) {
+				ex ft = vnlst.op(i+1);
 				s = syms.begin();
 				++s;
 				for ( size_t j=0; j<a.size(); ++j ) {
-					Dtilde = Dtilde.subs(*s == a[j]);
+					ft = ft.subs(*s == a[j]);
 					++s;
 				}
-				ex ui;
-				if ( i == 0 ) {
-					ui = ufaclst.op(0);
+				ftilde[i] = ex_to<numeric>(ft);
+			}
+
+			vector<bool> used_flag(ftilde.size(), false);
+			vector<ex> D(factor_count, 1);
+			if ( delta == 1 ) {
+				for ( int i=0; i<factor_count; ++i ) {
+					numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
+					for ( int j=ftilde.size()-1; j>=0; --j ) {
+						int count = 0;
+						while ( irem(prefac, ftilde[j]) == 0 ) {
+							prefac = iquo(prefac, ftilde[j]);
+							++count;
+						}
+						if ( count ) {
+							used_flag[j] = true;
+							D[i] = D[i] * pow(vnlst.op(j+1), count);
+						}
+					}
+					C[i] = D[i] * prefac;
 				}
-				else {
-					ui = ufaclst.op(2*(i-1)+1);
+			}
+			else {
+				for ( int i=0; i<factor_count; ++i ) {
+					numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
+					for ( int j=ftilde.size()-1; j>=0; --j ) {
+						int count = 0;
+						while ( irem(prefac, ftilde[j]) == 0 ) {
+							prefac = iquo(prefac, ftilde[j]);
+							++count;
+						}
+						while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
+							numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
+							prefac = iquo(prefac, g);
+							delta = delta / (ftilde[j]/g);
+							ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
+							++count;
+						}
+						if ( count ) {
+							used_flag[j] = true;
+							D[i] = D[i] * pow(vnlst.op(j+1), count);
+						}
+					}
+					C[i] = D[i] * prefac;
 				}
-				while ( true ) {
-					ex d = gcd(ui.lcoeff(x), Dtilde);
-					C[i] = D[i] * ( ui.lcoeff(x) / d );
-					ui = ui * ( Dtilde[i] / d );
-					delta = delta / ( Dtilde[i] / d );
-					if ( delta == 1 ) break;
-					ui = delta * ui;
-					C[i] = delta * C[i];
-					pp = pp * pow(delta, D.size()-1);
+			}
+
+			bool some_factor_unused = false;
+			for ( size_t i=0; i<used_flag.size(); ++i ) {
+				if ( !used_flag[i] ) {
+					some_factor_unused = true;
+					break;
 				}
 			}
+			if ( some_factor_unused ) {
+				continue;
+			}
+		}
+
+		if ( delta != 1 ) {
+			C[0] = C[0] * delta;
+			ufaclst.let_op(1) = ufaclst.op(1) * delta;
 		}
-		DCOUTVAR(C);
 
 		EvalPoint ep;
 		vector<EvalPoint> epv;
@@ -1947,45 +2142,34 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
 			ep.evalpoint = a[i].to_int();
 			epv.push_back(ep);
 		}
-		DCOUTVAR(epv);
 
-		// calc bound B
-		ex maxcoeff;
-		for ( int i=u.degree(x); i>=u.ldegree(x); --i ) {
-			maxcoeff += pow(abs(u.coeff(x, i)),2);
-		}
-		cl_I normmc = ceiling1(the<cl_R>(cln::sqrt(ex_to<numeric>(maxcoeff).to_cl_N())));
-		unsigned int maxdegree = 0;
-		for ( size_t i=0; i<factor_count; ++i ) {
-			if ( ufaclst[2*i+1].degree(x) > (int)maxdegree ) {
-				maxdegree = ufaclst[2*i+1].degree(x);
+		// calc bound p^l
+		int maxdeg = 0;
+		for ( int i=1; i<=factor_count; ++i ) {
+			if ( ufaclst.op(i).degree(x) > maxdeg ) {
+				maxdeg = ufaclst[i].degree(x);
 			}
 		}
-		cl_I B = normmc * expt_pos(cl_I(2), maxdegree);
+		cl_I B = 2*calc_bound(u, x, maxdeg);
 		cl_I l = 1;
 		cl_I pl = prime;
 		while ( pl < B ) {
 			l = l + 1;
 			pl = pl * prime;
 		}
-
-		umodvec uvec;
 		cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
-		for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-			umod newu = umod_from_ex(ufaclst.op(i*2+1), x, R);
-			uvec.push_back(newu);
+		upvec modfactors(ufaclst.nops()-1);
+		for ( size_t i=1; i<ufaclst.nops(); ++i ) {
+			umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
 		}
-		DCOUTVAR(uvec);
 
-		ex res = hensel_multivar(ufaclst.op(0)*pp, x, epv, prime, l, uvec, C);
+		ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
 		if ( res != lst() ) {
-			ex result = cont * ufaclst.op(0);
+			ex result = cont;
 			for ( size_t i=0; i<res.nops(); ++i ) {
 				result *= res.op(i).content(x) * res.op(i).unit(x);
 				result *= res.op(i).primpart(x);
 			}
-			DCOUTVAR(result);
-			DCOUT(END factor_multivariate);
 			return result;
 		}
 	}
@@ -2037,11 +2221,7 @@ struct apply_factor_map : public map_function {
 	ex operator()(const ex& e)
 	{
 		if ( e.info(info_flags::polynomial) ) {
-#ifdef DEBUGFACTOR
-			return ::factor(e, options);
-#else
 			return factor(e, options);
-#endif
 		}
 		if ( is_a<add>(e) ) {
 			ex s1, s2;
@@ -2055,11 +2235,7 @@ struct apply_factor_map : public map_function {
 			}
 			s1 = s1.eval();
 			s2 = s2.eval();
-#ifdef DEBUGFACTOR
-			return ::factor(s1, options) + s2.map(*this);
-#else
 			return factor(s1, options) + s2.map(*this);
-#endif
 		}
 		return e.map(*this);
 	}
@@ -2067,11 +2243,7 @@ struct apply_factor_map : public map_function {
 
 } // anonymous namespace
 
-#ifdef DEBUGFACTOR
-ex factor(const ex& poly, unsigned options = 0)
-#else
 ex factor(const ex& poly, unsigned options)
-#endif
 {
 	// check arguments
 	if ( !poly.info(info_flags::polynomial) ) {
@@ -2096,7 +2268,7 @@ ex factor(const ex& poly, unsigned options)
 	}
 
 	// make poly square free
-	ex sfpoly = sqrfree(poly, syms);
+	ex sfpoly = sqrfree(poly.expand(), syms);
 
 	// factorize the square free components
 	if ( is_a<power>(sfpoly) ) {
@@ -2143,3 +2315,7 @@ ex factor(const ex& poly, unsigned options)
 }
 
 } // namespace GiNaC
+
+#ifdef DEBUGFACTOR
+#include "test.h"
+#endif