Fixed lots of bugs in factor_multivariate().
[ginac.git] / ginac / factor.cpp
index f3b48fdb3c8a5cdb413f9326345b9fd0dcfa5ec9..fb73897218db06ed31b40f32bf52aabca7e33acf 100644 (file)
@@ -72,11 +72,29 @@ namespace {
 
 typedef vector<cl_MI> mvec;
 #ifdef DEBUGFACTOR
+ostream& operator<<(ostream& o, const vector<int>& v)
+{
+       vector<int>::const_iterator i = v.begin(), end = v.end();
+       while ( i != end ) {
+               o << *i++ << " ";
+       }
+       return o;
+}
+ostream& operator<<(ostream& o, const vector<cl_I>& v)
+{
+       vector<cl_I>::const_iterator i = v.begin(), end = v.end();
+       while ( i != end ) {
+               o << *i << "[" << i-v.begin() << "]" << " ";
+               ++i;
+       }
+       return o;
+}
 ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 {
        vector<cl_MI>::const_iterator i = v.begin(), end = v.end();
        while ( i != end ) {
-               o << *i++ << " ";
+               o << *i << "[" << i-v.begin() << "]" << " ";
+               ++i;
        }
        return o;
 }
@@ -84,7 +102,8 @@ ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
 {
        vector< vector<cl_MI> >::const_iterator i = v.begin(), end = v.end();
        while ( i != end ) {
-               o << *i++ << endl;
+               o << i-v.begin() << ": " << *i << endl;
+               ++i;
        }
        return o;
 }
@@ -161,29 +180,16 @@ canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typena
 
 // END COPY FROM UPOLY.HPP
 
-static void expt_pos(const umodpoly& a, unsigned int q, umodpoly& b)
-{
-       throw runtime_error("expt_pos: not implemented!");
-       // code below is not correct!
-//     b.clear();
-//     if ( a.empty() ) return;
-//     b.resize(degree(a)*q+1, a[0].ring()->zero());
-//     cl_MI norm = recip(a[0]);
-//     umodpoly an = a;
-//     for ( size_t i=0; i<an.size(); ++i ) {
-//             an[i] = an[i] * norm;
-//     }
-//     b[0] = a[0].ring()->one();
-//     for ( size_t i=1; i<b.size(); ++i ) {
-//             for ( size_t j=1; j<i; ++j ) {
-//                     b[i] = b[i] + ((i-j+1)*q-i-1) * a[i-j] * b[j-1];
-//             }
-//             b[i] = b[i] / i;
-//     }
-//     cl_MI corr = expt_pos(a[0], q);
-//     for ( size_t i=0; i<b.size(); ++i ) {
-//             b[i] = b[i] * corr;
-//     }
+static void expt_pos(umodpoly& a, unsigned int q)
+{
+       if ( a.empty() ) return;
+       cl_MI zero = a[0].ring()->zero(); 
+       int deg = degree(a);
+       a.resize(degree(a)*q+1, zero);
+       for ( int i=deg; i>0; --i ) {
+               a[i*q] = a[i];
+               a[i] = zero;
+       }
 }
 
 template<typename T>
@@ -359,10 +365,12 @@ static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_m
        canonicalize(ump);
 }
 
+#ifdef DEBUGFACTOR
 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 {
        umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 }
+#endif
 
 static ex upoly_to_ex(const upoly& a, const ex& x)
 {
@@ -410,6 +418,20 @@ static upoly umodpoly_to_upoly(const umodpoly& a)
        return e;
 }
 
+static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
+{
+       umodpoly e;
+       if ( a.empty() ) return e;
+       cl_modint_ring oldR = a[0].ring();
+       size_t sa = a.size();
+       e.resize(sa+m, R->zero());
+       for ( size_t i=0; i<sa; ++i ) {
+               e[i+m] = R->canonhom(oldR->retract(a[i]));
+       }
+       canonicalize(e);
+       return e;
+}
+
 /** Divides all coefficients of the polynomial a by the integer x.
  *  All coefficients are supposed to be divisible by x. If they are not, the
  *  the<cl_I> cast will raise an exception.
@@ -586,7 +608,7 @@ static bool squarefree(const umodpoly& a)
        umodpoly b;
        deriv(a, b);
        if ( b.empty() ) {
-               return true;
+               return false;
        }
        umodpoly c;
        gcd(a, b, c);
@@ -877,170 +899,129 @@ static void berlekamp(const umodpoly& a, upvec& upv)
        }
 }
 
-static void rem_xq(int q, const umodpoly& b, umodpoly& c)
+static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 {
-       cl_modint_ring R = b[0].ring();
-
-       int n = degree(b);
-       if ( n > q ) {
-               c.resize(q+1, R->zero());
-               c[q] = R->one();
-               return;
+       size_t newdeg = degree(a)/prime;
+       ap.resize(newdeg+1);
+       ap[0] = a[0];
+       for ( size_t i=1; i<=newdeg; ++i ) {
+               ap[i] = a[i*prime];
        }
+}
 
-       c.clear();
-       c.resize(n+1, R->zero());
-       int k = q-n;
-       c[n] = R->one();
-
-       int ofs = 0;
-       do {
-               cl_MI qk = div(c[n-ofs], b[n]);
-               if ( !zerop(qk) ) {
-                       for ( int i=1; i<=n; ++i ) {
-                               c[n-i+ofs] = c[n-i] - qk * b[n-i];
+static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
+{
+       const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
+       int i = 1;
+       umodpoly b;
+       deriv(a, b);
+       if ( b.size() ) {
+               umodpoly c;
+               gcd(a, b, c);
+               umodpoly w;
+               div(a, c, w);
+               while ( unequal_one(w) ) {
+                       umodpoly y;
+                       gcd(w, c, y);
+                       umodpoly z;
+                       div(w, y, z);
+                       factors.push_back(z);
+                       mult.push_back(i);
+                       ++i;
+                       w = y;
+                       umodpoly buf;
+                       div(c, y, buf);
+                       c = buf;
+               }
+               if ( unequal_one(c) ) {
+                       umodpoly cp;
+                       expt_1_over_p(c, prime, cp);
+                       size_t previ = mult.size();
+                       modsqrfree(cp, factors, mult);
+                       for ( size_t i=previ; i<mult.size(); ++i ) {
+                               mult[i] *= prime;
                        }
-                       ofs = ofs ? 0 : 1;
                }
-       } while ( k-- );
-
-       if ( ofs ) {
-               c.pop_back();
        }
        else {
-               c.erase(c.begin());
+               umodpoly ap;
+               expt_1_over_p(a, prime, ap);
+               size_t previ = mult.size();
+               modsqrfree(ap, factors, mult);
+               for ( size_t i=previ; i<mult.size(); ++i ) {
+                       mult[i] *= prime;
+               }
        }
-       canonicalize(c);
 }
 
-static void distinct_degree_factor(const umodpoly& a_, upvec& result)
+static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
 {
        umodpoly a = a_;
 
        cl_modint_ring R = a[0].ring();
        int q = cl_I_to_int(R->modulus);
-       int n = degree(a);
-       size_t nhalf = n/2;
+       int nhalf = degree(a)/2;
 
-       size_t i = 1;
-       umodpoly w(1, R->one());
+       int i = 1;
+       umodpoly w(2);
+       w[0] = R->zero();
+       w[1] = R->one();
        umodpoly x = w;
 
-       upvec ai;
-
        while ( i <= nhalf ) {
-               expt_pos(w, q, w);
-               rem(w, a, w);
-
+               expt_pos(w, q);
                umodpoly buf;
-               gcd(a, w-x, buf);
-               ai.push_back(buf);
-
-               if ( unequal_one(ai.back()) ) {
-                       div(a, ai.back(), a);
-                       rem(w, a, w);
+               rem(w, a, buf);
+               w = buf;
+               umodpoly wx = w - x;
+               gcd(a, wx, buf);
+               if ( unequal_one(buf) ) {
+                       degrees.push_back(i);
+                       ddfactors.push_back(buf);
+               }
+               if ( unequal_one(buf) ) {
+                       umodpoly buf2;
+                       div(a, buf, buf2);
+                       a = buf2;
+                       nhalf = degree(a)/2;
+                       rem(w, a, buf);
+                       w = buf;
                }
-
                ++i;
        }
-
-       result = ai;
-}
-
-static void same_degree_factor(const umodpoly& a, upvec& result)
-{
-       cl_modint_ring R = a[0].ring();
-       int deg = degree(a);
-
-       upvec buf;
-       distinct_degree_factor(a, buf);
-       int degsum = 0;
-
-       for ( size_t i=0; i<buf.size(); ++i ) {
-               if ( unequal_one(buf[i]) ) {
-                       degsum += degree(buf[i]);
-                       upvec upv;
-                       berlekamp(buf[i], upv);
-                       for ( size_t j=0; j<upv.size(); ++j ) {
-                               result.push_back(upv[j]);
-                       }
-               }
-       }
-
-       if ( degsum < deg ) {
-               result.push_back(a);
+       if ( unequal_one(a) ) {
+               degrees.push_back(degree(a));
+               ddfactors.push_back(a);
        }
 }
 
-static void distinct_degree_factor_BSGS(const umodpoly& a, upvec& result)
+static void same_degree_factor(const umodpoly& a, upvec& upv)
 {
        cl_modint_ring R = a[0].ring();
-       int q = cl_I_to_int(R->modulus);
-       int n = degree(a);
-
-       cl_N pm = 0.3;
-       int l = cl_I_to_int(ceiling1(the<cl_F>(expt(n, pm))));
-       upvec h(l+1);
-       umodpoly qk(1, R->one());
-       h[0] = qk;
-       for ( int i=1; i<=l; ++i ) {
-               expt_pos(h[i-1], q, qk);
-               rem(qk, a, h[i]);
-       }
 
-       int m = std::ceil(((double)n)/2/l);
-       upvec H(m);
-       int ql = std::pow(q, l);
-       H[0] = h[l];
-       for ( int i=1; i<m; ++i ) {
-               expt_pos(H[i-1], ql, qk);
-               rem(qk, a, H[i]);
-       }
+       vector<int> degrees;
+       upvec ddfactors;
+       distinct_degree_factor(a, degrees, ddfactors);
 
-       upvec I(m);
-       umodpoly one(1, R->one());
-       for ( int i=0; i<m; ++i ) {
-               I[i] = one;
-               for ( int j=0; j<l; ++j ) {
-                       I[i] = I[i] * (H[i] - h[j]);
+       for ( size_t i=0; i<degrees.size(); ++i ) {
+               if ( degrees[i] == degree(ddfactors[i]) ) {
+                       upv.push_back(ddfactors[i]);
                }
-               rem(I[i], a, I[i]);
-       }
-
-       upvec F(m, one);
-       umodpoly f = a;
-       for ( int i=0; i<m; ++i ) {
-               umodpoly g;
-               gcd(f, I[i], g); 
-               if ( g == one ) continue;
-               F[i] = g;
-               div(f, g, f);
-       }
-
-       result.resize(n, one);
-       if ( unequal_one(f) ) {
-               result[n] = f;
-       }
-       for ( int i=0; i<m; ++i ) {
-               umodpoly f = F[i];
-               for ( int j=l-1; j>=0; --j ) {
-                       umodpoly g;
-                       gcd(f, H[i]-h[j], g);
-                       result[l*(i+1)-j-1] = g;
-                       div(f, g, f);
+               else {
+                       berlekamp(ddfactors[i], upv);
                }
        }
 }
 
-static void cantor_zassenhaus(const umodpoly& a, upvec& result)
-{
-}
+#define USE_SAME_DEGREE_FACTOR
 
 static void factor_modular(const umodpoly& p, upvec& upv)
 {
-       //same_degree_factor(p, upv);
+#ifdef USE_SAME_DEGREE_FACTOR
+       same_degree_factor(p, upv);
+#else
        berlekamp(p, upv);
-       return;
+#endif
 }
 
 /** Calculates polynomials s and t such that a*s+b*t==1.
@@ -1101,35 +1082,52 @@ static upoly replace_lc(const upoly& poly, const cl_I& lc)
        return r;
 }
 
-static ex hensel_univar(const ex& a_, const ex& x, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, const ex& gamma_ = 0)
+static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
 {
-       upoly a;
-       upoly_from_ex(a, a_, x);
-       const cl_modint_ring& R = u1_[0].ring();
+       cl_I maxcoeff = 0;
+       cl_R coeff = 0;
+       for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
+               cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
+               if ( aa > maxcoeff ) maxcoeff = aa;
+               coeff = coeff + square(aa);
+       }
+       cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
+       cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
+       return ( B > maxcoeff ) ? B : maxcoeff;
+}
 
-       // calc bound B
-       cl_R maxcoeff;
+static inline cl_I calc_bound(const upoly& a, int maxdeg)
+{
+       cl_I maxcoeff = 0;
+       cl_R coeff = 0;
        for ( int i=degree(a); i>=0; --i ) {
-               maxcoeff = maxcoeff + square(abs(a[i]));
+               cl_I aa = abs(a[i]);
+               if ( aa > maxcoeff ) maxcoeff = aa;
+               coeff = coeff + square(aa);
        }
-       cl_I normmc = ceiling1(the<cl_R>(cln::sqrt(maxcoeff)));
-       cl_I maxdegree = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
-       cl_I B = normmc * expt_pos(cl_I(2), maxdegree);
+       cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
+       cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
+       return ( B > maxcoeff ) ? B : maxcoeff;
+}
+
+static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
+{
+       upoly a = a_;
+       const cl_modint_ring& R = u1_[0].ring();
+
+       // calc bound B
+       int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
+       cl_I maxmodulus = 2*calc_bound(a, maxdeg);
 
        // step 1
        cl_I alpha = lcoeff(a);
-       cl_I gamma = the<cl_I>(ex_to<numeric>(gamma_).to_cl_N());
-       if ( gamma == 0 ) {
-               gamma = alpha;
-       }
-       cl_I gamma_ui = abs(gamma);
-       a = a * gamma;
+       a = a * alpha;
        umodpoly nu1 = u1_;
        normalize_in_field(nu1);
        umodpoly nw1 = w1_;
        normalize_in_field(nw1);
        upoly phi;
-       phi = umodpoly_to_upoly(nu1) * gamma;
+       phi = umodpoly_to_upoly(nu1) * alpha;
        umodpoly u1;
        umodpoly_from_upoly(u1, phi, R);
        phi = umodpoly_to_upoly(nw1) * alpha;
@@ -1142,11 +1140,10 @@ static ex hensel_univar(const ex& a_, const ex& x, unsigned int p, const umodpol
        exteuclid(u1, w1, s, t);
 
        // step 3
-       upoly u = replace_lc(umodpoly_to_upoly(u1), gamma);
-       upoly w = replace_lc(umodpoly_to_upoly(w1), alpha);
+       u = replace_lc(umodpoly_to_upoly(u1), alpha);
+       w = replace_lc(umodpoly_to_upoly(w1), alpha);
        upoly e = a - u * w;
        cl_I modulus = p;
-       const cl_I maxmodulus = 2*B*gamma_ui;
 
        // step 4
        while ( !e.empty() && modulus < maxmodulus ) {
@@ -1171,15 +1168,21 @@ static ex hensel_univar(const ex& a_, const ex& x, unsigned int p, const umodpol
 
        // step 5
        if ( e.empty() ) {
-               ex ue = upoly_to_ex(u, x);
-               ex we = upoly_to_ex(w, x);
-               ex delta = ue.content(x);
-               ue = ue / delta;
-               we = we / numeric(gamma) * delta;
-               return lst(ue, we);
+               cl_I g = u[0];
+               for ( size_t i=1; i<u.size(); ++i ) {
+                       g = gcd(g, u[i]);
+                       if ( g == 1 ) break;
+               }
+               if ( g != 1 ) {
+                       u = u / g;
+                       w = w * g;
+               }
+               if ( alpha != 1 ) {
+                       w = w / alpha;
+               }
        }
        else {
-               return lst();
+               u.clear();
        }
 }
 
@@ -1213,93 +1216,156 @@ static unsigned int next_prime(unsigned int p)
        throw logic_error("next_prime: should not reach this point!");
 }
 
-class Partition
+class factor_partition
 {
 public:
-       Partition(size_t n_) : n(n_)
+       factor_partition(const upvec& factors_) : factors(factors_)
        {
-               k.resize(n, 1);
-               k[0] = 0;
-               sum = n-1;
+               n = factors.size();
+               k.resize(n, 0);
+               k[0] = 1;
+               cache.resize(n-1);
+               one.resize(1, factors.front()[0].ring()->one());
+               len = 1;
+               last = 0;
+               split();
        }
        int operator[](size_t i) const { return k[i]; }
        size_t size() const { return n; }
-       size_t size_first() const { return n-sum; }
-       size_t size_second() const { return sum; }
+       size_t size_left() const { return n-len; }
+       size_t size_right() const { return len; }
 #ifdef DEBUGFACTOR
-       void get() const
-       {
-               for ( size_t i=0; i<k.size(); ++i ) {
-                       cout << k[i] << " ";
-               }
-               cout << endl;
-       }
+       void get() const { DCOUTVAR(k); }
 #endif
        bool next()
        {
-               for ( size_t i=n-1; i>=1; --i ) {
-                       if ( k[i] ) {
-                               --k[i];
-                               --sum;
-                               return sum > 0;
+               if ( last == n-1 ) {
+                       int rem = len - 1;
+                       int p = last - 1;
+                       while ( rem ) {
+                               if ( k[p] ) {
+                                       --rem;
+                                       --p;
+                                       continue;
+                               }
+                               last = p - 1;
+                               while ( k[last] == 0 ) { --last; }
+                               if ( last == 0 && n == 2*len ) return false;
+                               k[last++] = 0;
+                               for ( size_t i=0; i<=len-rem; ++i ) {
+                                       k[last] = 1;
+                                       ++last;
+                               }
+                               fill(k.begin()+last, k.end(), 0);
+                               --last;
+                               split();
+                               return true;
                        }
-                       ++k[i];
-                       ++sum;
+                       last = len;
+                       ++len;
+                       if ( len > n/2 ) return false;
+                       fill(k.begin(), k.begin()+len, 1);
+                       fill(k.begin()+len+1, k.end(), 0);
                }
-               return false;
+               else {
+                       k[last++] = 0;
+                       k[last] = 1;
+               }
+               split();
+               return true;
        }
+       umodpoly& left() { return lr[0]; }
+       umodpoly& right() { return lr[1]; }
 private:
-       size_t n, sum;
-       vector<int> k;
-};
-
-static void split(const upvec& factors, const Partition& part, umodpoly& a, umodpoly& b)
-{
-       umodpoly one(1, factors.front()[0].ring()->one());
-       a = one;
-       b = one;
-       for ( size_t i=0; i<part.size(); ++i ) {
-               if ( part[i] ) {
-                       b = b * factors[i];
+       void split_cached()
+       {
+               size_t i = 0;
+               do {
+                       size_t pos = i;
+                       int group = k[i++];
+                       size_t d = 0;
+                       while ( i < n && k[i] == group ) { ++d; ++i; }
+                       if ( d ) {
+                               if ( cache[pos].size() >= d ) {
+                                       lr[group] = lr[group] * cache[pos][d-1];
+                               }
+                               else {
+                                       if ( cache[pos].size() == 0 ) {
+                                               cache[pos].push_back(factors[pos] * factors[pos+1]);
+                                       }
+                                       size_t j = pos + cache[pos].size() + 1;
+                                       d -= cache[pos].size();
+                                       while ( d ) {
+                                               umodpoly buf = cache[pos].back() * factors[j];
+                                               cache[pos].push_back(buf);
+                                               --d;
+                                               ++j;
+                                       }
+                                       lr[group] = lr[group] * cache[pos].back();
+                               }
+                       }
+                       else {
+                               lr[group] = lr[group] * factors[pos];
+                       }
+               } while ( i < n );
+       }
+       void split()
+       {
+               lr[0] = one;
+               lr[1] = one;
+               if ( n > 6 ) {
+                       split_cached();
                }
                else {
-                       a = a * factors[i];
+                       for ( size_t i=0; i<n; ++i ) {
+                               lr[k[i]] = lr[k[i]] * factors[i];
+                       }
                }
        }
-}
+private:
+       umodpoly lr[2];
+       vector< vector<umodpoly> > cache;
+       upvec factors;
+       umodpoly one;
+       size_t n;
+       size_t len;
+       size_t last;
+       vector<int> k;
+};
 
 struct ModFactors
 {
-       ex poly;
+       upoly poly;
        upvec factors;
 };
 
-static ex factor_univariate(const ex& poly, const ex& x)
+static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
 {
-       ex unit, cont, prim;
-       poly.unitcontprim(x, unit, cont, prim);
+       ex unit, cont, prim_ex;
+       poly.unitcontprim(x, unit, cont, prim_ex);
+       upoly prim;
+       upoly_from_ex(prim, prim_ex, x);
 
        // determine proper prime and minimize number of modular factors
-       unsigned int p = 3, lastp = 3;
+       prime = 3;
+       unsigned int lastp = prime;
        cl_modint_ring R;
        unsigned int trials = 0;
        unsigned int minfactors = 0;
-       numeric lcoeff = ex_to<numeric>(prim.lcoeff(x));
+       cl_I lc = lcoeff(prim) * the<cl_I>(ex_to<numeric>(cont).to_cl_N());
        upvec factors;
        while ( trials < 2 ) {
+               umodpoly modpoly;
                while ( true ) {
-                       p = next_prime(p);
-                       if ( irem(lcoeff, p) != 0 ) {
-                               R = find_modint_ring(p);
-                               umodpoly modpoly;
-                               umodpoly_from_ex(modpoly, prim, x, R);
+                       prime = next_prime(prime);
+                       if ( !zerop(rem(lc, prime)) ) {
+                               R = find_modint_ring(prime);
+                               umodpoly_from_upoly(modpoly, prim, R);
                                if ( squarefree(modpoly) ) break;
                        }
                }
 
                // do modular factorization
-               umodpoly modpoly;
-               umodpoly_from_ex(modpoly, prim, x, R);
                upvec trialfactors;
                factor_modular(modpoly, trialfactors);
                if ( trialfactors.size() <= 1 ) {
@@ -1309,17 +1375,16 @@ static ex factor_univariate(const ex& poly, const ex& x)
 
                if ( minfactors == 0 || trialfactors.size() < minfactors ) {
                        factors = trialfactors;
-                       minfactors = factors.size();
-                       lastp = p;
+                       minfactors = trialfactors.size();
+                       lastp = prime;
                        trials = 1;
                }
                else {
                        ++trials;
                }
        }
-       p = lastp;
-       R = find_modint_ring(p);
-       cl_univpoly_modint_ring UPR = find_univpoly_ring(R);
+       prime = lastp;
+       R = find_modint_ring(prime);
 
        // lift all factor combinations
        stack<ModFactors> tocheck;
@@ -1327,24 +1392,22 @@ static ex factor_univariate(const ex& poly, const ex& x)
        mf.poly = prim;
        mf.factors = factors;
        tocheck.push(mf);
+       upoly f1, f2;
        ex result = 1;
        while ( tocheck.size() ) {
                const size_t n = tocheck.top().factors.size();
-               Partition part(n);
+               factor_partition part(tocheck.top().factors);
                while ( true ) {
-                       umodpoly a, b;
-                       split(tocheck.top().factors, part, a, b);
-
-                       ex answer = hensel_univar(tocheck.top().poly, x, p, a, b);
-                       if ( answer != lst() ) {
-                               if ( part.size_first() == 1 ) {
-                                       if ( part.size_second() == 1 ) {
-                                               result *= answer.op(0) * answer.op(1);
+                       hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
+                       if ( !f1.empty() ) {
+                               if ( part.size_left() == 1 ) {
+                                       if ( part.size_right() == 1 ) {
+                                               result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
                                                tocheck.pop();
                                                break;
                                        }
-                                       result *= answer.op(0);
-                                       tocheck.top().poly = answer.op(1);
+                                       result *= upoly_to_ex(f1, x);
+                                       tocheck.top().poly = f2;
                                        for ( size_t i=0; i<n; ++i ) {
                                                if ( part[i] == 0 ) {
                                                        tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
@@ -1353,14 +1416,14 @@ static ex factor_univariate(const ex& poly, const ex& x)
                                        }
                                        break;
                                }
-                               else if ( part.size_second() == 1 ) {
-                                       if ( part.size_first() == 1 ) {
-                                               result *= answer.op(0) * answer.op(1);
+                               else if ( part.size_right() == 1 ) {
+                                       if ( part.size_left() == 1 ) {
+                                               result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
                                                tocheck.pop();
                                                break;
                                        }
-                                       result *= answer.op(1);
-                                       tocheck.top().poly = answer.op(0);
+                                       result *= upoly_to_ex(f2, x);
+                                       tocheck.top().poly = f1;
                                        for ( size_t i=0; i<n; ++i ) {
                                                if ( part[i] == 1 ) {
                                                        tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
@@ -1370,7 +1433,7 @@ static ex factor_univariate(const ex& poly, const ex& x)
                                        break;
                                }
                                else {
-                                       upvec newfactors1(part.size_first()), newfactors2(part.size_second());
+                                       upvec newfactors1(part.size_left()), newfactors2(part.size_right());
                                        upvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
                                        for ( size_t i=0; i<n; ++i ) {
                                                if ( part[i] ) {
@@ -1381,17 +1444,17 @@ static ex factor_univariate(const ex& poly, const ex& x)
                                                }
                                        }
                                        tocheck.top().factors = newfactors1;
-                                       tocheck.top().poly = answer.op(0);
+                                       tocheck.top().poly = f1;
                                        ModFactors mf;
                                        mf.factors = newfactors2;
-                                       mf.poly = answer.op(1);
+                                       mf.poly = f2;
                                        tocheck.push(mf);
                                        break;
                                }
                        }
                        else {
                                if ( !part.next() ) {
-                                       result *= tocheck.top().poly;
+                                       result *= upoly_to_ex(tocheck.top().poly, x);
                                        tocheck.pop();
                                        break;
                                }
@@ -1402,6 +1465,12 @@ static ex factor_univariate(const ex& poly, const ex& x)
        return unit * cont * result;
 }
 
+static inline ex factor_univariate(const ex& poly, const ex& x)
+{
+       unsigned int prime;
+       return factor_univariate(poly, x, prime);
+}
+
 struct EvalPoint
 {
        ex x;
@@ -1409,9 +1478,9 @@ struct EvalPoint
 };
 
 // forward declaration
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
 
-upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
+static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
 {
        const size_t r = a.size();
        cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
@@ -1442,7 +1511,7 @@ upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned i
 /**
  *  Assert: a not empty.
  */
-void change_modulus(const cl_modint_ring& R, umodpoly& a)
+static void change_modulus(const cl_modint_ring& R, umodpoly& a)
 {
        if ( a.empty() ) return;
        cl_modint_ring oldR = a[0].ring();
@@ -1453,7 +1522,7 @@ void change_modulus(const cl_modint_ring& R, umodpoly& a)
        canonicalize(a);
 }
 
-void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
+static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
 {
        cl_modint_ring R = find_modint_ring(p);
        umodpoly amod = a;
@@ -1496,7 +1565,7 @@ void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p,
        s_ = s; t_ = t;
 }
 
-upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
+static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
 {
        cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
 
@@ -1505,29 +1574,22 @@ upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int
        if ( r > 2 ) {
                upvec s = multiterm_eea_lift(a, x, p, k);
                for ( size_t j=0; j<r; ++j ) {
-                       ex phi = expand(pow(x,m) * umodpoly_to_ex(s[j], x));
-                       umodpoly bmod;
-                       umodpoly_from_ex(bmod, phi, x, R);
+                       umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
                        umodpoly buf;
                        rem(bmod, a[j], buf);
                        result.push_back(buf);
                }
        }
        else {
-               umodpoly s;
-               umodpoly t;
+               umodpoly s, t;
                eea_lift(a[1], a[0], x, p, k, s, t);
-               ex phi = expand(pow(x,m) * umodpoly_to_ex(s, x));
-               umodpoly bmod;
-               umodpoly_from_ex(bmod, phi, x, R);
+               umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
                umodpoly buf, q;
                remdiv(bmod, a[0], buf, q);
                result.push_back(buf);
-               phi = expand(pow(x,m) * umodpoly_to_ex(t, x));
-               umodpoly t1mod;
-               umodpoly_from_ex(t1mod, phi, x, R);
-               umodpoly buf2 = t1mod + q * a[1];
-               result.push_back(buf2);
+               umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
+               buf = t1mod + q * a[1];
+               result.push_back(buf);
        }
 
        return result;
@@ -1563,7 +1625,8 @@ static ex make_modular(const ex& e, const cl_modint_ring& R)
        return map(e.expand());
 }
 
-vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k)
+static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
+                                    unsigned int d, unsigned int p, unsigned int k)
 {
        vector<ex> a = a_;
 
@@ -1601,22 +1664,20 @@ vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, con
                ex e = make_modular(buf, R);
 
                ex monomial = 1;
-               for ( size_t m=1; m<=d; ++m ) {
-                       while ( !e.is_zero() && e.has(xnu) ) {
-                               monomial *= (xnu - alphanu);
-                               monomial = expand(monomial);
-                               ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
-                               cm = make_modular(cm, R);
-                               if ( !cm.is_zero() ) {
-                                       vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
-                                       ex buf = e;
-                                       for ( size_t j=0; j<delta_s.size(); ++j ) {
-                                               delta_s[j] *= monomial;
-                                               sigma[j] += delta_s[j];
-                                               buf -= delta_s[j] * b[j];
-                                       }
-                                       e = make_modular(buf, R);
+               for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
+                       monomial *= (xnu - alphanu);
+                       monomial = expand(monomial);
+                       ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
+                       cm = make_modular(cm, R);
+                       if ( !cm.is_zero() ) {
+                               vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
+                               ex buf = e;
+                               for ( size_t j=0; j<delta_s.size(); ++j ) {
+                                       delta_s[j] *= monomial;
+                                       sigma[j] += delta_s[j];
+                                       buf -= delta_s[j] * b[j];
                                }
+                               e = make_modular(buf, R);
                        }
                }
        }
@@ -1676,7 +1737,7 @@ ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
 }
 #endif // def DEBUGFACTOR
 
-ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
+static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I, unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
 {
        const size_t nu = I.size() + 1;
        const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
@@ -1783,13 +1844,11 @@ static ex put_factors_into_lst(const ex& e)
        if ( is_a<power>(e) ) {
                result.append(1);
                result.append(e.op(0));
-               result.append(e.op(1));
                return result;
        }
        if ( is_a<symbol>(e) || is_a<add>(e) ) {
                result.append(1);
                result.append(e);
-               result.append(1);
                return result;
        }
        if ( is_a<mul>(e) ) {
@@ -1801,11 +1860,9 @@ static ex put_factors_into_lst(const ex& e)
                        }
                        if ( is_a<power>(op) ) {
                                result.append(op.op(0));
-                               result.append(op.op(1));
                        }
                        if ( is_a<symbol>(op) || is_a<add>(op) ) {
                                result.append(op);
-                               result.append(1);
                        }
                }
                result.prepend(nfac);
@@ -1824,15 +1881,18 @@ ostream& operator<<(ostream& o, const vector<numeric>& v)
 }
 #endif // def DEBUGFACTOR
 
-static bool checkdivisors(const lst& f, vector<numeric>& d)
+/** Checks whether each number in a set has a unique prime factor.
+ *
+ *  @param[in]  f  list of numbers to check
+ *  @return        true if the number set is bad (the check failed), false otherwise
+ */
+static bool checkdivisors(const lst& f)
 {
-       const int k = f.nops()-2;
+       const int k = f.nops();
        numeric q, r;
-       d[0] = ex_to<numeric>(f.op(0) * f.op(f.nops()-1));
-       if ( d[0] == 1 && k == 1 && abs(f.op(1)) != 1 ) {
-               return false;
-       }
-       for ( int i=1; i<=k; ++i ) {
+       vector<numeric> d(k);
+       d[0] = ex_to<numeric>(abs(f.op(0)));
+       for ( int i=1; i<k; ++i ) {
                q = ex_to<numeric>(abs(f.op(i)));
                for ( int j=i-1; j>=0; --j ) {
                        r = d[j];
@@ -1849,13 +1909,30 @@ static bool checkdivisors(const lst& f, vector<numeric>& d)
        return false;
 }
 
-static bool generate_set(const ex& u, const ex& vn, const exset& syms, const ex& f, const numeric& modulus, vector<numeric>& a, vector<numeric>& d)
+/** Generates a set of evaluation points for a multivariate polynomial.
+ *  The set fulfills the following conditions:
+ *  1. lcoeff(evaluated_polynomial) does not vanish
+ *  2. each factor of lcoeff(evaluated_polynomial) has a unique prime factor
+ *  3. evaluated_polynomial is square free
+ *  See [W1] for more details.
+ *
+ *  @param[in]     u        multivariate polynomial to be factored
+ *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
+ *  @param[in]     syms     set of symbols that appear in u
+ *  @param[in]     f        lst containing the factors of the leading coefficient vn
+ *  @param[in,out] modulus  bound for random number generation (-modulus <= a_i < modulus), incremented on every attempt
+ *  @param[out]    u0       returns the evaluated (univariate) polynomial
+ *  @param[out]    a        returns the valid evaluation points. Its size must equal the
+ *                          number of symbols minus one before calling generate_set
+ */
+static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
+                         numeric& modulus, ex& u0, vector<numeric>& a)
 {
-       // computation of d is actually not necessary
        const ex& x = *syms.begin();
-       bool trying = true;
-       do {
-               ex u0 = u;
+       while ( true ) {
+               ++modulus;
+               /* generate a set of integers ... */
+               u0 = u;
                ex vna = vn;
                ex vnatry;
                exset::const_iterator s = syms.begin();
@@ -1864,71 +1941,64 @@ static bool generate_set(const ex& u, const ex& vn, const exset& syms, const ex&
                        do {
                                a[i] = mod(numeric(rand()), 2*modulus) - modulus;
                                vnatry = vna.subs(*s == a[i]);
+                               /* ... for which the leading coefficient doesn't vanish ... */
                        } while ( vnatry == 0 );
                        vna = vnatry;
                        u0 = u0.subs(*s == a[i]);
                        ++s;
                }
-               if ( gcd(u0,u0.diff(ex_to<symbol>(x))) != 1 ) {
+               /* ... for which u0 is square free ... */
+               ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
+               if ( !is_a<numeric>(g) ) {
                        continue;
                }
-               if ( is_a<numeric>(vn) ) {
-                       trying = false;
-               }
-               else {
-                       lst fnum;
-                       lst::const_iterator i = ex_to<lst>(f).begin();
-                       fnum.append(*i++);
-                       bool problem = false;
-                       while ( i!=ex_to<lst>(f).end() ) {
-                               ex fs = *i;
-                               if ( !is_a<numeric>(fs) ) {
+               if ( !is_a<numeric>(vn) ) {
+                       /* ... and for which each evaluated factor has a unique prime factor */
+                       lst fnum = f;
+                       fnum.let_op(0) = fnum.op(0) * u0.content(x);
+                       for ( size_t i=1; i<fnum.nops(); ++i ) {
+                               if ( !is_a<numeric>(fnum.op(i)) ) {
                                        s = syms.begin();
                                        ++s;
-                                       for ( size_t j=0; j<a.size(); ++j ) {
-                                               fs = fs.subs(*s == a[j]);
-                                               ++s;
-                                       }
-                                       if ( abs(fs) == 1 ) {
-                                               problem = true;
-                                               break;
+                                       for ( size_t j=0; j<a.size(); ++j, ++s ) {
+                                               fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
                                        }
                                }
-                               fnum.append(fs);
-                               ++i; ++i;
                        }
-                       if ( problem ) {
-                               return true;
+                       if ( checkdivisors(fnum) ) {
+                               continue;
                        }
-                       ex con = u0.content(x);
-                       fnum.append(con);
-                       trying = checkdivisors(fnum, d);
                }
-       } while ( trying );
-       return false;
+               /* ok, we have a valid set now */
+               return;
+       }
 }
 
+// forward declaration
+static ex factor_sqrfree(const ex& poly);
+
+/**
+ *  ASSERT: poly is expanded
+ */
 static ex factor_multivariate(const ex& poly, const exset& syms)
 {
        exset::const_iterator s;
        const ex& x = *syms.begin();
 
        /* make polynomial primitive */
-       ex p = poly.expand().collect(x);
+       ex p = poly.collect(x);
        ex cont = p.lcoeff(x);
-       for ( numeric i=p.degree(x)-1; i>=p.ldegree(x); --i ) {
-               cont = gcd(cont, p.coeff(x,ex_to<numeric>(i).to_int()));
+       for ( int i=p.degree(x)-1; i>=p.ldegree(x); --i ) {
+               cont = gcd(cont, p.coeff(x,i));
                if ( cont == 1 ) break;
        }
        ex pp = expand(normal(p / cont));
        if ( !is_a<numeric>(cont) ) {
-               return factor(cont) * factor(pp);
+               return factor_sqrfree(cont) * factor_sqrfree(pp);
        }
 
        /* factor leading coefficient */
-       pp = pp.collect(x);
-       ex vn = pp.lcoeff(x);
-       pp = pp.expand();
+       ex vn = pp.collect(x).lcoeff(x);
        ex vnlst;
        if ( is_a<numeric>(vn) ) {
                vnlst = lst(vn);
@@ -1938,200 +2008,131 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
                vnlst = put_factors_into_lst(vnfactors);
        }
 
-       const numeric maxtrials = 3;
-       numeric modulus = (vnlst.nops()-1 > 3) ? vnlst.nops()-1 : 3;
-       numeric minimalr = -1;
+       const unsigned int maxtrials = 3;
+       numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
        vector<numeric> a(syms.size()-1, 0);
-       vector<numeric> d((vnlst.nops()-1)/2+1, 0);
 
+       /* try now to factorize until we are successful */
        while ( true ) {
-               numeric trialcount = 0;
+
+               unsigned int trialcount = 0;
+               unsigned int prime;
+               int factor_count = 0;
+               int min_factor_count = -1;
                ex u, delta;
-               unsigned int prime = 3;
-               size_t factor_count = 0;
-               ex ufac;
-               ex ufaclst;
+               ex ufac, ufaclst;
+
+               /* try several evaluation points to reduce the number of modular factors */
                while ( trialcount < maxtrials ) {
-                       bool problem = generate_set(pp, vn, syms, vnlst, modulus, a, d);
-                       if ( problem ) {
-                               ++modulus;
-                               continue;
-                       }
-                       u = pp;
-                       s = syms.begin();
-                       ++s;
-                       for ( size_t i=0; i<a.size(); ++i ) {
-                               u = u.subs(*s == a[i]);
-                               ++s;
-                       }
-                       delta = u.content(x);
-
-                       // determine proper prime
-                       prime = 3;
-                       cl_modint_ring R = find_modint_ring(prime);
-                       while ( true ) {
-                               if ( irem(ex_to<numeric>(u.lcoeff(x)), prime) != 0 ) {
-                                       umodpoly modpoly;
-                                       umodpoly_from_ex(modpoly, u, x, R);
-                                       if ( squarefree(modpoly) ) break;
-                               }
-                               prime = next_prime(prime);
-                               R = find_modint_ring(prime);
-                       }
 
-                       ufac = factor(u);
+                       /* generate a set of valid evaluation points */
+                       generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
+
+                       ufac = factor_univariate(u, x, prime);
                        ufaclst = put_factors_into_lst(ufac);
-                       factor_count = (ufaclst.nops()-1)/2;
-
-                       // veto factorization for which gcd(u_i, u_j) != 1 for all i,j
-                       upvec tryu;
-                       for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-                               umodpoly newu;
-                               umodpoly_from_ex(newu, ufaclst.op(i*2+1), x, R);
-                               tryu.push_back(newu);
-                       }
-                       bool veto = false;
-                       for ( size_t i=0; i<tryu.size()-1; ++i ) {
-                               for ( size_t j=i+1; j<tryu.size(); ++j ) {
-                                       umodpoly tryg;
-                                       gcd(tryu[i], tryu[j], tryg);
-                                       if ( unequal_one(tryg) ) {
-                                               veto = true;
-                                               goto escape_quickly;
-                                       }
-                               }
-                       }
-                       escape_quickly: ;
-                       if ( veto ) {
-                               continue;
-                       }
+                       factor_count = ufaclst.nops()-1;
+                       delta = ufaclst.op(0);
 
                        if ( factor_count <= 1 ) {
+                               /* irreducible */
                                return poly;
                        }
-
-                       if ( minimalr < 0 ) {
-                               minimalr = factor_count;
+                       if ( min_factor_count < 0 ) {
+                               /* first time here */
+                               min_factor_count = factor_count;
                        }
-                       else if ( minimalr == factor_count ) {
+                       else if ( min_factor_count == factor_count ) {
+                               /* one less to try */
                                ++trialcount;
-                               ++modulus;
                        }
-                       else if ( minimalr > factor_count ) {
-                               minimalr = factor_count;
+                       else if ( min_factor_count > factor_count ) {
+                               /* new minimum, reset trial counter */
+                               min_factor_count = factor_count;
                                trialcount = 0;
                        }
-                       if ( minimalr <= 1 ) {
-                               return poly;
-                       }
                }
 
-               vector<numeric> ftilde((vnlst.nops()-1)/2+1);
-               ftilde[0] = ex_to<numeric>(vnlst.op(0));
-               for ( size_t i=1; i<ftilde.size(); ++i ) {
-                       ex ft = vnlst.op((i-1)*2+1);
-                       s = syms.begin();
-                       ++s;
-                       for ( size_t j=0; j<a.size(); ++j ) {
-                               ft = ft.subs(*s == a[j]);
-                               ++s;
+               // determine true leading coefficients for the Hensel lifting
+               vector<ex> C(factor_count);
+               if ( is_a<numeric>(vn) ) {
+                       for ( size_t i=1; i<ufaclst.nops(); ++i ) {
+                               C[i-1] = ufaclst.op(i).lcoeff(x);
                        }
-                       ftilde[i] = ex_to<numeric>(ft);
                }
+               else {
+                       vector<numeric> ftilde(vnlst.nops()-1);
+                       for ( size_t i=0; i<ftilde.size(); ++i ) {
+                               ex ft = vnlst.op(i+1);
+                               s = syms.begin();
+                               ++s;
+                               for ( size_t j=0; j<a.size(); ++j ) {
+                                       ft = ft.subs(*s == a[j]);
+                                       ++s;
+                               }
+                               ftilde[i] = ex_to<numeric>(ft);
+                       }
 
-               vector<bool> used_flag((vnlst.nops()-1)/2+1, false);
-               vector<ex> D(factor_count, 1);
-               for ( size_t i=0; i<=factor_count; ++i ) {
-                       numeric prefac;
-                       if ( i == 0 ) {
-                               prefac = ex_to<numeric>(ufaclst.op(0));
-                               ftilde[0] = ftilde[0] / prefac;
-                               vnlst.let_op(0) = vnlst.op(0) / prefac;
-                               continue;
+                       vector<bool> used_flag(ftilde.size(), false);
+                       vector<ex> D(factor_count, 1);
+                       if ( delta == 1 ) {
+                               for ( int i=0; i<factor_count; ++i ) {
+                                       numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
+                                       for ( int j=ftilde.size()-1; j>=0; --j ) {
+                                               int count = 0;
+                                               while ( irem(prefac, ftilde[j]) == 0 ) {
+                                                       prefac = iquo(prefac, ftilde[j]);
+                                                       ++count;
+                                               }
+                                               if ( count ) {
+                                                       used_flag[j] = true;
+                                                       D[i] = D[i] * pow(vnlst.op(j+1), count);
+                                               }
+                                       }
+                                       C[i] = D[i] * prefac;
+                               }
                        }
                        else {
-                               prefac = ex_to<numeric>(ufaclst.op(2*(i-1)+1).lcoeff(x));
-                       }
-                       for ( size_t j=(vnlst.nops()-1)/2+1; j>0; --j ) {
-                               if ( abs(ftilde[j-1]) == 1 ) {
-                                       used_flag[j-1] = true;
-                                       continue;
-                               }
-                               numeric g = gcd(prefac, ftilde[j-1]);
-                               if ( g != 1 ) {
-                                       prefac = prefac / g;
-                                       numeric count = abs(iquo(g, ftilde[j-1]));
-                                       used_flag[j-1] = true;
-                                       if ( i > 0 ) {
-                                               if ( j == 1 ) {
-                                                       D[i-1] = D[i-1] * pow(vnlst.op(0), count);
+                               for ( int i=0; i<factor_count; ++i ) {
+                                       numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
+                                       for ( int j=ftilde.size()-1; j>=0; --j ) {
+                                               int count = 0;
+                                               while ( irem(prefac, ftilde[j]) == 0 ) {
+                                                       prefac = iquo(prefac, ftilde[j]);
+                                                       ++count;
                                                }
-                                               else {
-                                                       D[i-1] = D[i-1] * pow(vnlst.op(2*(j-2)+1), count);
+                                               while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
+                                                       numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
+                                                       prefac = iquo(prefac, g);
+                                                       delta = delta / (ftilde[j]/g);
+                                                       ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
+                                                       ++count;
+                                               }
+                                               if ( count ) {
+                                                       used_flag[j] = true;
+                                                       D[i] = D[i] * pow(vnlst.op(j+1), count);
                                                }
                                        }
-                                       else {
-                                               ftilde[j-1] = ftilde[j-1] / prefac;
-                                               break;
-                                       }
-                                       ++j;
+                                       C[i] = D[i] * prefac;
                                }
                        }
-               }
 
-               bool some_factor_unused = false;
-               for ( size_t i=0; i<used_flag.size(); ++i ) {
-                       if ( !used_flag[i] ) {
-                               some_factor_unused = true;
-                               break;
-                       }
-               }
-               if ( some_factor_unused ) {
-                       continue;
-               }
-
-               vector<ex> C(factor_count);
-               if ( delta == 1 ) {
-                       for ( size_t i=0; i<D.size(); ++i ) {
-                               ex Dtilde = D[i];
-                               s = syms.begin();
-                               ++s;
-                               for ( size_t j=0; j<a.size(); ++j ) {
-                                       Dtilde = Dtilde.subs(*s == a[j]);
-                                       ++s;
+                       bool some_factor_unused = false;
+                       for ( size_t i=0; i<used_flag.size(); ++i ) {
+                               if ( !used_flag[i] ) {
+                                       some_factor_unused = true;
+                                       break;
                                }
-                               C[i] = D[i] * (ufaclst.op(2*i+1).lcoeff(x) / Dtilde);
                        }
-               }
-               else {
-                       for ( size_t i=0; i<D.size(); ++i ) {
-                               ex Dtilde = D[i];
-                               s = syms.begin();
-                               ++s;
-                               for ( size_t j=0; j<a.size(); ++j ) {
-                                       Dtilde = Dtilde.subs(*s == a[j]);
-                                       ++s;
-                               }
-                               ex ui;
-                               if ( i == 0 ) {
-                                       ui = ufaclst.op(0);
-                               }
-                               else {
-                                       ui = ufaclst.op(2*(i-1)+1);
-                               }
-                               while ( true ) {
-                                       ex d = gcd(ui.lcoeff(x), Dtilde);
-                                       C[i] = D[i] * ( ui.lcoeff(x) / d );
-                                       ui = ui * ( Dtilde[i] / d );
-                                       delta = delta / ( Dtilde[i] / d );
-                                       if ( delta == 1 ) break;
-                                       ui = delta * ui;
-                                       C[i] = delta * C[i];
-                                       pp = pp * pow(delta, D.size()-1);
-                               }
+                       if ( some_factor_unused ) {
+                               continue;
                        }
                }
 
+               if ( delta != 1 ) {
+                       C[0] = C[0] * delta;
+                       ufaclst.let_op(1) = ufaclst.op(1) * delta;
+               }
+
                EvalPoint ep;
                vector<EvalPoint> epv;
                s = syms.begin();
@@ -2142,37 +2143,29 @@ static ex factor_multivariate(const ex& poly, const exset& syms)
                        epv.push_back(ep);
                }
 
-               // calc bound B
-               ex maxcoeff;
-               for ( int i=u.degree(x); i>=u.ldegree(x); --i ) {
-                       maxcoeff += pow(abs(u.coeff(x, i)),2);
-               }
-               cl_I normmc = ceiling1(the<cl_R>(cln::sqrt(ex_to<numeric>(maxcoeff).to_cl_N())));
-               unsigned int maxdegree = 0;
-               for ( size_t i=0; i<factor_count; ++i ) {
-                       if ( ufaclst[2*i+1].degree(x) > (int)maxdegree ) {
-                               maxdegree = ufaclst[2*i+1].degree(x);
+               // calc bound p^l
+               int maxdeg = 0;
+               for ( int i=1; i<=factor_count; ++i ) {
+                       if ( ufaclst.op(i).degree(x) > maxdeg ) {
+                               maxdeg = ufaclst[i].degree(x);
                        }
                }
-               cl_I B = normmc * expt_pos(cl_I(2), maxdegree);
+               cl_I B = 2*calc_bound(u, x, maxdeg);
                cl_I l = 1;
                cl_I pl = prime;
                while ( pl < B ) {
                        l = l + 1;
                        pl = pl * prime;
                }
-
-               upvec uvec;
                cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
-               for ( size_t i=0; i<(ufaclst.nops()-1)/2; ++i ) {
-                       umodpoly newu;
-                       umodpoly_from_ex(newu, ufaclst.op(i*2+1), x, R);
-                       uvec.push_back(newu);
+               upvec modfactors(ufaclst.nops()-1);
+               for ( size_t i=1; i<ufaclst.nops(); ++i ) {
+                       umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
                }
 
-               ex res = hensel_multivar(ufaclst.op(0)*pp, x, epv, prime, l, uvec, C);
+               ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
                if ( res != lst() ) {
-                       ex result = cont * ufaclst.op(0);
+                       ex result = cont;
                        for ( size_t i=0; i<res.nops(); ++i ) {
                                result *= res.op(i).content(x) * res.op(i).unit(x);
                                result *= res.op(i).primpart(x);
@@ -2275,7 +2268,7 @@ ex factor(const ex& poly, unsigned options)
        }
 
        // make poly square free
-       ex sfpoly = sqrfree(poly, syms);
+       ex sfpoly = sqrfree(poly.expand(), syms);
 
        // factorize the square free components
        if ( is_a<power>(sfpoly) ) {