ginac/factor.cpp

   1 /** @file factor.cpp
   2  *
   3  *  Polynomial factorization (implementation).
   4  *
   5  *  The interface function factor() at the end of this file is defined in the
   6  *  GiNaC namespace. All other utility functions and classes are defined in an
   7  *  additional anonymous namespace.
   8  *
   9  *  Factorization starts by doing a square free factorization and making the
  10  *  coefficients integer. Then, depending on the number of free variables it
  11  *  proceeds either in dedicated univariate or multivariate factorization code.
  12  *
  13  *  Univariate factorization does a modular factorization via Berlekamp's
  14  *  algorithm and distinct degree factorization. Hensel lifting is used at the
  15  *  end.
  16  *
  17  *  Multivariate factorization uses the univariate factorization (applying an
  18  *  evaluation homomorphism first) and Hensel lifting raises the answer to the
  19  *  multivariate domain. The Hensel lifting code is completely distinct from the
  20  *  code used by the univariate factorization.
  21  *
  22  *  Algorithms used can be found in
  23  *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
  24  *          P.S.Wang,
  25  *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  26  *    [GCL] Algorithms for Computer Algebra,
  27  *          K.O.Geddes, S.R.Czapor, G.Labahn,
  28  *          Springer Verlag, 1992.
  29  *    [Mig] Some Useful Bounds,
  30  *          M.Mignotte,
  31  *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
  32  *          pp. 259-263, Springer-Verlag, New York, 1982.
  33  */
  34
  35 /*
  36  *  GiNaC Copyright (C) 1999-2009 Johannes Gutenberg University Mainz, Germany
  37  *
  38  *  This program is free software; you can redistribute it and/or modify
  39  *  it under the terms of the GNU General Public License as published by
  40  *  the Free Software Foundation; either version 2 of the License, or
  41  *  (at your option) any later version.
  42  *
  43  *  This program is distributed in the hope that it will be useful,
  44  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  45  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  46  *  GNU General Public License for more details.
  47  *
  48  *  You should have received a copy of the GNU General Public License
  49  *  along with this program; if not, write to the Free Software
  50  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  51  */
  52
  53 //#define DEBUGFACTOR
  54
  55 #include "factor.h"
  56
  57 #include "ex.h"
  58 #include "numeric.h"
  59 #include "operators.h"
  60 #include "inifcns.h"
  61 #include "symbol.h"
  62 #include "relational.h"
  63 #include "power.h"
  64 #include "mul.h"
  65 #include "normal.h"
  66 #include "add.h"
  67
  68 #include <algorithm>
  69 #include <cmath>
  70 #include <limits>
  71 #include <list>
  72 #include <vector>
  73 #ifdef DEBUGFACTOR
  74 #include <ostream>
  75 #endif
  76 using namespace std;
  77
  78 #include <cln/cln.h>
  79 using namespace cln;
  80
  81 namespace GiNaC {
  82
  83 #ifdef DEBUGFACTOR
  84 #define DCOUT(str) cout << #str << endl
  85 #define DCOUTVAR(var) cout << #var << ": " << var << endl
  86 #define DCOUT2(str,var) cout << #str << ": " << var << endl
  87 ostream& operator<<(ostream& o, const vector<int>& v)
  88 {
  89         vector<int>::const_iterator i = v.begin(), end = v.end();
  90         while ( i != end ) {
  91                 o << *i++ << " ";
  92         }
  93         return o;
  94 }
  95 static ostream& operator<<(ostream& o, const vector<cl_I>& v)
  96 {
  97         vector<cl_I>::const_iterator i = v.begin(), end = v.end();
  98         while ( i != end ) {
  99                 o << *i << "[" << i-v.begin() << "]" << " ";
 100                 ++i;
 101         }
 102         return o;
 103 }
 104 static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 105 {
 106         vector<cl_MI>::const_iterator i = v.begin(), end = v.end();
 107         while ( i != end ) {
 108                 o << *i << "[" << i-v.begin() << "]" << " ";
 109                 ++i;
 110         }
 111         return o;
 112 }
 113 ostream& operator<<(ostream& o, const vector<numeric>& v)
 114 {
 115         for ( size_t i=0; i<v.size(); ++i ) {
 116                 o << v[i] << " ";
 117         }
 118         return o;
 119 }
 120 ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
 121 {
 122         vector< vector<cl_MI> >::const_iterator i = v.begin(), end = v.end();
 123         while ( i != end ) {
 124                 o << i-v.begin() << ": " << *i << endl;
 125                 ++i;
 126         }
 127         return o;
 128 }
 129 #else
 130 #define DCOUT(str)
 131 #define DCOUTVAR(var)
 132 #define DCOUT2(str,var)
 133 #endif // def DEBUGFACTOR
 134
 135 // anonymous namespace to hide all utility functions
 136 namespace {
 137
 138 ////////////////////////////////////////////////////////////////////////////////
 139 // modular univariate polynomial code
 140
 141 typedef std::vector<cln::cl_MI> umodpoly;
 142 typedef std::vector<cln::cl_I> upoly;
 143 typedef vector<umodpoly> upvec;
 144
 145 // COPY FROM UPOLY.HPP
 146
 147 // CHANGED size_t -> int !!!
 148 template<typename T> static int degree(const T& p)
 149 {
 150         return p.size() - 1;
 151 }
 152
 153 template<typename T> static typename T::value_type lcoeff(const T& p)
 154 {
 155         return p[p.size() - 1];
 156 }
 157
 158 static bool normalize_in_field(umodpoly& a)
 159 {
 160         if (a.size() == 0)
 161                 return true;
 162         if ( lcoeff(a) == a[0].ring()->one() ) {
 163                 return true;
 164         }
 165
 166         const cln::cl_MI lc_1 = recip(lcoeff(a));
 167         for (std::size_t k = a.size(); k-- != 0; )
 168                 a[k] = a[k]*lc_1;
 169         return false;
 170 }
 171
 172 template<typename T> static void
 173 canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
 174 {
 175         if (p.empty())
 176                 return;
 177
 178         std::size_t i = p.size() - 1;
 179         // Be fast if the polynomial is already canonicalized
 180         if (!zerop(p[i]))
 181                 return;
 182
 183         if (hint < p.size())
 184                 i = hint;
 185
 186         bool is_zero = false;
 187         do {
 188                 if (!zerop(p[i])) {
 189                         ++i;
 190                         break;
 191                 }
 192                 if (i == 0) {
 193                         is_zero = true;
 194                         break;
 195                 }
 196                 --i;
 197         } while (true);
 198
 199         if (is_zero) {
 200                 p.clear();
 201                 return;
 202         }
 203
 204         p.erase(p.begin() + i, p.end());
 205 }
 206
 207 // END COPY FROM UPOLY.HPP
 208
 209 static void expt_pos(umodpoly& a, unsigned int q)
 210 {
 211         if ( a.empty() ) return;
 212         cl_MI zero = a[0].ring()->zero();
 213         int deg = degree(a);
 214         a.resize(degree(a)*q+1, zero);
 215         for ( int i=deg; i>0; --i ) {
 216                 a[i*q] = a[i];
 217                 a[i] = zero;
 218         }
 219 }
 220
 221 template<typename T>
 222 static T operator+(const T& a, const T& b)
 223 {
 224         int sa = a.size();
 225         int sb = b.size();
 226         if ( sa >= sb ) {
 227                 T r(sa);
 228                 int i = 0;
 229                 for ( ; i<sb; ++i ) {
 230                         r[i] = a[i] + b[i];
 231                 }
 232                 for ( ; i<sa; ++i ) {
 233                         r[i] = a[i];
 234                 }
 235                 canonicalize(r);
 236                 return r;
 237         }
 238         else {
 239                 T r(sb);
 240                 int i = 0;
 241                 for ( ; i<sa; ++i ) {
 242                         r[i] = a[i] + b[i];
 243                 }
 244                 for ( ; i<sb; ++i ) {
 245                         r[i] = b[i];
 246                 }
 247                 canonicalize(r);
 248                 return r;
 249         }
 250 }
 251
 252 template<typename T>
 253 static T operator-(const T& a, const T& b)
 254 {
 255         int sa = a.size();
 256         int sb = b.size();
 257         if ( sa >= sb ) {
 258                 T r(sa);
 259                 int i = 0;
 260                 for ( ; i<sb; ++i ) {
 261                         r[i] = a[i] - b[i];
 262                 }
 263                 for ( ; i<sa; ++i ) {
 264                         r[i] = a[i];
 265                 }
 266                 canonicalize(r);
 267                 return r;
 268         }
 269         else {
 270                 T r(sb);
 271                 int i = 0;
 272                 for ( ; i<sa; ++i ) {
 273                         r[i] = a[i] - b[i];
 274                 }
 275                 for ( ; i<sb; ++i ) {
 276                         r[i] = -b[i];
 277                 }
 278                 canonicalize(r);
 279                 return r;
 280         }
 281 }
 282
 283 static upoly operator*(const upoly& a, const upoly& b)
 284 {
 285         upoly c;
 286         if ( a.empty() || b.empty() ) return c;
 287
 288         int n = degree(a) + degree(b);
 289         c.resize(n+1, 0);
 290         for ( int i=0 ; i<=n; ++i ) {
 291                 for ( int j=0 ; j<=i; ++j ) {
 292                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 293                         c[i] = c[i] + a[j] * b[i-j];
 294                 }
 295         }
 296         canonicalize(c);
 297         return c;
 298 }
 299
 300 static umodpoly operator*(const umodpoly& a, const umodpoly& b)
 301 {
 302         umodpoly c;
 303         if ( a.empty() || b.empty() ) return c;
 304
 305         int n = degree(a) + degree(b);
 306         c.resize(n+1, a[0].ring()->zero());
 307         for ( int i=0 ; i<=n; ++i ) {
 308                 for ( int j=0 ; j<=i; ++j ) {
 309                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 310                         c[i] = c[i] + a[j] * b[i-j];
 311                 }
 312         }
 313         canonicalize(c);
 314         return c;
 315 }
 316
 317 static upoly operator*(const upoly& a, const cl_I& x)
 318 {
 319         if ( zerop(x) ) {
 320                 upoly r;
 321                 return r;
 322         }
 323         upoly r(a.size());
 324         for ( size_t i=0; i<a.size(); ++i ) {
 325                 r[i] = a[i] * x;
 326         }
 327         return r;
 328 }
 329
 330 static upoly operator/(const upoly& a, const cl_I& x)
 331 {
 332         if ( zerop(x) ) {
 333                 upoly r;
 334                 return r;
 335         }
 336         upoly r(a.size());
 337         for ( size_t i=0; i<a.size(); ++i ) {
 338                 r[i] = exquo(a[i],x);
 339         }
 340         return r;
 341 }
 342
 343 static umodpoly operator*(const umodpoly& a, const cl_MI& x)
 344 {
 345         umodpoly r(a.size());
 346         for ( size_t i=0; i<a.size(); ++i ) {
 347                 r[i] = a[i] * x;
 348         }
 349         canonicalize(r);
 350         return r;
 351 }
 352
 353 static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 354 {
 355         // assert: e is in Z[x]
 356         int deg = e.degree(x);
 357         up.resize(deg+1);
 358         int ldeg = e.ldegree(x);
 359         for ( ; deg>=ldeg; --deg ) {
 360                 up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 361         }
 362         for ( ; deg>=0; --deg ) {
 363                 up[deg] = 0;
 364         }
 365         canonicalize(up);
 366 }
 367
 368 static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 369 {
 370         int deg = degree(e);
 371         ump.resize(deg+1);
 372         for ( ; deg>=0; --deg ) {
 373                 ump[deg] = R->canonhom(e[deg]);
 374         }
 375         canonicalize(ump);
 376 }
 377
 378 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 379 {
 380         // assert: e is in Z[x]
 381         int deg = e.degree(x);
 382         ump.resize(deg+1);
 383         int ldeg = e.ldegree(x);
 384         for ( ; deg>=ldeg; --deg ) {
 385                 cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 386                 ump[deg] = R->canonhom(coeff);
 387         }
 388         for ( ; deg>=0; --deg ) {
 389                 ump[deg] = R->zero();
 390         }
 391         canonicalize(ump);
 392 }
 393
 394 #ifdef DEBUGFACTOR
 395 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 396 {
 397         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 398 }
 399 #endif
 400
 401 static ex upoly_to_ex(const upoly& a, const ex& x)
 402 {
 403         if ( a.empty() ) return 0;
 404         ex e;
 405         for ( int i=degree(a); i>=0; --i ) {
 406                 e += numeric(a[i]) * pow(x, i);
 407         }
 408         return e;
 409 }
 410
 411 static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 412 {
 413         if ( a.empty() ) return 0;
 414         cl_modint_ring R = a[0].ring();
 415         cl_I mod = R->modulus;
 416         cl_I halfmod = (mod-1) >> 1;
 417         ex e;
 418         for ( int i=degree(a); i>=0; --i ) {
 419                 cl_I n = R->retract(a[i]);
 420                 if ( n > halfmod ) {
 421                         e += numeric(n-mod) * pow(x, i);
 422                 } else {
 423                         e += numeric(n) * pow(x, i);
 424                 }
 425         }
 426         return e;
 427 }
 428
 429 static upoly umodpoly_to_upoly(const umodpoly& a)
 430 {
 431         upoly e(a.size());
 432         if ( a.empty() ) return e;
 433         cl_modint_ring R = a[0].ring();
 434         cl_I mod = R->modulus;
 435         cl_I halfmod = (mod-1) >> 1;
 436         for ( int i=degree(a); i>=0; --i ) {
 437                 cl_I n = R->retract(a[i]);
 438                 if ( n > halfmod ) {
 439                         e[i] = n-mod;
 440                 } else {
 441                         e[i] = n;
 442                 }
 443         }
 444         return e;
 445 }
 446
 447 static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
 448 {
 449         umodpoly e;
 450         if ( a.empty() ) return e;
 451         cl_modint_ring oldR = a[0].ring();
 452         size_t sa = a.size();
 453         e.resize(sa+m, R->zero());
 454         for ( size_t i=0; i<sa; ++i ) {
 455                 e[i+m] = R->canonhom(oldR->retract(a[i]));
 456         }
 457         canonicalize(e);
 458         return e;
 459 }
 460
 461 /** Divides all coefficients of the polynomial a by the integer x.
 462  *  All coefficients are supposed to be divisible by x. If they are not, the
 463  *  the<cl_I> cast will raise an exception.
 464  *
 465  *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
 466  *  @param[in]     x  integer that divides the coefficients
 467  */
 468 static void reduce_coeff(umodpoly& a, const cl_I& x)
 469 {
 470         if ( a.empty() ) return;
 471
 472         cl_modint_ring R = a[0].ring();
 473         umodpoly::iterator i = a.begin(), end = a.end();
 474         for ( ; i!=end; ++i ) {
 475                 // cln cannot perform this division in the modular field
 476                 cl_I c = R->retract(*i);
 477                 *i = cl_MI(R, the<cl_I>(c / x));
 478         }
 479 }
 480
 481 /** Calculates remainder of a/b.
 482  *  Assertion: a and b not empty.
 483  *
 484  *  @param[in]  a  polynomial dividend
 485  *  @param[in]  b  polynomial divisor
 486  *  @param[out] r  polynomial remainder
 487  */
 488 static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 489 {
 490         int k, n;
 491         n = degree(b);
 492         k = degree(a) - n;
 493         r = a;
 494         if ( k < 0 ) return;
 495
 496         do {
 497                 cl_MI qk = div(r[n+k], b[n]);
 498                 if ( !zerop(qk) ) {
 499                         for ( int i=0; i<n; ++i ) {
 500                                 unsigned int j = n + k - 1 - i;
 501                                 r[j] = r[j] - qk * b[j-k];
 502                         }
 503                 }
 504         } while ( k-- );
 505
 506         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 507         canonicalize(r);
 508 }
 509
 510 /** Calculates quotient of a/b.
 511  *  Assertion: a and b not empty.
 512  *
 513  *  @param[in]  a  polynomial dividend
 514  *  @param[in]  b  polynomial divisor
 515  *  @param[out] q  polynomial quotient
 516  */
 517 static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 518 {
 519         int k, n;
 520         n = degree(b);
 521         k = degree(a) - n;
 522         q.clear();
 523         if ( k < 0 ) return;
 524
 525         umodpoly r = a;
 526         q.resize(k+1, a[0].ring()->zero());
 527         do {
 528                 cl_MI qk = div(r[n+k], b[n]);
 529                 if ( !zerop(qk) ) {
 530                         q[k] = qk;
 531                         for ( int i=0; i<n; ++i ) {
 532                                 unsigned int j = n + k - 1 - i;
 533                                 r[j] = r[j] - qk * b[j-k];
 534                         }
 535                 }
 536         } while ( k-- );
 537
 538         canonicalize(q);
 539 }
 540
 541 /** Calculates quotient and remainder of a/b.
 542  *  Assertion: a and b not empty.
 543  *
 544  *  @param[in]  a  polynomial dividend
 545  *  @param[in]  b  polynomial divisor
 546  *  @param[out] r  polynomial remainder
 547  *  @param[out] q  polynomial quotient
 548  */
 549 static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 550 {
 551         int k, n;
 552         n = degree(b);
 553         k = degree(a) - n;
 554         q.clear();
 555         r = a;
 556         if ( k < 0 ) return;
 557
 558         q.resize(k+1, a[0].ring()->zero());
 559         do {
 560                 cl_MI qk = div(r[n+k], b[n]);
 561                 if ( !zerop(qk) ) {
 562                         q[k] = qk;
 563                         for ( int i=0; i<n; ++i ) {
 564                                 unsigned int j = n + k - 1 - i;
 565                                 r[j] = r[j] - qk * b[j-k];
 566                         }
 567                 }
 568         } while ( k-- );
 569
 570         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 571         canonicalize(r);
 572         canonicalize(q);
 573 }
 574
 575 /** Calculates the GCD of polynomial a and b.
 576  *
 577  *  @param[in]  a  polynomial
 578  *  @param[in]  b  polynomial
 579  *  @param[out] c  GCD
 580  */
 581 static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
 582 {
 583         if ( degree(a) < degree(b) ) return gcd(b, a, c);
 584
 585         c = a;
 586         normalize_in_field(c);
 587         umodpoly d = b;
 588         normalize_in_field(d);
 589         umodpoly r;
 590         while ( !d.empty() ) {
 591                 rem(c, d, r);
 592                 c = d;
 593                 d = r;
 594         }
 595         normalize_in_field(c);
 596 }
 597
 598 /** Calculates the derivative of the polynomial a.
 599  *
 600  *  @param[in]  a  polynomial of which to take the derivative
 601  *  @param[out] d  result/derivative
 602  */
 603 static void deriv(const umodpoly& a, umodpoly& d)
 604 {
 605         d.clear();
 606         if ( a.size() <= 1 ) return;
 607
 608         d.insert(d.begin(), a.begin()+1, a.end());
 609         int max = d.size();
 610         for ( int i=1; i<max; ++i ) {
 611                 d[i] = d[i] * (i+1);
 612         }
 613         canonicalize(d);
 614 }
 615
 616 static bool unequal_one(const umodpoly& a)
 617 {
 618         if ( a.empty() ) return true;
 619         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 620 }
 621
 622 static bool equal_one(const umodpoly& a)
 623 {
 624         return ( a.size() == 1 && a[0] == a[0].ring()->one() );
 625 }
 626
 627 /** Returns true if polynomial a is square free.
 628  *
 629  *  @param[in] a  polynomial to check
 630  *  @return       true if polynomial is square free, false otherwise
 631  */
 632 static bool squarefree(const umodpoly& a)
 633 {
 634         umodpoly b;
 635         deriv(a, b);
 636         if ( b.empty() ) {
 637                 return false;
 638         }
 639         umodpoly c;
 640         gcd(a, b, c);
 641         return equal_one(c);
 642 }
 643
 644 // END modular univariate polynomial code
 645 ////////////////////////////////////////////////////////////////////////////////
 646
 647 ////////////////////////////////////////////////////////////////////////////////
 648 // modular matrix
 649
 650 typedef vector<cl_MI> mvec;
 651
 652 class modular_matrix
 653 {
 654         friend ostream& operator<<(ostream& o, const modular_matrix& m);
 655 public:
 656         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
 657         {
 658                 m.resize(c*r, init);
 659         }
 660         size_t rowsize() const { return r; }
 661         size_t colsize() const { return c; }
 662         cl_MI& operator()(size_t row, size_t col) { return m[row*c + col]; }
 663         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 664         void mul_col(size_t col, const cl_MI x)
 665         {
 666                 mvec::iterator i = m.begin() + col;
 667                 for ( size_t rc=0; rc<r; ++rc ) {
 668                         *i = *i * x;
 669                         i += c;
 670                 }
 671         }
 672         void sub_col(size_t col1, size_t col2, const cl_MI fac)
 673         {
 674                 mvec::iterator i1 = m.begin() + col1;
 675                 mvec::iterator i2 = m.begin() + col2;
 676                 for ( size_t rc=0; rc<r; ++rc ) {
 677                         *i1 = *i1 - *i2 * fac;
 678                         i1 += c;
 679                         i2 += c;
 680                 }
 681         }
 682         void switch_col(size_t col1, size_t col2)
 683         {
 684                 cl_MI buf;
 685                 mvec::iterator i1 = m.begin() + col1;
 686                 mvec::iterator i2 = m.begin() + col2;
 687                 for ( size_t rc=0; rc<r; ++rc ) {
 688                         buf = *i1; *i1 = *i2; *i2 = buf;
 689                         i1 += c;
 690                         i2 += c;
 691                 }
 692         }
 693         void mul_row(size_t row, const cl_MI x)
 694         {
 695                 vector<cl_MI>::iterator i = m.begin() + row*c;
 696                 for ( size_t cc=0; cc<c; ++cc ) {
 697                         *i = *i * x;
 698                         ++i;
 699                 }
 700         }
 701         void sub_row(size_t row1, size_t row2, const cl_MI fac)
 702         {
 703                 vector<cl_MI>::iterator i1 = m.begin() + row1*c;
 704                 vector<cl_MI>::iterator i2 = m.begin() + row2*c;
 705                 for ( size_t cc=0; cc<c; ++cc ) {
 706                         *i1 = *i1 - *i2 * fac;
 707                         ++i1;
 708                         ++i2;
 709                 }
 710         }
 711         void switch_row(size_t row1, size_t row2)
 712         {
 713                 cl_MI buf;
 714                 vector<cl_MI>::iterator i1 = m.begin() + row1*c;
 715                 vector<cl_MI>::iterator i2 = m.begin() + row2*c;
 716                 for ( size_t cc=0; cc<c; ++cc ) {
 717                         buf = *i1; *i1 = *i2; *i2 = buf;
 718                         ++i1;
 719                         ++i2;
 720                 }
 721         }
 722         bool is_col_zero(size_t col) const
 723         {
 724                 mvec::const_iterator i = m.begin() + col;
 725                 for ( size_t rr=0; rr<r; ++rr ) {
 726                         if ( !zerop(*i) ) {
 727                                 return false;
 728                         }
 729                         i += c;
 730                 }
 731                 return true;
 732         }
 733         bool is_row_zero(size_t row) const
 734         {
 735                 mvec::const_iterator i = m.begin() + row*c;
 736                 for ( size_t cc=0; cc<c; ++cc ) {
 737                         if ( !zerop(*i) ) {
 738                                 return false;
 739                         }
 740                         ++i;
 741                 }
 742                 return true;
 743         }
 744         void set_row(size_t row, const vector<cl_MI>& newrow)
 745         {
 746                 mvec::iterator i1 = m.begin() + row*c;
 747                 mvec::const_iterator i2 = newrow.begin(), end = newrow.end();
 748                 for ( ; i2 != end; ++i1, ++i2 ) {
 749                         *i1 = *i2;
 750                 }
 751         }
 752         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
 753         mvec::const_iterator row_end(size_t row) const { return m.begin()+row*c+r; }
 754 private:
 755         size_t r, c;
 756         mvec m;
 757 };
 758
 759 #ifdef DEBUGFACTOR
 760 modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 761 {
 762         const unsigned int r = m1.rowsize();
 763         const unsigned int c = m2.colsize();
 764         modular_matrix o(r,c,m1(0,0));
 765
 766         for ( size_t i=0; i<r; ++i ) {
 767                 for ( size_t j=0; j<c; ++j ) {
 768                         cl_MI buf;
 769                         buf = m1(i,0) * m2(0,j);
 770                         for ( size_t k=1; k<c; ++k ) {
 771                                 buf = buf + m1(i,k)*m2(k,j);
 772                         }
 773                         o(i,j) = buf;
 774                 }
 775         }
 776         return o;
 777 }
 778
 779 ostream& operator<<(ostream& o, const modular_matrix& m)
 780 {
 781         cl_modint_ring R = m(0,0).ring();
 782         o << "{";
 783         for ( size_t i=0; i<m.rowsize(); ++i ) {
 784                 o << "{";
 785                 for ( size_t j=0; j<m.colsize()-1; ++j ) {
 786                         o << R->retract(m(i,j)) << ",";
 787                 }
 788                 o << R->retract(m(i,m.colsize()-1)) << "}";
 789                 if ( i != m.rowsize()-1 ) {
 790                         o << ",";
 791                 }
 792         }
 793         o << "}";
 794         return o;
 795 }
 796 #endif // def DEBUGFACTOR
 797
 798 // END modular matrix
 799 ////////////////////////////////////////////////////////////////////////////////
 800
 801 /** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
 802  *
 803  *  @param[in]  a_  modular polynomial
 804  *  @param[out] Q   Q matrix
 805  */
 806 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 807 {
 808         umodpoly a = a_;
 809         normalize_in_field(a);
 810
 811         int n = degree(a);
 812         unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
 813         umodpoly r(n, a[0].ring()->zero());
 814         r[0] = a[0].ring()->one();
 815         Q.set_row(0, r);
 816         unsigned int max = (n-1) * q;
 817         for ( size_t m=1; m<=max; ++m ) {
 818                 cl_MI rn_1 = r.back();
 819                 for ( size_t i=n-1; i>0; --i ) {
 820                         r[i] = r[i-1] - (rn_1 * a[i]);
 821                 }
 822                 r[0] = -rn_1 * a[0];
 823                 if ( (m % q) == 0 ) {
 824                         Q.set_row(m/q, r);
 825                 }
 826         }
 827 }
 828
 829 /** Determine the nullspace of a matrix M-1.
 830  *
 831  *  @param[in,out] M      matrix, will be modified
 832  *  @param[out]    basis  calculated nullspace of M-1
 833  */
 834 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 835 {
 836         const size_t n = M.rowsize();
 837         const cl_MI one = M(0,0).ring()->one();
 838         for ( size_t i=0; i<n; ++i ) {
 839                 M(i,i) = M(i,i) - one;
 840         }
 841         for ( size_t r=0; r<n; ++r ) {
 842                 size_t cc = 0;
 843                 for ( ; cc<n; ++cc ) {
 844                         if ( !zerop(M(r,cc)) ) {
 845                                 if ( cc < r ) {
 846                                         if ( !zerop(M(cc,cc)) ) {
 847                                                 continue;
 848                                         }
 849                                         M.switch_col(cc, r);
 850                                 }
 851                                 else if ( cc > r ) {
 852                                         M.switch_col(cc, r);
 853                                 }
 854                                 break;
 855                         }
 856                 }
 857                 if ( cc < n ) {
 858                         M.mul_col(r, recip(M(r,r)));
 859                         for ( cc=0; cc<n; ++cc ) {
 860                                 if ( cc != r ) {
 861                                         M.sub_col(cc, r, M(r,cc));
 862                                 }
 863                         }
 864                 }
 865         }
 866
 867         for ( size_t i=0; i<n; ++i ) {
 868                 M(i,i) = M(i,i) - one;
 869         }
 870         for ( size_t i=0; i<n; ++i ) {
 871                 if ( !M.is_row_zero(i) ) {
 872                         mvec nu(M.row_begin(i), M.row_end(i));
 873                         basis.push_back(nu);
 874                 }
 875         }
 876 }
 877
 878 /** Berlekamp's modular factorization.
 879  *
 880  *  The implementation follows the algorithm in chapter 8 of [GCL].
 881  *
 882  *  @param[in]  a    modular polynomial
 883  *  @param[out] upv  vector containing modular factors. if upv was not empty the
 884  *                   new elements are added at the end
 885  */
 886 static void berlekamp(const umodpoly& a, upvec& upv)
 887 {
 888         cl_modint_ring R = a[0].ring();
 889         umodpoly one(1, R->one());
 890
 891         // find nullspace of Q matrix
 892         modular_matrix Q(degree(a), degree(a), R->zero());
 893         q_matrix(a, Q);
 894         vector<mvec> nu;
 895         nullspace(Q, nu);
 896
 897         const unsigned int k = nu.size();
 898         if ( k == 1 ) {
 899                 // irreducible
 900                 return;
 901         }
 902
 903         list<umodpoly> factors;
 904         factors.push_back(a);
 905         unsigned int size = 1;
 906         unsigned int r = 1;
 907         unsigned int q = cl_I_to_uint(R->modulus);
 908
 909         list<umodpoly>::iterator u = factors.begin();
 910
 911         // calculate all gcd's
 912         while ( true ) {
 913                 for ( unsigned int s=0; s<q; ++s ) {
 914                         umodpoly nur = nu[r];
 915                         nur[0] = nur[0] - cl_MI(R, s);
 916                         canonicalize(nur);
 917                         umodpoly g;
 918                         gcd(nur, *u, g);
 919                         if ( unequal_one(g) && g != *u ) {
 920                                 umodpoly uo;
 921                                 div(*u, g, uo);
 922                                 if ( equal_one(uo) ) {
 923                                         throw logic_error("berlekamp: unexpected divisor.");
 924                                 }
 925                                 else {
 926                                         *u = uo;
 927                                 }
 928                                 factors.push_back(g);
 929                                 size = 0;
 930                                 list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
 931                                 while ( i != end ) {
 932                                         if ( degree(*i) ) ++size;
 933                                         ++i;
 934                                 }
 935                                 if ( size == k ) {
 936                                         list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
 937                                         while ( i != end ) {
 938                                                 upv.push_back(*i++);
 939                                         }
 940                                         return;
 941                                 }
 942                         }
 943                 }
 944                 if ( ++r == k ) {
 945                         r = 1;
 946                         ++u;
 947                 }
 948         }
 949 }
 950
 951 // modular square free factorization is not used at the moment so we deactivate
 952 // the code
 953 #if 0
 954
 955 /** Calculates a^(1/prime).
 956  *
 957  *  @param[in] a      polynomial
 958  *  @param[in] prime  prime number -> exponent 1/prime
 959  *  @param[in] ap     resulting polynomial
 960  */
 961 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 962 {
 963         size_t newdeg = degree(a)/prime;
 964         ap.resize(newdeg+1);
 965         ap[0] = a[0];
 966         for ( size_t i=1; i<=newdeg; ++i ) {
 967                 ap[i] = a[i*prime];
 968         }
 969 }
 970
 971 /** Modular square free factorization.
 972  *
 973  *  @param[in]  a        polynomial
 974  *  @param[out] factors  modular factors
 975  *  @param[out] mult     corresponding multiplicities (exponents)
 976  */
 977 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 978 {
 979         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
 980         int i = 1;
 981         umodpoly b;
 982         deriv(a, b);
 983         if ( b.size() ) {
 984                 umodpoly c;
 985                 gcd(a, b, c);
 986                 umodpoly w;
 987                 div(a, c, w);
 988                 while ( unequal_one(w) ) {
 989                         umodpoly y;
 990                         gcd(w, c, y);
 991                         umodpoly z;
 992                         div(w, y, z);
 993                         factors.push_back(z);
 994                         mult.push_back(i);
 995                         ++i;
 996                         w = y;
 997                         umodpoly buf;
 998                         div(c, y, buf);
 999                         c = buf;
1000                 }
1001                 if ( unequal_one(c) ) {
1002                         umodpoly cp;
1003                         expt_1_over_p(c, prime, cp);
1004                         size_t previ = mult.size();
1005                         modsqrfree(cp, factors, mult);
1006                         for ( size_t i=previ; i<mult.size(); ++i ) {
1007                                 mult[i] *= prime;
1008                         }
1009                 }
1010         }
1011         else {
1012                 umodpoly ap;
1013                 expt_1_over_p(a, prime, ap);
1014                 size_t previ = mult.size();
1015                 modsqrfree(ap, factors, mult);
1016                 for ( size_t i=previ; i<mult.size(); ++i ) {
1017                         mult[i] *= prime;
1018                 }
1019         }
1020 }
1021
1022 #endif // deactivation of square free factorization
1023
1024 /** Distinct degree factorization (DDF).
1025  *
1026  *  The implementation follows the algorithm in chapter 8 of [GCL].
1027  *
1028  *  @param[in]  a_         modular polynomial
1029  *  @param[out] degrees    vector containing the degrees of the factors of the
1030  *                         corresponding polynomials in ddfactors.
1031  *  @param[out] ddfactors  vector containing polynomials which factors have the
1032  *                         degree given in degrees.
1033  */
1034 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
1035 {
1036         umodpoly a = a_;
1037
1038         cl_modint_ring R = a[0].ring();
1039         int q = cl_I_to_int(R->modulus);
1040         int nhalf = degree(a)/2;
1041
1042         int i = 1;
1043         umodpoly w(2);
1044         w[0] = R->zero();
1045         w[1] = R->one();
1046         umodpoly x = w;
1047
1048         while ( i <= nhalf ) {
1049                 expt_pos(w, q);
1050                 umodpoly buf;
1051                 rem(w, a, buf);
1052                 w = buf;
1053                 umodpoly wx = w - x;
1054                 gcd(a, wx, buf);
1055                 if ( unequal_one(buf) ) {
1056                         degrees.push_back(i);
1057                         ddfactors.push_back(buf);
1058                 }
1059                 if ( unequal_one(buf) ) {
1060                         umodpoly buf2;
1061                         div(a, buf, buf2);
1062                         a = buf2;
1063                         nhalf = degree(a)/2;
1064                         rem(w, a, buf);
1065                         w = buf;
1066                 }
1067                 ++i;
1068         }
1069         if ( unequal_one(a) ) {
1070                 degrees.push_back(degree(a));
1071                 ddfactors.push_back(a);
1072         }
1073 }
1074
1075 /** Modular same degree factorization.
1076  *  Same degree factorization is a kind of misnomer. It performs distinct degree
1077  *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
1078  *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
1079  *  degree.
1080  *
1081  *  @param[in]  a    modular polynomial
1082  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1083  *                   new elements are added at the end
1084  */
1085 static void same_degree_factor(const umodpoly& a, upvec& upv)
1086 {
1087         cl_modint_ring R = a[0].ring();
1088
1089         vector<int> degrees;
1090         upvec ddfactors;
1091         distinct_degree_factor(a, degrees, ddfactors);
1092
1093         for ( size_t i=0; i<degrees.size(); ++i ) {
1094                 if ( degrees[i] == degree(ddfactors[i]) ) {
1095                         upv.push_back(ddfactors[i]);
1096                 }
1097                 else {
1098                         berlekamp(ddfactors[i], upv);
1099                 }
1100         }
1101 }
1102
1103 // Yes, we can (choose).
1104 #define USE_SAME_DEGREE_FACTOR
1105
1106 /** Modular univariate factorization.
1107  *
1108  *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
1109  *  and same degree factorization (SDF). SDF seems to be slightly faster in
1110  *  almost all cases so it is activated as default.
1111  *
1112  *  @param[in]  p    modular polynomial
1113  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1114  *                   new elements are added at the end
1115  */
1116 static void factor_modular(const umodpoly& p, upvec& upv)
1117 {
1118 #ifdef USE_SAME_DEGREE_FACTOR
1119         same_degree_factor(p, upv);
1120 #else
1121         berlekamp(p, upv);
1122 #endif
1123 }
1124
1125 /** Calculates modular polynomials s and t such that a*s+b*t==1.
1126  *  Assertion: a and b are relatively prime and not zero.
1127  *
1128  *  @param[in]  a  polynomial
1129  *  @param[in]  b  polynomial
1130  *  @param[out] s  polynomial
1131  *  @param[out] t  polynomial
1132  */
1133 static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
1134 {
1135         if ( degree(a) < degree(b) ) {
1136                 exteuclid(b, a, t, s);
1137                 return;
1138         }
1139
1140         umodpoly one(1, a[0].ring()->one());
1141         umodpoly c = a; normalize_in_field(c);
1142         umodpoly d = b; normalize_in_field(d);
1143         s = one;
1144         t.clear();
1145         umodpoly d1;
1146         umodpoly d2 = one;
1147         umodpoly q;
1148         while ( true ) {
1149                 div(c, d, q);
1150                 umodpoly r = c - q * d;
1151                 umodpoly r1 = s - q * d1;
1152                 umodpoly r2 = t - q * d2;
1153                 c = d;
1154                 s = d1;
1155                 t = d2;
1156                 if ( r.empty() ) break;
1157                 d = r;
1158                 d1 = r1;
1159                 d2 = r2;
1160         }
1161         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
1162         umodpoly::iterator i = s.begin(), end = s.end();
1163         for ( ; i!=end; ++i ) {
1164                 *i = *i * fac;
1165         }
1166         canonicalize(s);
1167         fac = recip(lcoeff(b) * lcoeff(c));
1168         i = t.begin(), end = t.end();
1169         for ( ; i!=end; ++i ) {
1170                 *i = *i * fac;
1171         }
1172         canonicalize(t);
1173 }
1174
1175 /** Replaces the leading coefficient in a polynomial by a given number.
1176  *
1177  *  @param[in] poly  polynomial to change
1178  *  @param[in] lc    new leading coefficient
1179  *  @return          changed polynomial
1180  */
1181 static upoly replace_lc(const upoly& poly, const cl_I& lc)
1182 {
1183         if ( poly.empty() ) return poly;
1184         upoly r = poly;
1185         r.back() = lc;
1186         return r;
1187 }
1188
1189 /** Calculates the bound for the modulus.
1190  *  See [Mig].
1191  */
1192 static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
1193 {
1194         cl_I maxcoeff = 0;
1195         cl_R coeff = 0;
1196         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
1197                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
1198                 if ( aa > maxcoeff ) maxcoeff = aa;
1199                 coeff = coeff + square(aa);
1200         }
1201         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1202         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1203         return ( B > maxcoeff ) ? B : maxcoeff;
1204 }
1205
1206 /** Calculates the bound for the modulus.
1207  *  See [Mig].
1208  */
1209 static inline cl_I calc_bound(const upoly& a, int maxdeg)
1210 {
1211         cl_I maxcoeff = 0;
1212         cl_R coeff = 0;
1213         for ( int i=degree(a); i>=0; --i ) {
1214                 cl_I aa = abs(a[i]);
1215                 if ( aa > maxcoeff ) maxcoeff = aa;
1216                 coeff = coeff + square(aa);
1217         }
1218         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1219         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1220         return ( B > maxcoeff ) ? B : maxcoeff;
1221 }
1222
/** Hensel lifting as used by factor_univariate().
 *
 *  The implementation follows the algorithm in chapter 6 of [GCL].
 *
 *  The two modular factors are lifted until the error a - u*w vanishes or the
 *  modulus reaches twice the bound given by calc_bound(). In the latter case
 *  the lifting has failed, which is signalled by an empty u.
 *
 *  @param[in]  a_   primitive univariate polynomial
 *  @param[in]  p    prime number that does not divide lcoeff(a)
 *  @param[in]  u1_  modular factor of a (mod p)
 *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
 *                   fulfilling  u1_*w1_ == a mod p
 *  @param[out] u    lifted factor, empty if the lifting was not successful
 *  @param[out] w    lifted factor, u*w = a (only meaningful if u is not empty)
 */
static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
{
        upoly a = a_;
        const cl_modint_ring& R = u1_[0].ring();

        // calc bound B: lifting stops once the modulus reaches 2*B
        int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
        cl_I maxmodulus = 2*calc_bound(a, maxdeg);

        // step 1: multiply a by its leading coefficient alpha; both modular
        // factors go through normalize_in_field() and are then multiplied by
        // alpha as well (reduced into the ring R again)
        cl_I alpha = lcoeff(a);
        a = a * alpha;
        umodpoly nu1 = u1_;
        normalize_in_field(nu1);
        umodpoly nw1 = w1_;
        normalize_in_field(nw1);
        upoly phi;
        phi = umodpoly_to_upoly(nu1) * alpha;
        umodpoly u1;
        umodpoly_from_upoly(u1, phi, R);
        phi = umodpoly_to_upoly(nw1) * alpha;
        umodpoly w1;
        umodpoly_from_upoly(w1, phi, R);

        // step 2: cofactors with u1*s + w1*t == 1
        umodpoly s;
        umodpoly t;
        exteuclid(u1, w1, s, t);

        // step 3: initial approximations with the leading coefficients forced
        // to alpha, and the corresponding error e
        u = replace_lc(umodpoly_to_upoly(u1), alpha);
        w = replace_lc(umodpoly_to_upoly(w1), alpha);
        upoly e = a - u * w;
        cl_I modulus = p;

        // step 4: add corrections (multiples of the current modulus) to u and
        // w until the error vanishes or the bound is reached
        while ( !e.empty() && modulus < maxmodulus ) {
                upoly c = e / modulus;
                phi = umodpoly_to_upoly(s) * c;
                umodpoly sigmatilde;
                umodpoly_from_upoly(sigmatilde, phi, R);
                phi = umodpoly_to_upoly(t) * c;
                umodpoly tautilde;
                umodpoly_from_upoly(tautilde, phi, R);
                umodpoly r, q;
                remdiv(sigmatilde, w1, r, q);
                umodpoly sigma = r;
                phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
                umodpoly tau;
                umodpoly_from_upoly(tau, phi, R);
                u = u + umodpoly_to_upoly(tau) * modulus;
                w = w + umodpoly_to_upoly(sigma) * modulus;
                e = a - u * w;
                modulus = modulus * p;
        }

        // step 5
        if ( e.empty() ) {
                // success: g is the gcd of all coefficients of u; it is moved
                // from u over to w
                cl_I g = u[0];
                for ( size_t i=1; i<u.size(); ++i ) {
                        g = gcd(g, u[i]);
                        if ( g == 1 ) break;
                }
                if ( g != 1 ) {
                        u = u / g;
                        w = w * g;
                }
                // undo the multiplication of a by alpha from step 1
                if ( alpha != 1 ) {
                        w = w / alpha;
                }
        }
        else {
                // failure: bound reached with non-vanishing error
                u.clear();
        }
}
1310
/** Returns a new prime number.
 *
 *  Only odd primes are produced, the smallest value ever returned is 3. All
 *  primes found so far are kept in a function-local static vector (without
 *  any locking) that is extended on demand, so p may be any number and does
 *  not have to be a value returned by an earlier call.
 *
 *  @param[in] p  prime number (any other unsigned value works as well)
 *  @return       next odd prime number after p
 *  @throws logic_error  never expected; kept as a guard against an
 *                       inconsistent cache
 */
static unsigned int next_prime(unsigned int p)
{
        static std::vector<unsigned int> primes;
        if ( primes.empty() ) {
                primes.push_back(3); primes.push_back(5); primes.push_back(7);
        }

        // extend the cache one prime at a time until it reaches beyond p
        while ( primes.back() <= p ) {
                unsigned int candidate = primes.back() + 2;
                // trial division by all cached primes up to sqrt(candidate);
                // on a hit move on to the next odd number and start over
                size_t i = 0;
                while ( i < primes.size() && primes[i] <= candidate / primes[i] ) {
                        if ( candidate % primes[i] == 0 ) {
                                candidate += 2;
                                i = 0;
                        }
                        else {
                                ++i;
                        }
                }
                primes.push_back(candidate);
        }

        // the cache is sorted, return the first entry bigger than p
        for ( size_t i=0; i<primes.size(); ++i ) {
                if ( primes[i] > p ) {
                        return primes[i];
                }
        }
        throw std::logic_error("next_prime: should not reach this point!");
}
1345
/** Manages the splitting of a vector of modular factors into two partitions.
 *
 *  Every factor i carries a group index k[i]: 0 puts it into the left
 *  partition, 1 into the right one. left() and right() give access to the
 *  products of the factors of the respective group. The object starts with
 *  only the first factor in the right partition; next() steps to another
 *  assignment.
 */
class factor_partition
{
public:
        /** Takes the vector of modular factors and initializes the first partition */
        factor_partition(const upvec& factors_) : factors(factors_)
        {
                n = factors.size();
                // first partition: factor 0 on the right, all others on the left
                k.resize(n, 0);
                k[0] = 1;
                cache.resize(n-1);
                // constant polynomial 1 in the ring of the factors
                one.resize(1, factors.front()[0].ring()->one());
                len = 1;
                last = 0;
                split();
        }
        /** Group index (0 = left, 1 = right) of factor i */
        int operator[](size_t i) const { return k[i]; }
        /** Total number of factors */
        size_t size() const { return n; }
        /** Number of factors in the left partition */
        size_t size_left() const { return n-len; }
        /** Number of factors in the right partition */
        size_t size_right() const { return len; }
        /** Initializes the next partition.
            Returns true, if there is one, false otherwise. */
        bool next()
        {
                if ( last == n-1 ) {
                        // the entry at index 'last' sits at the very end and
                        // cannot be shifted any further
                        int rem = len - 1;
                        int p = last - 1;
                        while ( rem ) {
                                // skip the 1-entries directly below
                                if ( k[p] ) {
                                        --rem;
                                        --p;
                                        continue;
                                }
                                // gap found at p: move the next lower 1-entry
                                // up by one position and line up further
                                // 1-entries right behind it
                                last = p - 1;
                                while ( k[last] == 0 ) { --last; }
                                if ( last == 0 && n == 2*len ) return false;
                                k[last++] = 0;
                                for ( size_t i=0; i<=len-rem; ++i ) {
                                        k[last] = 1;
                                        ++last;
                                }
                                fill(k.begin()+last, k.end(), 0);
                                --last;
                                split();
                                return true;
                        }
                        // no gap found: enlarge the right partition by one
                        // factor and restart with the first len entries set
                        last = len;
                        ++len;
                        if ( len > n/2 ) return false;
                        fill(k.begin(), k.begin()+len, 1);
                        fill(k.begin()+len+1, k.end(), 0);
                }
                else {
                        // shift the 1-entry at index 'last' up by one position
                        k[last++] = 0;
                        k[last] = 1;
                }
                split();
                return true;
        }
        /** Get first partition */
        umodpoly& left() { return lr[0]; }
        /** Get second partition */
        umodpoly& right() { return lr[1]; }
private:
        /** Multiplies the factors into lr[0] and lr[1] run by run, where a run
            is a maximal sequence of neighbouring factors with the same group
            index. Products of runs are stored in cache for later calls. */
        void split_cached()
        {
                size_t i = 0;
                do {
                        // run starts at pos and contains d further factors
                        size_t pos = i;
                        int group = k[i++];
                        size_t d = 0;
                        while ( i < n && k[i] == group ) { ++d; ++i; }
                        if ( d ) {
                                if ( cache[pos].size() >= d ) {
                                        // product is already known
                                        lr[group] = lr[group] * cache[pos][d-1];
                                }
                                else {
                                        // extend the cached products for pos up
                                        // to the length needed
                                        if ( cache[pos].size() == 0 ) {
                                                cache[pos].push_back(factors[pos] * factors[pos+1]);
                                        }
                                        size_t j = pos + cache[pos].size() + 1;
                                        d -= cache[pos].size();
                                        while ( d ) {
                                                umodpoly buf = cache[pos].back() * factors[j];
                                                cache[pos].push_back(buf);
                                                --d;
                                                ++j;
                                        }
                                        lr[group] = lr[group] * cache[pos].back();
                                }
                        }
                        else {
                                // run of a single factor
                                lr[group] = lr[group] * factors[pos];
                        }
                } while ( i < n );
        }
        /** Recalculates the products lr[0] and lr[1] for the current k. The
            cached variant is used for more than six factors only. */
        void split()
        {
                lr[0] = one;
                lr[1] = one;
                if ( n > 6 ) {
                        split_cached();
                }
                else {
                        for ( size_t i=0; i<n; ++i ) {
                                lr[k[i]] = lr[k[i]] * factors[i];
                        }
                }
        }
private:
        umodpoly lr[2];                   // products of the left [0] and right [1] partition
        vector< vector<umodpoly> > cache; // cache[pos][d-1] = factors[pos] * ... * factors[pos+d]
        upvec factors;                    // copy of the factors handed to the constructor
        umodpoly one;                     // constant polynomial 1
        size_t n;                         // number of factors
        size_t len;                       // number of factors in the right partition
        size_t last;                      // index of the 1-entry in k that next() moves next
        vector<int> k;                    // group index (0 or 1) for every factor
};
1466
/** Contains a pair of univariate polynomial and its modular factors.
 *  Used by factor_univariate() as element type of its work stack.
 */
struct ModFactors
{
        upoly poly;     // polynomial that still has to be split
        upvec factors;  // modular factors belonging to poly
};
1475
1476 /** Univariate polynomial factorization.
1477  *
1478  *  Modular factorization is tried for several primes to minimize the number of
1479  *  modular factors. Then, Hensel lifting is performed.
1480  *
1481  *  @param[in]     poly   expanded square free univariate polynomial
1482  *  @param[in]     x      symbol
1483  *  @param[in,out] prime  prime number to start trying modular factorization with,
1484  *                        output value is the prime number actually used
1485  */
1486 static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
1487 {
1488         ex unit, cont, prim_ex;
1489         poly.unitcontprim(x, unit, cont, prim_ex);
1490         upoly prim;
1491         upoly_from_ex(prim, prim_ex, x);
1492
1493         // determine proper prime and minimize number of modular factors
1494         prime = 3;
1495         unsigned int lastp = prime;
1496         cl_modint_ring R;
1497         unsigned int trials = 0;
1498         unsigned int minfactors = 0;
1499         cl_I lc = lcoeff(prim) * the<cl_I>(ex_to<numeric>(cont).to_cl_N());
1500         upvec factors;
1501         while ( trials < 2 ) {
1502                 umodpoly modpoly;
1503                 while ( true ) {
1504                         prime = next_prime(prime);
1505                         if ( !zerop(rem(lc, prime)) ) {
1506                                 R = find_modint_ring(prime);
1507                                 umodpoly_from_upoly(modpoly, prim, R);
1508                                 if ( squarefree(modpoly) ) break;
1509                         }
1510                 }
1511
1512                 // do modular factorization
1513                 upvec trialfactors;
1514                 factor_modular(modpoly, trialfactors);
1515                 if ( trialfactors.size() <= 1 ) {
1516                         // irreducible for sure
1517                         return poly;
1518                 }
1519
1520                 if ( minfactors == 0 || trialfactors.size() < minfactors ) {
1521                         factors = trialfactors;
1522                         minfactors = trialfactors.size();
1523                         lastp = prime;
1524                         trials = 1;
1525                 }
1526                 else {
1527                         ++trials;
1528                 }
1529         }
1530         prime = lastp;
1531         R = find_modint_ring(prime);
1532
1533         // lift all factor combinations
1534         stack<ModFactors> tocheck;
1535         ModFactors mf;
1536         mf.poly = prim;
1537         mf.factors = factors;
1538         tocheck.push(mf);
1539         upoly f1, f2;
1540         ex result = 1;
1541         while ( tocheck.size() ) {
1542                 const size_t n = tocheck.top().factors.size();
1543                 factor_partition part(tocheck.top().factors);
1544                 while ( true ) {
1545                         // call Hensel lifting
1546                         hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
1547                         if ( !f1.empty() ) {
1548                                 // successful, update the stack and the result
1549                                 if ( part.size_left() == 1 ) {
1550                                         if ( part.size_right() == 1 ) {
1551                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1552                                                 tocheck.pop();
1553                                                 break;
1554                                         }
1555                                         result *= upoly_to_ex(f1, x);
1556                                         tocheck.top().poly = f2;
1557                                         for ( size_t i=0; i<n; ++i ) {
1558                                                 if ( part[i] == 0 ) {
1559                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1560                                                         break;
1561                                                 }
1562                                         }
1563                                         break;
1564                                 }
1565                                 else if ( part.size_right() == 1 ) {
1566                                         if ( part.size_left() == 1 ) {
1567                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1568                                                 tocheck.pop();
1569                                                 break;
1570                                         }
1571                                         result *= upoly_to_ex(f2, x);
1572                                         tocheck.top().poly = f1;
1573                                         for ( size_t i=0; i<n; ++i ) {
1574                                                 if ( part[i] == 1 ) {
1575                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1576                                                         break;
1577                                                 }
1578                                         }
1579                                         break;
1580                                 }
1581                                 else {
1582                                         upvec newfactors1(part.size_left()), newfactors2(part.size_right());
1583                                         upvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
1584                                         for ( size_t i=0; i<n; ++i ) {
1585                                                 if ( part[i] ) {
1586                                                         *i2++ = tocheck.top().factors[i];
1587                                                 }
1588                                                 else {
1589                                                         *i1++ = tocheck.top().factors[i];
1590                                                 }
1591                                         }
1592                                         tocheck.top().factors = newfactors1;
1593                                         tocheck.top().poly = f1;
1594                                         ModFactors mf;
1595                                         mf.factors = newfactors2;
1596                                         mf.poly = f2;
1597                                         tocheck.push(mf);
1598                                         break;
1599                                 }
1600                         }
1601                         else {
1602                                 // not successful
1603                                 if ( !part.next() ) {
1604                                         // if no more combinations left, return polynomial as
1605                                         // irreducible
1606                                         result *= upoly_to_ex(tocheck.top().poly, x);
1607                                         tocheck.pop();
1608                                         break;
1609                                 }
1610                         }
1611                 }
1612         }
1613
1614         return unit * cont * result;
1615 }
1616
1617 /** Second interface to factor_univariate() to be used if the information about
1618  *  the prime is not needed.
1619  */
1620 static inline ex factor_univariate(const ex& poly, const ex& x)
1621 {
1622         unsigned int prime;
1623         return factor_univariate(poly, x, prime);
1624 }
1625
/** Represents an evaluation point (<symbol>==<integer>).
 */
struct EvalPoint
{
        ex x;           // the symbol
        int evalpoint;  // the integer value the symbol is set to
};
1633
#ifdef DEBUGFACTOR
/** Debug output: writes every evaluation point as "(<symbol>==<integer>) ". */
ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
{
        typedef vector<EvalPoint>::const_iterator point_iter;
        for ( point_iter pt=v.begin(); pt!=v.end(); ++pt ) {
                o << "(" << pt->x << "==" << pt->evalpoint << ") ";
        }
        return o;
}
#endif // def DEBUGFACTOR
1643
1644 // forward declaration
1645 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
1646
1647 /** Utility function for multivariate Hensel lifting.
1648  *
1649  *  Solves the equation
1650  *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
1651  *  with deg(s_i) < deg(a_i)
1652  *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1653  *
1654  *  The implementation follows the algorithm in chapter 6 of [GCL].
1655  *
1656  *  @param[in]  a   vector of modular univariate polynomials
1657  *  @param[in]  x   symbol
1658  *  @param[in]  p   prime number
1659  *  @param[in]  k   p^k is modulus
1660  *  @return         vector of polynomials (s_i)
1661  */
1662 static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
1663 {
1664         const size_t r = a.size();
1665         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1666         upvec q(r-1);
1667         q[r-2] = a[r-1];
1668         for ( size_t j=r-2; j>=1; --j ) {
1669                 q[j-1] = a[j] * q[j];
1670         }
1671         umodpoly beta(1, R->one());
1672         upvec s;
1673         for ( size_t j=1; j<r; ++j ) {
1674                 vector<ex> mdarg(2);
1675                 mdarg[0] = umodpoly_to_ex(q[j-1], x);
1676                 mdarg[1] = umodpoly_to_ex(a[j-1], x);
1677                 vector<EvalPoint> empty;
1678                 vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
1679                 umodpoly sigma1;
1680                 umodpoly_from_ex(sigma1, exsigma[0], x, R);
1681                 umodpoly sigma2;
1682                 umodpoly_from_ex(sigma2, exsigma[1], x, R);
1683                 beta = sigma1;
1684                 s.push_back(sigma2);
1685         }
1686         s.push_back(beta);
1687         return s;
1688 }
1689
1690 /** Changes the modulus of a modular polynomial. Used by eea_lift().
1691  *
1692  *  @param[in]     R  new modular ring
1693  *  @param[in,out] a  polynomial to change (in situ)
1694  */
1695 static void change_modulus(const cl_modint_ring& R, umodpoly& a)
1696 {
1697         if ( a.empty() ) return;
1698         cl_modint_ring oldR = a[0].ring();
1699         umodpoly::iterator i = a.begin(), end = a.end();
1700         for ( ; i!=end; ++i ) {
1701                 *i = R->canonhom(oldR->retract(*i));
1702         }
1703         canonicalize(a);
1704 }
1705
1706 /** Utility function for multivariate Hensel lifting.
1707  *
1708  *  Solves  s*a + t*b == 1 mod p^k  given a,b.
1709  *
1710  *  The implementation follows the algorithm in chapter 6 of [GCL].
1711  *
1712  *  @param[in]  a   polynomial
1713  *  @param[in]  b   polynomial
1714  *  @param[in]  x   symbol
1715  *  @param[in]  p   prime number
1716  *  @param[in]  k   p^k is modulus
1717  *  @param[out] s_  output polynomial
1718  *  @param[out] t_  output polynomial
1719  */
1720 static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
1721 {
1722         cl_modint_ring R = find_modint_ring(p);
1723         umodpoly amod = a;
1724         change_modulus(R, amod);
1725         umodpoly bmod = b;
1726         change_modulus(R, bmod);
1727
1728         umodpoly smod;
1729         umodpoly tmod;
1730         exteuclid(amod, bmod, smod, tmod);
1731
1732         cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
1733         umodpoly s = smod;
1734         change_modulus(Rpk, s);
1735         umodpoly t = tmod;
1736         change_modulus(Rpk, t);
1737
1738         cl_I modulus(p);
1739         umodpoly one(1, Rpk->one());
1740         for ( size_t j=1; j<k; ++j ) {
1741                 umodpoly e = one - a * s - b * t;
1742                 reduce_coeff(e, modulus);
1743                 umodpoly c = e;
1744                 change_modulus(R, c);
1745                 umodpoly sigmabar = smod * c;
1746                 umodpoly taubar = tmod * c;
1747                 umodpoly sigma, q;
1748                 remdiv(sigmabar, bmod, sigma, q);
1749                 umodpoly tau = taubar + q * amod;
1750                 umodpoly sadd = sigma;
1751                 change_modulus(Rpk, sadd);
1752                 cl_MI modmodulus(Rpk, modulus);
1753                 s = s + sadd * modmodulus;
1754                 umodpoly tadd = tau;
1755                 change_modulus(Rpk, tadd);
1756                 t = t + tadd * modmodulus;
1757                 modulus = modulus * p;
1758         }
1759
1760         s_ = s; t_ = t;
1761 }
1762
1763 /** Utility function for multivariate Hensel lifting.
1764  *
1765  *  Solves the equation
1766  *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
1767  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1768  *
1769  *  The implementation follows the algorithm in chapter 6 of [GCL].
1770  *
1771  *  @param a  vector with univariate polynomials mod p^k
1772  *  @param x  symbol
1773  *  @param m  exponent of x^m in the equation to solve
1774  *  @param p  prime number
1775  *  @param k  p^k is modulus
1776  *  @return   vector of polynomials (s_i)
1777  */
1778 static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
1779 {
1780         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1781
1782         const size_t r = a.size();
1783         upvec result;
1784         if ( r > 2 ) {
1785                 upvec s = multiterm_eea_lift(a, x, p, k);
1786                 for ( size_t j=0; j<r; ++j ) {
1787                         umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
1788                         umodpoly buf;
1789                         rem(bmod, a[j], buf);
1790                         result.push_back(buf);
1791                 }
1792         }
1793         else {
1794                 umodpoly s, t;
1795                 eea_lift(a[1], a[0], x, p, k, s, t);
1796                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
1797                 umodpoly buf, q;
1798                 remdiv(bmod, a[0], buf, q);
1799                 result.push_back(buf);
1800                 umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
1801                 buf = t1mod + q * a[1];
1802                 result.push_back(buf);
1803         }
1804
1805         return result;
1806 }
1807
1808 /** Map used by function make_modular().
1809  *  Finds every coefficient in a polynomial and replaces it by is value in the
1810  *  given modular ring R (symmetric representation).
1811  */
1812 struct make_modular_map : public map_function {
1813         cl_modint_ring R;
1814         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
1815         ex operator()(const ex& e)
1816         {
1817                 if ( is_a<add>(e) || is_a<mul>(e) ) {
1818                         return e.map(*this);
1819                 }
1820                 else if ( is_a<numeric>(e) ) {
1821                         numeric mod(R->modulus);
1822                         numeric halfmod = (mod-1)/2;
1823                         cl_MI emod = R->canonhom(the<cl_I>(ex_to<numeric>(e).to_cl_N()));
1824                         numeric n(R->retract(emod));
1825                         if ( n > halfmod ) {
1826                                 return n-mod;
1827                         }
1828                         else {
1829                                 return n;
1830                         }
1831                 }
1832                 return e;
1833         }
1834 };
1835
1836 /** Helps mimicking modular multivariate polynomial arithmetic.
1837  *
1838  *  @param e  expression of which to make the coefficients equal to their value
1839  *            in the modular ring R (symmetric representation)
1840  *  @param R  modular ring
1841  *  @return   resulting expression
1842  */
1843 static ex make_modular(const ex& e, const cl_modint_ring& R)
1844 {
1845         make_modular_map map(R);
1846         return map(e.expand());
1847 }
1848
1849 /** Utility function for multivariate Hensel lifting.
1850  *
1851  *  Returns the polynomials s_i that fulfill
1852  *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
1853  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1854  *
1855  *  The implementation follows the algorithm in chapter 6 of [GCL].
1856  *
1857  *  @param a_  vector of multivariate factors mod p^k
1858  *  @param x   symbol (equiv. x_1 in [GCL])
1859  *  @param c   polynomial mod p^k
1860  *  @param I   vector of evaluation points
1861  *  @param d   maximum total degree of result
1862  *  @param p   prime number
1863  *  @param k   p^k is modulus
1864  *  @return    vector of polynomials (s_i)
1865  */
1866 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
1867                                     unsigned int d, unsigned int p, unsigned int k)
1868 {
1869         vector<ex> a = a_;
1870
1871         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1872         const size_t r = a.size();
1873         const size_t nu = I.size() + 1;
1874
1875         vector<ex> sigma;
1876         if ( nu > 1 ) {
1877                 ex xnu = I.back().x;
1878                 int alphanu = I.back().evalpoint;
1879
1880                 ex A = 1;
1881                 for ( size_t i=0; i<r; ++i ) {
1882                         A *= a[i];
1883                 }
1884                 vector<ex> b(r);
1885                 for ( size_t i=0; i<r; ++i ) {
1886                         b[i] = normal(A / a[i]);
1887                 }
1888
1889                 vector<ex> anew = a;
1890                 for ( size_t i=0; i<r; ++i ) {
1891                         anew[i] = anew[i].subs(xnu == alphanu);
1892                 }
1893                 ex cnew = c.subs(xnu == alphanu);
1894                 vector<EvalPoint> Inew = I;
1895                 Inew.pop_back();
1896                 sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
1897
1898                 ex buf = c;
1899                 for ( size_t i=0; i<r; ++i ) {
1900                         buf -= sigma[i] * b[i];
1901                 }
1902                 ex e = make_modular(buf, R);
1903
1904                 ex monomial = 1;
1905                 for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
1906                         monomial *= (xnu - alphanu);
1907                         monomial = expand(monomial);
1908                         ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
1909                         cm = make_modular(cm, R);
1910                         if ( !cm.is_zero() ) {
1911                                 vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
1912                                 ex buf = e;
1913                                 for ( size_t j=0; j<delta_s.size(); ++j ) {
1914                                         delta_s[j] *= monomial;
1915                                         sigma[j] += delta_s[j];
1916                                         buf -= delta_s[j] * b[j];
1917                                 }
1918                                 e = make_modular(buf, R);
1919                         }
1920                 }
1921         }
1922         else {
1923                 upvec amod;
1924                 for ( size_t i=0; i<a.size(); ++i ) {
1925                         umodpoly up;
1926                         umodpoly_from_ex(up, a[i], x, R);
1927                         amod.push_back(up);
1928                 }
1929
1930                 sigma.insert(sigma.begin(), r, 0);
1931                 size_t nterms;
1932                 ex z;
1933                 if ( is_a<add>(c) ) {
1934                         nterms = c.nops();
1935                         z = c.op(0);
1936                 }
1937                 else {
1938                         nterms = 1;
1939                         z = c;
1940                 }
1941                 for ( size_t i=0; i<nterms; ++i ) {
1942                         int m = z.degree(x);
1943                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
1944                         upvec delta_s = univar_diophant(amod, x, m, p, k);
1945                         cl_MI modcm;
1946                         cl_I poscm = cm;
1947                         while ( poscm < 0 ) {
1948                                 poscm = poscm + expt_pos(cl_I(p),k);
1949                         }
1950                         modcm = cl_MI(R, poscm);
1951                         for ( size_t j=0; j<delta_s.size(); ++j ) {
1952                                 delta_s[j] = delta_s[j] * modcm;
1953                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
1954                         }
1955                         if ( nterms > 1 ) {
1956                                 z = c.op(i+1);
1957                         }
1958                 }
1959         }
1960
1961         for ( size_t i=0; i<sigma.size(); ++i ) {
1962                 sigma[i] = make_modular(sigma[i], R);
1963         }
1964
1965         return sigma;
1966 }
1967
1968 /** Multivariate Hensel lifting.
1969  *  The implementation follows the algorithm in chapter 6 of [GCL].
1970  *  Since we don't have a data type for modular multivariate polynomials, the
1971  *  respective operations are done in a GiNaC::ex and the function
1972  *  make_modular() is then called to make the coefficient modular p^l.
1973  *
1974  *  @param a    multivariate polynomial primitive in x
1975  *  @param x    symbol (equiv. x_1 in [GCL])
1976  *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
1977  *  @param p    prime number (should not divide lcoeff(a mod I))
1978  *  @param l    p^l is the modulus of the lifted univariate field
1979  *  @param u    vector of modular (mod p^l) factors of a mod I
1980  *  @param lcU  correct leading coefficient of the univariate factors of a mod I
1981  *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
1982  *              empty if Hensel lifting did not succeed
1983  */
1984 static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
1985                           unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
1986 {
1987         const size_t nu = I.size() + 1;
1988         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
1989
1990         vector<ex> A(nu);
1991         A[nu-1] = a;
1992
1993         for ( size_t j=nu; j>=2; --j ) {
1994                 ex x = I[j-2].x;
1995                 int alpha = I[j-2].evalpoint;
1996                 A[j-2] = A[j-1].subs(x==alpha);
1997                 A[j-2] = make_modular(A[j-2], R);
1998         }
1999
2000         int maxdeg = a.degree(I.front().x);
2001         for ( size_t i=1; i<I.size(); ++i ) {
2002                 int maxdeg2 = a.degree(I[i].x);
2003                 if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
2004         }
2005
2006         const size_t n = u.size();
2007         vector<ex> U(n);
2008         for ( size_t i=0; i<n; ++i ) {
2009                 U[i] = umodpoly_to_ex(u[i], x);
2010         }
2011
2012         for ( size_t j=2; j<=nu; ++j ) {
2013                 vector<ex> U1 = U;
2014                 ex monomial = 1;
2015                 for ( size_t m=0; m<n; ++m) {
2016                         if ( lcU[m] != 1 ) {
2017                                 ex coef = lcU[m];
2018                                 for ( size_t i=j-1; i<nu-1; ++i ) {
2019                                         coef = coef.subs(I[i].x == I[i].evalpoint);
2020                                 }
2021                                 coef = make_modular(coef, R);
2022                                 int deg = U[m].degree(x);
2023                                 U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
2024                         }
2025                 }
2026                 ex Uprod = 1;
2027                 for ( size_t i=0; i<n; ++i ) {
2028                         Uprod *= U[i];
2029                 }
2030                 ex e = expand(A[j-1] - Uprod);
2031
2032                 vector<EvalPoint> newI;
2033                 for ( size_t i=1; i<=j-2; ++i ) {
2034                         newI.push_back(I[i-1]);
2035                 }
2036
2037                 ex xj = I[j-2].x;
2038                 int alphaj = I[j-2].evalpoint;
2039                 size_t deg = A[j-1].degree(xj);
2040                 for ( size_t k=1; k<=deg; ++k ) {
2041                         if ( !e.is_zero() ) {
2042                                 monomial *= (xj - alphaj);
2043                                 monomial = expand(monomial);
2044                                 ex dif = e.diff(ex_to<symbol>(xj), k);
2045                                 ex c = dif.subs(xj==alphaj) / factorial(k);
2046                                 if ( !c.is_zero() ) {
2047                                         vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
2048                                         for ( size_t i=0; i<n; ++i ) {
2049                                                 deltaU[i] *= monomial;
2050                                                 U[i] += deltaU[i];
2051                                                 U[i] = make_modular(U[i], R);
2052                                         }
2053                                         ex Uprod = 1;
2054                                         for ( size_t i=0; i<n; ++i ) {
2055                                                 Uprod *= U[i];
2056                                         }
2057                                         e = A[j-1] - Uprod;
2058                                         e = make_modular(e, R);
2059                                 }
2060                         }
2061                 }
2062         }
2063
2064         ex acand = 1;
2065         for ( size_t i=0; i<U.size(); ++i ) {
2066                 acand *= U[i];
2067         }
2068         if ( expand(a-acand).is_zero() ) {
2069                 lst res;
2070                 for ( size_t i=0; i<U.size(); ++i ) {
2071                         res.append(U[i]);
2072                 }
2073                 return res;
2074         }
2075         else {
2076                 lst res;
2077                 return lst();
2078         }
2079 }
2080
2081 /** Takes a factorized expression and puts the factors in a lst. The exponents
2082  *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
2083  *  element of the list is always the numeric coefficient.
2084  */
2085 static ex put_factors_into_lst(const ex& e)
2086 {
2087         lst result;
2088         if ( is_a<numeric>(e) ) {
2089                 result.append(e);
2090                 return result;
2091         }
2092         if ( is_a<power>(e) ) {
2093                 result.append(1);
2094                 result.append(e.op(0));
2095                 return result;
2096         }
2097         if ( is_a<symbol>(e) || is_a<add>(e) ) {
2098                 result.append(1);
2099                 result.append(e);
2100                 return result;
2101         }
2102         if ( is_a<mul>(e) ) {
2103                 ex nfac = 1;
2104                 for ( size_t i=0; i<e.nops(); ++i ) {
2105                         ex op = e.op(i);
2106                         if ( is_a<numeric>(op) ) {
2107                                 nfac = op;
2108                         }
2109                         if ( is_a<power>(op) ) {
2110                                 result.append(op.op(0));
2111                         }
2112                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
2113                                 result.append(op);
2114                         }
2115                 }
2116                 result.prepend(nfac);
2117                 return result;
2118         }
2119         throw runtime_error("put_factors_into_lst: bad term.");
2120 }
2121
2122 /** Checks a set of numbers for whether each number has a unique prime factor.
2123  *
2124  *  @param[in]  f  list of numbers to check
2125  *  @return        true: if number set is bad, false: if set is okay (has unique
2126  *                 prime factors)
2127  */
2128 static bool checkdivisors(const lst& f)
2129 {
2130         const int k = f.nops();
2131         numeric q, r;
2132         vector<numeric> d(k);
2133         d[0] = ex_to<numeric>(abs(f.op(0)));
2134         for ( int i=1; i<k; ++i ) {
2135                 q = ex_to<numeric>(abs(f.op(i)));
2136                 for ( int j=i-1; j>=0; --j ) {
2137                         r = d[j];
2138                         do {
2139                                 r = gcd(r, q);
2140                                 q = q/r;
2141                         } while ( r != 1 );
2142                         if ( q == 1 ) {
2143                                 return true;
2144                         }
2145                 }
2146                 d[i] = q;
2147         }
2148         return false;
2149 }
2150
2151 /** Generates a set of evaluation points for a multivariate polynomial.
2152  *  The set fulfills the following conditions:
2153  *  1. lcoeff(evaluated_polynomial) does not vanish
2154  *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
2155  *  3. evaluated_polynomial is square free
2156  *  See [Wan] for more details.
2157  *
2158  *  @param[in]     u        multivariate polynomial to be factored
2159  *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
2160  *  @param[in]     syms     set of symbols that appear in u
2161  *  @param[in]     f        lst containing the factors of the leading coefficient vn
2162  *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
2163  *  @param[out]    u0       returns the evaluated (univariate) polynomial
2164  *  @param[out]    a        returns the valid evaluation points. must have initial size equal
2165  *                          number of symbols-1 before calling generate_set
2166  */
2167 static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
2168                          numeric& modulus, ex& u0, vector<numeric>& a)
2169 {
2170         const ex& x = *syms.begin();
2171         while ( true ) {
2172                 ++modulus;
2173                 // generate a set of integers ...
2174                 u0 = u;
2175                 ex vna = vn;
2176                 ex vnatry;
2177                 exset::const_iterator s = syms.begin();
2178                 ++s;
2179                 for ( size_t i=0; i<a.size(); ++i ) {
2180                         do {
2181                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
2182                                 vnatry = vna.subs(*s == a[i]);
2183                                 // ... for which the leading coefficient doesn't vanish ...
2184                         } while ( vnatry == 0 );
2185                         vna = vnatry;
2186                         u0 = u0.subs(*s == a[i]);
2187                         ++s;
2188                 }
2189                 // ... for which u0 is square free ...
2190                 ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
2191                 if ( !is_a<numeric>(g) ) {
2192                         continue;
2193                 }
2194                 if ( !is_a<numeric>(vn) ) {
2195                         // ... and for which the evaluated factors have each an unique prime factor
2196                         lst fnum = f;
2197                         fnum.let_op(0) = fnum.op(0) * u0.content(x);
2198                         for ( size_t i=1; i<fnum.nops(); ++i ) {
2199                                 if ( !is_a<numeric>(fnum.op(i)) ) {
2200                                         s = syms.begin();
2201                                         ++s;
2202                                         for ( size_t j=0; j<a.size(); ++j, ++s ) {
2203                                                 fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
2204                                         }
2205                                 }
2206                         }
2207                         if ( checkdivisors(fnum) ) {
2208                                 continue;
2209                         }
2210                 }
2211                 // ok, we have a valid set now
2212                 return;
2213         }
2214 }
2215
2216 // forward declaration
2217 static ex factor_sqrfree(const ex& poly);
2218
2219 /** Multivariate factorization.
2220  *
2221  *  The implementation is based on the algorithm described in [Wan].
2222  *  An evaluation homomorphism (a set of integers) is determined that fulfills
2223  *  certain criteria. The evaluated polynomial is univariate and is factorized
2224  *  by factor_univariate(). The main work then is to find the correct leading
2225  *  coefficients of the univariate factors. They have to correspond to the
2226  *  factors of the (multivariate) leading coefficient of the input polynomial
2227  *  (as defined for a specific variable x). After that the Hensel lifting can be
2228  *  performed.
2229  *
2230  *  @param[in] poly  expanded, square free polynomial
2231  *  @param[in] syms  contains the symbols in the polynomial
2232  *  @return          factorized polynomial
2233  */
2234 static ex factor_multivariate(const ex& poly, const exset& syms)
2235 {
2236         exset::const_iterator s;
2237         const ex& x = *syms.begin();
2238
2239         // make polynomial primitive
2240         ex unit, cont, pp;
2241         poly.unitcontprim(x, unit, cont, pp);
2242         if ( !is_a<numeric>(cont) ) {
2243                 return factor_sqrfree(cont) * factor_sqrfree(pp);
2244         }
2245
2246         // factor leading coefficient
2247         ex vn = pp.collect(x).lcoeff(x);
2248         ex vnlst;
2249         if ( is_a<numeric>(vn) ) {
2250                 vnlst = lst(vn);
2251         }
2252         else {
2253                 ex vnfactors = factor(vn);
2254                 vnlst = put_factors_into_lst(vnfactors);
2255         }
2256
2257         const unsigned int maxtrials = 3;
2258         numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
2259         vector<numeric> a(syms.size()-1, 0);
2260
2261         // try now to factorize until we are successful
2262         while ( true ) {
2263
2264                 unsigned int trialcount = 0;
2265                 unsigned int prime;
2266                 int factor_count = 0;
2267                 int min_factor_count = -1;
2268                 ex u, delta;
2269                 ex ufac, ufaclst;
2270
2271                 // try several evaluation points to reduce the number of factors
2272                 while ( trialcount < maxtrials ) {
2273
2274                         // generate a set of valid evaluation points
2275                         generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
2276
2277                         ufac = factor_univariate(u, x, prime);
2278                         ufaclst = put_factors_into_lst(ufac);
2279                         factor_count = ufaclst.nops()-1;
2280                         delta = ufaclst.op(0);
2281
2282                         if ( factor_count <= 1 ) {
2283                                 // irreducible
2284                                 return poly;
2285                         }
2286                         if ( min_factor_count < 0 ) {
2287                                 // first time here
2288                                 min_factor_count = factor_count;
2289                         }
2290                         else if ( min_factor_count == factor_count ) {
2291                                 // one less to try
2292                                 ++trialcount;
2293                         }
2294                         else if ( min_factor_count > factor_count ) {
2295                                 // new minimum, reset trial counter
2296                                 min_factor_count = factor_count;
2297                                 trialcount = 0;
2298                         }
2299                 }
2300
2301                 // determine true leading coefficients for the Hensel lifting
2302                 vector<ex> C(factor_count);
2303                 if ( is_a<numeric>(vn) ) {
2304                         // easy case
2305                         for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2306                                 C[i-1] = ufaclst.op(i).lcoeff(x);
2307                         }
2308                 }
2309                 else {
2310                         // difficult case.
2311                         // we use the property of the ftilde having a unique prime factor.
2312                         // details can be found in [Wan].
2313                         // calculate ftilde
2314                         vector<numeric> ftilde(vnlst.nops()-1);
2315                         for ( size_t i=0; i<ftilde.size(); ++i ) {
2316                                 ex ft = vnlst.op(i+1);
2317                                 s = syms.begin();
2318                                 ++s;
2319                                 for ( size_t j=0; j<a.size(); ++j ) {
2320                                         ft = ft.subs(*s == a[j]);
2321                                         ++s;
2322                                 }
2323                                 ftilde[i] = ex_to<numeric>(ft);
2324                         }
2325                         // calculate D and C
2326                         vector<bool> used_flag(ftilde.size(), false);
2327                         vector<ex> D(factor_count, 1);
2328                         if ( delta == 1 ) {
2329                                 for ( int i=0; i<factor_count; ++i ) {
2330                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2331                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2332                                                 int count = 0;
2333                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2334                                                         prefac = iquo(prefac, ftilde[j]);
2335                                                         ++count;
2336                                                 }
2337                                                 if ( count ) {
2338                                                         used_flag[j] = true;
2339                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2340                                                 }
2341                                         }
2342                                         C[i] = D[i] * prefac;
2343                                 }
2344                         }
2345                         else {
2346                                 for ( int i=0; i<factor_count; ++i ) {
2347                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2348                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2349                                                 int count = 0;
2350                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2351                                                         prefac = iquo(prefac, ftilde[j]);
2352                                                         ++count;
2353                                                 }
2354                                                 while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
2355                                                         numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
2356                                                         prefac = iquo(prefac, g);
2357                                                         delta = delta / (ftilde[j]/g);
2358                                                         ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
2359                                                         ++count;
2360                                                 }
2361                                                 if ( count ) {
2362                                                         used_flag[j] = true;
2363                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2364                                                 }
2365                                         }
2366                                         C[i] = D[i] * prefac;
2367                                 }
2368                         }
2369                         // check if something went wrong
2370                         bool some_factor_unused = false;
2371                         for ( size_t i=0; i<used_flag.size(); ++i ) {
2372                                 if ( !used_flag[i] ) {
2373                                         some_factor_unused = true;
2374                                         break;
2375                                 }
2376                         }
2377                         if ( some_factor_unused ) {
2378                                 continue;
2379                         }
2380                 }
2381
2382                 // multiply the remaining content of the univariate polynomial into the
2383                 // first factor
2384                 if ( delta != 1 ) {
2385                         C[0] = C[0] * delta;
2386                         ufaclst.let_op(1) = ufaclst.op(1) * delta;
2387                 }
2388
2389                 // set up evaluation points
2390                 EvalPoint ep;
2391                 vector<EvalPoint> epv;
2392                 s = syms.begin();
2393                 ++s;
2394                 for ( size_t i=0; i<a.size(); ++i ) {
2395                         ep.x = *s++;
2396                         ep.evalpoint = a[i].to_int();
2397                         epv.push_back(ep);
2398                 }
2399
2400                 // calc bound p^l
2401                 int maxdeg = 0;
2402                 for ( int i=1; i<=factor_count; ++i ) {
2403                         if ( ufaclst.op(i).degree(x) > maxdeg ) {
2404                                 maxdeg = ufaclst[i].degree(x);
2405                         }
2406                 }
2407                 cl_I B = 2*calc_bound(u, x, maxdeg);
2408                 cl_I l = 1;
2409                 cl_I pl = prime;
2410                 while ( pl < B ) {
2411                         l = l + 1;
2412                         pl = pl * prime;
2413                 }
2414
2415                 // set up modular factors (mod p^l)
2416                 cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
2417                 upvec modfactors(ufaclst.nops()-1);
2418                 for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2419                         umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
2420                 }
2421
2422                 // try Hensel lifting
2423                 ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
2424                 if ( res != lst() ) {
2425                         ex result = cont * unit;
2426                         for ( size_t i=0; i<res.nops(); ++i ) {
2427                                 result *= res.op(i).content(x) * res.op(i).unit(x);
2428                                 result *= res.op(i).primpart(x);
2429                         }
2430                         return result;
2431                 }
2432         }
2433 }
2434
2435 /** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
2436  */
2437 struct find_symbols_map : public map_function {
2438         exset syms;
2439         ex operator()(const ex& e)
2440         {
2441                 if ( is_a<symbol>(e) ) {
2442                         syms.insert(e);
2443                         return e;
2444                 }
2445                 return e.map(*this);
2446         }
2447 };
2448
2449 /** Factorizes a polynomial that is square free. It calls either the univariate
2450  *  or the multivariate factorization functions.
2451  */
2452 static ex factor_sqrfree(const ex& poly)
2453 {
2454         // determine all symbols in poly
2455         find_symbols_map findsymbols;
2456         findsymbols(poly);
2457         if ( findsymbols.syms.size() == 0 ) {
2458                 return poly;
2459         }
2460
2461         if ( findsymbols.syms.size() == 1 ) {
2462                 // univariate case
2463                 const ex& x = *(findsymbols.syms.begin());
2464                 if ( poly.ldegree(x) > 0 ) {
2465                         // pull out direct factors
2466                         int ld = poly.ldegree(x);
2467                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
2468                         return res * pow(x,ld);
2469                 }
2470                 else {
2471                         ex res = factor_univariate(poly, x);
2472                         return res;
2473                 }
2474         }
2475
2476         // multivariate case
2477         ex res = factor_multivariate(poly, findsymbols.syms);
2478         return res;
2479 }
2480
2481 /** Map used by factor() when factor_options::all is given to access all
2482  *  subexpressions and to call factor() on them.
2483  */
2484 struct apply_factor_map : public map_function {
2485         unsigned options;
2486         apply_factor_map(unsigned options_) : options(options_) { }
2487         ex operator()(const ex& e)
2488         {
2489                 if ( e.info(info_flags::polynomial) ) {
2490                         return factor(e, options);
2491                 }
2492                 if ( is_a<add>(e) ) {
2493                         ex s1, s2;
2494                         for ( size_t i=0; i<e.nops(); ++i ) {
2495                                 if ( e.op(i).info(info_flags::polynomial) ) {
2496                                         s1 += e.op(i);
2497                                 }
2498                                 else {
2499                                         s2 += e.op(i);
2500                                 }
2501                         }
2502                         s1 = s1.eval();
2503                         s2 = s2.eval();
2504                         return factor(s1, options) + s2.map(*this);
2505                 }
2506                 return e.map(*this);
2507         }
2508 };
2509
2510 } // anonymous namespace
2511
2512 /** Interface function to the outside world. It checks the arguments, tries a
2513  *  square free factorization, and then calls factor_sqrfree to do the hard
2514  *  work.
2515  */
2516 ex factor(const ex& poly, unsigned options)
2517 {
2518         // check arguments
2519         if ( !poly.info(info_flags::polynomial) ) {
2520                 if ( options & factor_options::all ) {
2521                         options &= ~factor_options::all;
2522                         apply_factor_map factor_map(options);
2523                         return factor_map(poly);
2524                 }
2525                 return poly;
2526         }
2527
2528         // determine all symbols in poly
2529         find_symbols_map findsymbols;
2530         findsymbols(poly);
2531         if ( findsymbols.syms.size() == 0 ) {
2532                 return poly;
2533         }
2534         lst syms;
2535         exset::const_iterator i=findsymbols.syms.begin(), end=findsymbols.syms.end();
2536         for ( ; i!=end; ++i ) {
2537                 syms.append(*i);
2538         }
2539
2540         // make poly square free
2541         ex sfpoly = sqrfree(poly.expand(), syms);
2542
2543         // factorize the square free components
2544         if ( is_a<power>(sfpoly) ) {
2545                 // case: (polynomial)^exponent
2546                 const ex& base = sfpoly.op(0);
2547                 if ( !is_a<add>(base) ) {
2548                         // simple case: (monomial)^exponent
2549                         return sfpoly;
2550                 }
2551                 ex f = factor_sqrfree(base);
2552                 return pow(f, sfpoly.op(1));
2553         }
2554         if ( is_a<mul>(sfpoly) ) {
2555                 // case: multiple factors
2556                 ex res = 1;
2557                 for ( size_t i=0; i<sfpoly.nops(); ++i ) {
2558                         const ex& t = sfpoly.op(i);
2559                         if ( is_a<power>(t) ) {
2560                                 const ex& base = t.op(0);
2561                                 if ( !is_a<add>(base) ) {
2562                                         res *= t;
2563                                 }
2564                                 else {
2565                                         ex f = factor_sqrfree(base);
2566                                         res *= pow(f, t.op(1));
2567                                 }
2568                         }
2569                         else if ( is_a<add>(t) ) {
2570                                 ex f = factor_sqrfree(t);
2571                                 res *= f;
2572                         }
2573                         else {
2574                                 res *= t;
2575                         }
2576                 }
2577                 return res;
2578         }
2579         if ( is_a<symbol>(sfpoly) ) {
2580                 return poly;
2581         }
2582         // case: (polynomial)
2583         ex f = factor_sqrfree(sfpoly);
2584         return f;
2585 }
2586
2587 } // namespace GiNaC
2588
2589 #ifdef DEBUGFACTOR
2590 #include "test.h"
2591 #endif