ginac/factor.cpp

   1 /** @file factor.cpp
   2  *
   3  *  Polynomial factorization (implementation).
   4  *
   5  *  The interface function factor() at the end of this file is defined in the
   6  *  GiNaC namespace. All other utility functions and classes are defined in an
   7  *  additional anonymous namespace.
   8  *
   9  *  Factorization starts by doing a square free factorization and making the
  10  *  coefficients integer. Then, depending on the number of free variables it
  11  *  proceeds either in dedicated univariate or multivariate factorization code.
  12  *
  13  *  Univariate factorization does a modular factorization via Berlekamp's
  14  *  algorithm and distinct degree factorization. Hensel lifting is used at the
  15  *  end.
  16  *
 *  Multivariate factorization uses the univariate factorization (applying an
 *  evaluation homomorphism first) and Hensel lifting raises the answer to the
  19  *  multivariate domain. The Hensel lifting code is completely distinct from the
  20  *  code used by the univariate factorization.
  21  *
  22  *  Algorithms used can be found in
  23  *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
  24  *          P.S.Wang,
  25  *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  26  *    [GCL] Algorithms for Computer Algebra,
  27  *          K.O.Geddes, S.R.Czapor, G.Labahn,
  28  *          Springer Verlag, 1992.
  29  *    [Mig] Some Useful Bounds,
  30  *          M.Mignotte,
  31  *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
  32  *          pp. 259-263, Springer-Verlag, New York, 1982.
  33  */
  34
  35 /*
  36  *  GiNaC Copyright (C) 1999-2009 Johannes Gutenberg University Mainz, Germany
  37  *
  38  *  This program is free software; you can redistribute it and/or modify
  39  *  it under the terms of the GNU General Public License as published by
  40  *  the Free Software Foundation; either version 2 of the License, or
  41  *  (at your option) any later version.
  42  *
  43  *  This program is distributed in the hope that it will be useful,
  44  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  45  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  46  *  GNU General Public License for more details.
  47  *
  48  *  You should have received a copy of the GNU General Public License
  49  *  along with this program; if not, write to the Free Software
  50  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  51  */
  52
  53 //#define DEBUGFACTOR
  54
  55 #include "factor.h"
  56
  57 #include "ex.h"
  58 #include "numeric.h"
  59 #include "operators.h"
  60 #include "inifcns.h"
  61 #include "symbol.h"
  62 #include "relational.h"
  63 #include "power.h"
  64 #include "mul.h"
  65 #include "normal.h"
  66 #include "add.h"
  67
  68 #include <algorithm>
  69 #include <cmath>
  70 #include <limits>
  71 #include <list>
  72 #include <vector>
  73 #ifdef DEBUGFACTOR
  74 #include <ostream>
  75 #endif
  76 using namespace std;
  77
  78 #include <cln/cln.h>
  79 using namespace cln;
  80
  81 namespace GiNaC {
  82
  83 #ifdef DEBUGFACTOR
// Debug tracing helpers (this branch is compiled when DEBUGFACTOR is defined).
// DCOUT prints the stringized argument text itself; the argument is not evaluated.
#define DCOUT(str) cout << #str << endl
// DCOUTVAR prints the stringized expression, a colon, and then its value.
#define DCOUTVAR(var) cout << #var << ": " << var << endl
// DCOUT2 prints the stringized label str, a colon, and then the value of var.
#define DCOUT2(str,var) cout << #str << ": " << var << endl
  87 ostream& operator<<(ostream& o, const vector<int>& v)
  88 {
  89         vector<int>::const_iterator i = v.begin(), end = v.end();
  90         while ( i != end ) {
  91                 o << *i++ << " ";
  92         }
  93         return o;
  94 }
  95 static ostream& operator<<(ostream& o, const vector<cl_I>& v)
  96 {
  97         vector<cl_I>::const_iterator i = v.begin(), end = v.end();
  98         while ( i != end ) {
  99                 o << *i << "[" << i-v.begin() << "]" << " ";
 100                 ++i;
 101         }
 102         return o;
 103 }
 104 static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 105 {
 106         vector<cl_MI>::const_iterator i = v.begin(), end = v.end();
 107         while ( i != end ) {
 108                 o << *i << "[" << i-v.begin() << "]" << " ";
 109                 ++i;
 110         }
 111         return o;
 112 }
 113 ostream& operator<<(ostream& o, const vector<numeric>& v)
 114 {
 115         for ( size_t i=0; i<v.size(); ++i ) {
 116                 o << v[i] << " ";
 117         }
 118         return o;
 119 }
 120 ostream& operator<<(ostream& o, const vector< vector<cl_MI> >& v)
 121 {
 122         vector< vector<cl_MI> >::const_iterator i = v.begin(), end = v.end();
 123         while ( i != end ) {
 124                 o << i-v.begin() << ": " << *i << endl;
 125                 ++i;
 126         }
 127         return o;
 128 }
 129 #else
// DEBUGFACTOR is not defined: the tracing macros expand to nothing, so trace
// statements left in the code produce no output.
#define DCOUT(str)
#define DCOUTVAR(var)
#define DCOUT2(str,var)
 133 #endif // def DEBUGFACTOR
 134
 135 // anonymous namespace to hide all utility functions
 136 namespace {
 137
 138 ////////////////////////////////////////////////////////////////////////////////
 139 // modular univariate polynomial code
 140
// Univariate polynomial with modular integer coefficients; element i is the
// coefficient of x^i.
typedef std::vector<cln::cl_MI> umodpoly;
// Univariate polynomial with integer coefficients; element i is the
// coefficient of x^i.
typedef std::vector<cln::cl_I> upoly;
// Sequence of modular polynomials (used, e.g., to collect modular factors).
typedef vector<umodpoly> upvec;
 144
 145 // COPY FROM UPOLY.HPP
 146
 147 // CHANGED size_t -> int !!!
 148 template<typename T> static int degree(const T& p)
 149 {
 150         return p.size() - 1;
 151 }
 152
 153 template<typename T> static typename T::value_type lcoeff(const T& p)
 154 {
 155         return p[p.size() - 1];
 156 }
 157
 158 static bool normalize_in_field(umodpoly& a)
 159 {
 160         if (a.size() == 0)
 161                 return true;
 162         if ( lcoeff(a) == a[0].ring()->one() ) {
 163                 return true;
 164         }
 165
 166         const cln::cl_MI lc_1 = recip(lcoeff(a));
 167         for (std::size_t k = a.size(); k-- != 0; )
 168                 a[k] = a[k]*lc_1;
 169         return false;
 170 }
 171
 172 template<typename T> static void
 173 canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
 174 {
 175         if (p.empty())
 176                 return;
 177
 178         std::size_t i = p.size() - 1;
 179         // Be fast if the polynomial is already canonicalized
 180         if (!zerop(p[i]))
 181                 return;
 182
 183         if (hint < p.size())
 184                 i = hint;
 185
 186         bool is_zero = false;
 187         do {
 188                 if (!zerop(p[i])) {
 189                         ++i;
 190                         break;
 191                 }
 192                 if (i == 0) {
 193                         is_zero = true;
 194                         break;
 195                 }
 196                 --i;
 197         } while (true);
 198
 199         if (is_zero) {
 200                 p.clear();
 201                 return;
 202         }
 203
 204         p.erase(p.begin() + i, p.end());
 205 }
 206
 207 // END COPY FROM UPOLY.HPP
 208
 209 static void expt_pos(umodpoly& a, unsigned int q)
 210 {
 211         if ( a.empty() ) return;
 212         cl_MI zero = a[0].ring()->zero();
 213         int deg = degree(a);
 214         a.resize(degree(a)*q+1, zero);
 215         for ( int i=deg; i>0; --i ) {
 216                 a[i*q] = a[i];
 217                 a[i] = zero;
 218         }
 219 }
 220
 221 template<bool COND, typename T = void> struct enable_if
 222 {
 223         typedef T type;
 224 };
 225
 226 template<typename T> struct enable_if<false, T> { /* empty */ };
 227
 228 template<typename T> struct uvar_poly_p
 229 {
 230         static const bool value = false;
 231 };
 232
 233 template<> struct uvar_poly_p<upoly>
 234 {
 235         static const bool value = true;
 236 };
 237
 238 template<> struct uvar_poly_p<umodpoly>
 239 {
 240         static const bool value = true;
 241 };
 242
 243 template<typename T>
 244 // Don't define this for anything but univariate polynomials.
 245 static typename enable_if<uvar_poly_p<T>::value, T>::type
 246 operator+(const T& a, const T& b)
 247 {
 248         int sa = a.size();
 249         int sb = b.size();
 250         if ( sa >= sb ) {
 251                 T r(sa);
 252                 int i = 0;
 253                 for ( ; i<sb; ++i ) {
 254                         r[i] = a[i] + b[i];
 255                 }
 256                 for ( ; i<sa; ++i ) {
 257                         r[i] = a[i];
 258                 }
 259                 canonicalize(r);
 260                 return r;
 261         }
 262         else {
 263                 T r(sb);
 264                 int i = 0;
 265                 for ( ; i<sa; ++i ) {
 266                         r[i] = a[i] + b[i];
 267                 }
 268                 for ( ; i<sb; ++i ) {
 269                         r[i] = b[i];
 270                 }
 271                 canonicalize(r);
 272                 return r;
 273         }
 274 }
 275
 276 template<typename T>
 277 // Don't define this for anything but univariate polynomials. Otherwise
 278 // overload resolution might fail (this actually happens when compiling
 279 // GiNaC with g++ 3.4).
 280 static typename enable_if<uvar_poly_p<T>::value, T>::type
 281 operator-(const T& a, const T& b)
 282 {
 283         int sa = a.size();
 284         int sb = b.size();
 285         if ( sa >= sb ) {
 286                 T r(sa);
 287                 int i = 0;
 288                 for ( ; i<sb; ++i ) {
 289                         r[i] = a[i] - b[i];
 290                 }
 291                 for ( ; i<sa; ++i ) {
 292                         r[i] = a[i];
 293                 }
 294                 canonicalize(r);
 295                 return r;
 296         }
 297         else {
 298                 T r(sb);
 299                 int i = 0;
 300                 for ( ; i<sa; ++i ) {
 301                         r[i] = a[i] - b[i];
 302                 }
 303                 for ( ; i<sb; ++i ) {
 304                         r[i] = -b[i];
 305                 }
 306                 canonicalize(r);
 307                 return r;
 308         }
 309 }
 310
 311 static upoly operator*(const upoly& a, const upoly& b)
 312 {
 313         upoly c;
 314         if ( a.empty() || b.empty() ) return c;
 315
 316         int n = degree(a) + degree(b);
 317         c.resize(n+1, 0);
 318         for ( int i=0 ; i<=n; ++i ) {
 319                 for ( int j=0 ; j<=i; ++j ) {
 320                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 321                         c[i] = c[i] + a[j] * b[i-j];
 322                 }
 323         }
 324         canonicalize(c);
 325         return c;
 326 }
 327
 328 static umodpoly operator*(const umodpoly& a, const umodpoly& b)
 329 {
 330         umodpoly c;
 331         if ( a.empty() || b.empty() ) return c;
 332
 333         int n = degree(a) + degree(b);
 334         c.resize(n+1, a[0].ring()->zero());
 335         for ( int i=0 ; i<=n; ++i ) {
 336                 for ( int j=0 ; j<=i; ++j ) {
 337                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 338                         c[i] = c[i] + a[j] * b[i-j];
 339                 }
 340         }
 341         canonicalize(c);
 342         return c;
 343 }
 344
 345 static upoly operator*(const upoly& a, const cl_I& x)
 346 {
 347         if ( zerop(x) ) {
 348                 upoly r;
 349                 return r;
 350         }
 351         upoly r(a.size());
 352         for ( size_t i=0; i<a.size(); ++i ) {
 353                 r[i] = a[i] * x;
 354         }
 355         return r;
 356 }
 357
 358 static upoly operator/(const upoly& a, const cl_I& x)
 359 {
 360         if ( zerop(x) ) {
 361                 upoly r;
 362                 return r;
 363         }
 364         upoly r(a.size());
 365         for ( size_t i=0; i<a.size(); ++i ) {
 366                 r[i] = exquo(a[i],x);
 367         }
 368         return r;
 369 }
 370
 371 static umodpoly operator*(const umodpoly& a, const cl_MI& x)
 372 {
 373         umodpoly r(a.size());
 374         for ( size_t i=0; i<a.size(); ++i ) {
 375                 r[i] = a[i] * x;
 376         }
 377         canonicalize(r);
 378         return r;
 379 }
 380
 381 static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 382 {
 383         // assert: e is in Z[x]
 384         int deg = e.degree(x);
 385         up.resize(deg+1);
 386         int ldeg = e.ldegree(x);
 387         for ( ; deg>=ldeg; --deg ) {
 388                 up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 389         }
 390         for ( ; deg>=0; --deg ) {
 391                 up[deg] = 0;
 392         }
 393         canonicalize(up);
 394 }
 395
 396 static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 397 {
 398         int deg = degree(e);
 399         ump.resize(deg+1);
 400         for ( ; deg>=0; --deg ) {
 401                 ump[deg] = R->canonhom(e[deg]);
 402         }
 403         canonicalize(ump);
 404 }
 405
 406 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 407 {
 408         // assert: e is in Z[x]
 409         int deg = e.degree(x);
 410         ump.resize(deg+1);
 411         int ldeg = e.ldegree(x);
 412         for ( ; deg>=ldeg; --deg ) {
 413                 cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 414                 ump[deg] = R->canonhom(coeff);
 415         }
 416         for ( ; deg>=0; --deg ) {
 417                 ump[deg] = R->zero();
 418         }
 419         canonicalize(ump);
 420 }
 421
 422 #ifdef DEBUGFACTOR
 423 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 424 {
 425         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 426 }
 427 #endif
 428
 429 static ex upoly_to_ex(const upoly& a, const ex& x)
 430 {
 431         if ( a.empty() ) return 0;
 432         ex e;
 433         for ( int i=degree(a); i>=0; --i ) {
 434                 e += numeric(a[i]) * pow(x, i);
 435         }
 436         return e;
 437 }
 438
 439 static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 440 {
 441         if ( a.empty() ) return 0;
 442         cl_modint_ring R = a[0].ring();
 443         cl_I mod = R->modulus;
 444         cl_I halfmod = (mod-1) >> 1;
 445         ex e;
 446         for ( int i=degree(a); i>=0; --i ) {
 447                 cl_I n = R->retract(a[i]);
 448                 if ( n > halfmod ) {
 449                         e += numeric(n-mod) * pow(x, i);
 450                 } else {
 451                         e += numeric(n) * pow(x, i);
 452                 }
 453         }
 454         return e;
 455 }
 456
 457 static upoly umodpoly_to_upoly(const umodpoly& a)
 458 {
 459         upoly e(a.size());
 460         if ( a.empty() ) return e;
 461         cl_modint_ring R = a[0].ring();
 462         cl_I mod = R->modulus;
 463         cl_I halfmod = (mod-1) >> 1;
 464         for ( int i=degree(a); i>=0; --i ) {
 465                 cl_I n = R->retract(a[i]);
 466                 if ( n > halfmod ) {
 467                         e[i] = n-mod;
 468                 } else {
 469                         e[i] = n;
 470                 }
 471         }
 472         return e;
 473 }
 474
 475 static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
 476 {
 477         umodpoly e;
 478         if ( a.empty() ) return e;
 479         cl_modint_ring oldR = a[0].ring();
 480         size_t sa = a.size();
 481         e.resize(sa+m, R->zero());
 482         for ( size_t i=0; i<sa; ++i ) {
 483                 e[i+m] = R->canonhom(oldR->retract(a[i]));
 484         }
 485         canonicalize(e);
 486         return e;
 487 }
 488
 489 /** Divides all coefficients of the polynomial a by the integer x.
 490  *  All coefficients are supposed to be divisible by x. If they are not, the
 491  *  the<cl_I> cast will raise an exception.
 492  *
 493  *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
 494  *  @param[in]     x  integer that divides the coefficients
 495  */
 496 static void reduce_coeff(umodpoly& a, const cl_I& x)
 497 {
 498         if ( a.empty() ) return;
 499
 500         cl_modint_ring R = a[0].ring();
 501         umodpoly::iterator i = a.begin(), end = a.end();
 502         for ( ; i!=end; ++i ) {
 503                 // cln cannot perform this division in the modular field
 504                 cl_I c = R->retract(*i);
 505                 *i = cl_MI(R, the<cl_I>(c / x));
 506         }
 507 }
 508
 509 /** Calculates remainder of a/b.
 510  *  Assertion: a and b not empty.
 511  *
 512  *  @param[in]  a  polynomial dividend
 513  *  @param[in]  b  polynomial divisor
 514  *  @param[out] r  polynomial remainder
 515  */
 516 static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 517 {
 518         int k, n;
 519         n = degree(b);
 520         k = degree(a) - n;
 521         r = a;
 522         if ( k < 0 ) return;
 523
 524         do {
 525                 cl_MI qk = div(r[n+k], b[n]);
 526                 if ( !zerop(qk) ) {
 527                         for ( int i=0; i<n; ++i ) {
 528                                 unsigned int j = n + k - 1 - i;
 529                                 r[j] = r[j] - qk * b[j-k];
 530                         }
 531                 }
 532         } while ( k-- );
 533
 534         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 535         canonicalize(r);
 536 }
 537
 538 /** Calculates quotient of a/b.
 539  *  Assertion: a and b not empty.
 540  *
 541  *  @param[in]  a  polynomial dividend
 542  *  @param[in]  b  polynomial divisor
 543  *  @param[out] q  polynomial quotient
 544  */
 545 static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 546 {
 547         int k, n;
 548         n = degree(b);
 549         k = degree(a) - n;
 550         q.clear();
 551         if ( k < 0 ) return;
 552
 553         umodpoly r = a;
 554         q.resize(k+1, a[0].ring()->zero());
 555         do {
 556                 cl_MI qk = div(r[n+k], b[n]);
 557                 if ( !zerop(qk) ) {
 558                         q[k] = qk;
 559                         for ( int i=0; i<n; ++i ) {
 560                                 unsigned int j = n + k - 1 - i;
 561                                 r[j] = r[j] - qk * b[j-k];
 562                         }
 563                 }
 564         } while ( k-- );
 565
 566         canonicalize(q);
 567 }
 568
 569 /** Calculates quotient and remainder of a/b.
 570  *  Assertion: a and b not empty.
 571  *
 572  *  @param[in]  a  polynomial dividend
 573  *  @param[in]  b  polynomial divisor
 574  *  @param[out] r  polynomial remainder
 575  *  @param[out] q  polynomial quotient
 576  */
 577 static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 578 {
 579         int k, n;
 580         n = degree(b);
 581         k = degree(a) - n;
 582         q.clear();
 583         r = a;
 584         if ( k < 0 ) return;
 585
 586         q.resize(k+1, a[0].ring()->zero());
 587         do {
 588                 cl_MI qk = div(r[n+k], b[n]);
 589                 if ( !zerop(qk) ) {
 590                         q[k] = qk;
 591                         for ( int i=0; i<n; ++i ) {
 592                                 unsigned int j = n + k - 1 - i;
 593                                 r[j] = r[j] - qk * b[j-k];
 594                         }
 595                 }
 596         } while ( k-- );
 597
 598         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 599         canonicalize(r);
 600         canonicalize(q);
 601 }
 602
 603 /** Calculates the GCD of polynomial a and b.
 604  *
 605  *  @param[in]  a  polynomial
 606  *  @param[in]  b  polynomial
 607  *  @param[out] c  GCD
 608  */
 609 static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
 610 {
 611         if ( degree(a) < degree(b) ) return gcd(b, a, c);
 612
 613         c = a;
 614         normalize_in_field(c);
 615         umodpoly d = b;
 616         normalize_in_field(d);
 617         umodpoly r;
 618         while ( !d.empty() ) {
 619                 rem(c, d, r);
 620                 c = d;
 621                 d = r;
 622         }
 623         normalize_in_field(c);
 624 }
 625
 626 /** Calculates the derivative of the polynomial a.
 627  *
 628  *  @param[in]  a  polynomial of which to take the derivative
 629  *  @param[out] d  result/derivative
 630  */
 631 static void deriv(const umodpoly& a, umodpoly& d)
 632 {
 633         d.clear();
 634         if ( a.size() <= 1 ) return;
 635
 636         d.insert(d.begin(), a.begin()+1, a.end());
 637         int max = d.size();
 638         for ( int i=1; i<max; ++i ) {
 639                 d[i] = d[i] * (i+1);
 640         }
 641         canonicalize(d);
 642 }
 643
 644 static bool unequal_one(const umodpoly& a)
 645 {
 646         if ( a.empty() ) return true;
 647         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 648 }
 649
 650 static bool equal_one(const umodpoly& a)
 651 {
 652         return ( a.size() == 1 && a[0] == a[0].ring()->one() );
 653 }
 654
 655 /** Returns true if polynomial a is square free.
 656  *
 657  *  @param[in] a  polynomial to check
 658  *  @return       true if polynomial is square free, false otherwise
 659  */
 660 static bool squarefree(const umodpoly& a)
 661 {
 662         umodpoly b;
 663         deriv(a, b);
 664         if ( b.empty() ) {
 665                 return false;
 666         }
 667         umodpoly c;
 668         gcd(a, b, c);
 669         return equal_one(c);
 670 }
 671
 672 // END modular univariate polynomial code
 673 ////////////////////////////////////////////////////////////////////////////////
 674
 675 ////////////////////////////////////////////////////////////////////////////////
 676 // modular matrix
 677
 678 typedef vector<cl_MI> mvec;
 679
 680 class modular_matrix
 681 {
 682         friend ostream& operator<<(ostream& o, const modular_matrix& m);
 683 public:
 684         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
 685         {
 686                 m.resize(c*r, init);
 687         }
 688         size_t rowsize() const { return r; }
 689         size_t colsize() const { return c; }
 690         cl_MI& operator()(size_t row, size_t col) { return m[row*c + col]; }
 691         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 692         void mul_col(size_t col, const cl_MI x)
 693         {
 694                 mvec::iterator i = m.begin() + col;
 695                 for ( size_t rc=0; rc<r; ++rc ) {
 696                         *i = *i * x;
 697                         i += c;
 698                 }
 699         }
 700         void sub_col(size_t col1, size_t col2, const cl_MI fac)
 701         {
 702                 mvec::iterator i1 = m.begin() + col1;
 703                 mvec::iterator i2 = m.begin() + col2;
 704                 for ( size_t rc=0; rc<r; ++rc ) {
 705                         *i1 = *i1 - *i2 * fac;
 706                         i1 += c;
 707                         i2 += c;
 708                 }
 709         }
 710         void switch_col(size_t col1, size_t col2)
 711         {
 712                 cl_MI buf;
 713                 mvec::iterator i1 = m.begin() + col1;
 714                 mvec::iterator i2 = m.begin() + col2;
 715                 for ( size_t rc=0; rc<r; ++rc ) {
 716                         buf = *i1; *i1 = *i2; *i2 = buf;
 717                         i1 += c;
 718                         i2 += c;
 719                 }
 720         }
 721         void mul_row(size_t row, const cl_MI x)
 722         {
 723                 vector<cl_MI>::iterator i = m.begin() + row*c;
 724                 for ( size_t cc=0; cc<c; ++cc ) {
 725                         *i = *i * x;
 726                         ++i;
 727                 }
 728         }
 729         void sub_row(size_t row1, size_t row2, const cl_MI fac)
 730         {
 731                 vector<cl_MI>::iterator i1 = m.begin() + row1*c;
 732                 vector<cl_MI>::iterator i2 = m.begin() + row2*c;
 733                 for ( size_t cc=0; cc<c; ++cc ) {
 734                         *i1 = *i1 - *i2 * fac;
 735                         ++i1;
 736                         ++i2;
 737                 }
 738         }
 739         void switch_row(size_t row1, size_t row2)
 740         {
 741                 cl_MI buf;
 742                 vector<cl_MI>::iterator i1 = m.begin() + row1*c;
 743                 vector<cl_MI>::iterator i2 = m.begin() + row2*c;
 744                 for ( size_t cc=0; cc<c; ++cc ) {
 745                         buf = *i1; *i1 = *i2; *i2 = buf;
 746                         ++i1;
 747                         ++i2;
 748                 }
 749         }
 750         bool is_col_zero(size_t col) const
 751         {
 752                 mvec::const_iterator i = m.begin() + col;
 753                 for ( size_t rr=0; rr<r; ++rr ) {
 754                         if ( !zerop(*i) ) {
 755                                 return false;
 756                         }
 757                         i += c;
 758                 }
 759                 return true;
 760         }
 761         bool is_row_zero(size_t row) const
 762         {
 763                 mvec::const_iterator i = m.begin() + row*c;
 764                 for ( size_t cc=0; cc<c; ++cc ) {
 765                         if ( !zerop(*i) ) {
 766                                 return false;
 767                         }
 768                         ++i;
 769                 }
 770                 return true;
 771         }
 772         void set_row(size_t row, const vector<cl_MI>& newrow)
 773         {
 774                 mvec::iterator i1 = m.begin() + row*c;
 775                 mvec::const_iterator i2 = newrow.begin(), end = newrow.end();
 776                 for ( ; i2 != end; ++i1, ++i2 ) {
 777                         *i1 = *i2;
 778                 }
 779         }
 780         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
 781         mvec::const_iterator row_end(size_t row) const { return m.begin()+row*c+r; }
 782 private:
 783         size_t r, c;
 784         mvec m;
 785 };
 786
 787 #ifdef DEBUGFACTOR
 788 modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 789 {
 790         const unsigned int r = m1.rowsize();
 791         const unsigned int c = m2.colsize();
 792         modular_matrix o(r,c,m1(0,0));
 793
 794         for ( size_t i=0; i<r; ++i ) {
 795                 for ( size_t j=0; j<c; ++j ) {
 796                         cl_MI buf;
 797                         buf = m1(i,0) * m2(0,j);
 798                         for ( size_t k=1; k<c; ++k ) {
 799                                 buf = buf + m1(i,k)*m2(k,j);
 800                         }
 801                         o(i,j) = buf;
 802                 }
 803         }
 804         return o;
 805 }
 806
 807 ostream& operator<<(ostream& o, const modular_matrix& m)
 808 {
 809         cl_modint_ring R = m(0,0).ring();
 810         o << "{";
 811         for ( size_t i=0; i<m.rowsize(); ++i ) {
 812                 o << "{";
 813                 for ( size_t j=0; j<m.colsize()-1; ++j ) {
 814                         o << R->retract(m(i,j)) << ",";
 815                 }
 816                 o << R->retract(m(i,m.colsize()-1)) << "}";
 817                 if ( i != m.rowsize()-1 ) {
 818                         o << ",";
 819                 }
 820         }
 821         o << "}";
 822         return o;
 823 }
 824 #endif // def DEBUGFACTOR
 825
 826 // END modular matrix
 827 ////////////////////////////////////////////////////////////////////////////////
 828
 829 /** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
 830  *
 831  *  @param[in]  a_  modular polynomial
 832  *  @param[out] Q   Q matrix
 833  */
 834 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 835 {
 836         umodpoly a = a_;
 837         normalize_in_field(a);
 838
 839         int n = degree(a);
 840         unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
 841         umodpoly r(n, a[0].ring()->zero());
 842         r[0] = a[0].ring()->one();
 843         Q.set_row(0, r);
 844         unsigned int max = (n-1) * q;
 845         for ( size_t m=1; m<=max; ++m ) {
 846                 cl_MI rn_1 = r.back();
 847                 for ( size_t i=n-1; i>0; --i ) {
 848                         r[i] = r[i-1] - (rn_1 * a[i]);
 849                 }
 850                 r[0] = -rn_1 * a[0];
 851                 if ( (m % q) == 0 ) {
 852                         Q.set_row(m/q, r);
 853                 }
 854         }
 855 }
 856
 857 /** Determine the nullspace of a matrix M-1.
 858  *
 859  *  @param[in,out] M      matrix, will be modified
 860  *  @param[out]    basis  calculated nullspace of M-1
 861  */
 862 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 863 {
 864         const size_t n = M.rowsize();
 865         const cl_MI one = M(0,0).ring()->one();
 866         for ( size_t i=0; i<n; ++i ) {
 867                 M(i,i) = M(i,i) - one;
 868         }
 869         for ( size_t r=0; r<n; ++r ) {
 870                 size_t cc = 0;
 871                 for ( ; cc<n; ++cc ) {
 872                         if ( !zerop(M(r,cc)) ) {
 873                                 if ( cc < r ) {
 874                                         if ( !zerop(M(cc,cc)) ) {
 875                                                 continue;
 876                                         }
 877                                         M.switch_col(cc, r);
 878                                 }
 879                                 else if ( cc > r ) {
 880                                         M.switch_col(cc, r);
 881                                 }
 882                                 break;
 883                         }
 884                 }
 885                 if ( cc < n ) {
 886                         M.mul_col(r, recip(M(r,r)));
 887                         for ( cc=0; cc<n; ++cc ) {
 888                                 if ( cc != r ) {
 889                                         M.sub_col(cc, r, M(r,cc));
 890                                 }
 891                         }
 892                 }
 893         }
 894
 895         for ( size_t i=0; i<n; ++i ) {
 896                 M(i,i) = M(i,i) - one;
 897         }
 898         for ( size_t i=0; i<n; ++i ) {
 899                 if ( !M.is_row_zero(i) ) {
 900                         mvec nu(M.row_begin(i), M.row_end(i));
 901                         basis.push_back(nu);
 902                 }
 903         }
 904 }
 905
 906 /** Berlekamp's modular factorization.
 907  *
 908  *  The implementation follows the algorithm in chapter 8 of [GCL].
 909  *
 910  *  @param[in]  a    modular polynomial
 911  *  @param[out] upv  vector containing modular factors. if upv was not empty the
 912  *                   new elements are added at the end
 913  */
 914 static void berlekamp(const umodpoly& a, upvec& upv)
 915 {
 916         cl_modint_ring R = a[0].ring();
 917         umodpoly one(1, R->one());
 918
 919         // find nullspace of Q matrix
 920         modular_matrix Q(degree(a), degree(a), R->zero());
 921         q_matrix(a, Q);
 922         vector<mvec> nu;
 923         nullspace(Q, nu);
 924
 925         const unsigned int k = nu.size();
 926         if ( k == 1 ) {
 927                 // irreducible
 928                 return;
 929         }
 930
 931         list<umodpoly> factors;
 932         factors.push_back(a);
 933         unsigned int size = 1;
 934         unsigned int r = 1;
 935         unsigned int q = cl_I_to_uint(R->modulus);
 936
 937         list<umodpoly>::iterator u = factors.begin();
 938
 939         // calculate all gcd's
 940         while ( true ) {
 941                 for ( unsigned int s=0; s<q; ++s ) {
 942                         umodpoly nur = nu[r];
 943                         nur[0] = nur[0] - cl_MI(R, s);
 944                         canonicalize(nur);
 945                         umodpoly g;
 946                         gcd(nur, *u, g);
 947                         if ( unequal_one(g) && g != *u ) {
 948                                 umodpoly uo;
 949                                 div(*u, g, uo);
 950                                 if ( equal_one(uo) ) {
 951                                         throw logic_error("berlekamp: unexpected divisor.");
 952                                 }
 953                                 else {
 954                                         *u = uo;
 955                                 }
 956                                 factors.push_back(g);
 957                                 size = 0;
 958                                 list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
 959                                 while ( i != end ) {
 960                                         if ( degree(*i) ) ++size;
 961                                         ++i;
 962                                 }
 963                                 if ( size == k ) {
 964                                         list<umodpoly>::const_iterator i = factors.begin(), end = factors.end();
 965                                         while ( i != end ) {
 966                                                 upv.push_back(*i++);
 967                                         }
 968                                         return;
 969                                 }
 970                         }
 971                 }
 972                 if ( ++r == k ) {
 973                         r = 1;
 974                         ++u;
 975                 }
 976         }
 977 }
 978
 979 // modular square free factorization is not used at the moment so we deactivate
 980 // the code
 981 #if 0
 982
 983 /** Calculates a^(1/prime).
 984  *
 985  *  @param[in] a      polynomial
 986  *  @param[in] prime  prime number -> exponent 1/prime
 987  *  @param[in] ap     resulting polynomial
 988  */
 989 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 990 {
 991         size_t newdeg = degree(a)/prime;
 992         ap.resize(newdeg+1);
 993         ap[0] = a[0];
 994         for ( size_t i=1; i<=newdeg; ++i ) {
 995                 ap[i] = a[i*prime];
 996         }
 997 }
 998
 999 /** Modular square free factorization.
1000  *
1001  *  @param[in]  a        polynomial
1002  *  @param[out] factors  modular factors
1003  *  @param[out] mult     corresponding multiplicities (exponents)
1004  */
1005 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
1006 {
1007         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
1008         int i = 1;
1009         umodpoly b;
1010         deriv(a, b);
1011         if ( b.size() ) {
1012                 umodpoly c;
1013                 gcd(a, b, c);
1014                 umodpoly w;
1015                 div(a, c, w);
1016                 while ( unequal_one(w) ) {
1017                         umodpoly y;
1018                         gcd(w, c, y);
1019                         umodpoly z;
1020                         div(w, y, z);
1021                         factors.push_back(z);
1022                         mult.push_back(i);
1023                         ++i;
1024                         w = y;
1025                         umodpoly buf;
1026                         div(c, y, buf);
1027                         c = buf;
1028                 }
1029                 if ( unequal_one(c) ) {
1030                         umodpoly cp;
1031                         expt_1_over_p(c, prime, cp);
1032                         size_t previ = mult.size();
1033                         modsqrfree(cp, factors, mult);
1034                         for ( size_t i=previ; i<mult.size(); ++i ) {
1035                                 mult[i] *= prime;
1036                         }
1037                 }
1038         }
1039         else {
1040                 umodpoly ap;
1041                 expt_1_over_p(a, prime, ap);
1042                 size_t previ = mult.size();
1043                 modsqrfree(ap, factors, mult);
1044                 for ( size_t i=previ; i<mult.size(); ++i ) {
1045                         mult[i] *= prime;
1046                 }
1047         }
1048 }
1049
1050 #endif // deactivation of square free factorization
1051
1052 /** Distinct degree factorization (DDF).
1053  *
1054  *  The implementation follows the algorithm in chapter 8 of [GCL].
1055  *
1056  *  @param[in]  a_         modular polynomial
1057  *  @param[out] degrees    vector containing the degrees of the factors of the
1058  *                         corresponding polynomials in ddfactors.
1059  *  @param[out] ddfactors  vector containing polynomials which factors have the
1060  *                         degree given in degrees.
1061  */
1062 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
1063 {
1064         umodpoly a = a_;
1065
1066         cl_modint_ring R = a[0].ring();
1067         int q = cl_I_to_int(R->modulus);
1068         int nhalf = degree(a)/2;
1069
1070         int i = 1;
1071         umodpoly w(2);
1072         w[0] = R->zero();
1073         w[1] = R->one();
1074         umodpoly x = w;
1075
1076         while ( i <= nhalf ) {
1077                 expt_pos(w, q);
1078                 umodpoly buf;
1079                 rem(w, a, buf);
1080                 w = buf;
1081                 umodpoly wx = w - x;
1082                 gcd(a, wx, buf);
1083                 if ( unequal_one(buf) ) {
1084                         degrees.push_back(i);
1085                         ddfactors.push_back(buf);
1086                 }
1087                 if ( unequal_one(buf) ) {
1088                         umodpoly buf2;
1089                         div(a, buf, buf2);
1090                         a = buf2;
1091                         nhalf = degree(a)/2;
1092                         rem(w, a, buf);
1093                         w = buf;
1094                 }
1095                 ++i;
1096         }
1097         if ( unequal_one(a) ) {
1098                 degrees.push_back(degree(a));
1099                 ddfactors.push_back(a);
1100         }
1101 }
1102
1103 /** Modular same degree factorization.
1104  *  Same degree factorization is a kind of misnomer. It performs distinct degree
1105  *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
1106  *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
1107  *  degree.
1108  *
1109  *  @param[in]  a    modular polynomial
1110  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1111  *                   new elements are added at the end
1112  */
1113 static void same_degree_factor(const umodpoly& a, upvec& upv)
1114 {
1115         cl_modint_ring R = a[0].ring();
1116
1117         vector<int> degrees;
1118         upvec ddfactors;
1119         distinct_degree_factor(a, degrees, ddfactors);
1120
1121         for ( size_t i=0; i<degrees.size(); ++i ) {
1122                 if ( degrees[i] == degree(ddfactors[i]) ) {
1123                         upv.push_back(ddfactors[i]);
1124                 }
1125                 else {
1126                         berlekamp(ddfactors[i], upv);
1127                 }
1128         }
1129 }
1130
1131 // Yes, we can (choose).
1132 #define USE_SAME_DEGREE_FACTOR
1133
1134 /** Modular univariate factorization.
1135  *
1136  *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
1137  *  and same degree factorization (SDF). SDF seems to be slightly faster in
1138  *  almost all cases so it is activated as default.
1139  *
1140  *  @param[in]  p    modular polynomial
1141  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1142  *                   new elements are added at the end
1143  */
1144 static void factor_modular(const umodpoly& p, upvec& upv)
1145 {
1146 #ifdef USE_SAME_DEGREE_FACTOR
1147         same_degree_factor(p, upv);
1148 #else
1149         berlekamp(p, upv);
1150 #endif
1151 }
1152
1153 /** Calculates modular polynomials s and t such that a*s+b*t==1.
1154  *  Assertion: a and b are relatively prime and not zero.
1155  *
1156  *  @param[in]  a  polynomial
1157  *  @param[in]  b  polynomial
1158  *  @param[out] s  polynomial
1159  *  @param[out] t  polynomial
1160  */
1161 static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
1162 {
1163         if ( degree(a) < degree(b) ) {
1164                 exteuclid(b, a, t, s);
1165                 return;
1166         }
1167
1168         umodpoly one(1, a[0].ring()->one());
1169         umodpoly c = a; normalize_in_field(c);
1170         umodpoly d = b; normalize_in_field(d);
1171         s = one;
1172         t.clear();
1173         umodpoly d1;
1174         umodpoly d2 = one;
1175         umodpoly q;
1176         while ( true ) {
1177                 div(c, d, q);
1178                 umodpoly r = c - q * d;
1179                 umodpoly r1 = s - q * d1;
1180                 umodpoly r2 = t - q * d2;
1181                 c = d;
1182                 s = d1;
1183                 t = d2;
1184                 if ( r.empty() ) break;
1185                 d = r;
1186                 d1 = r1;
1187                 d2 = r2;
1188         }
1189         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
1190         umodpoly::iterator i = s.begin(), end = s.end();
1191         for ( ; i!=end; ++i ) {
1192                 *i = *i * fac;
1193         }
1194         canonicalize(s);
1195         fac = recip(lcoeff(b) * lcoeff(c));
1196         i = t.begin(), end = t.end();
1197         for ( ; i!=end; ++i ) {
1198                 *i = *i * fac;
1199         }
1200         canonicalize(t);
1201 }
1202
1203 /** Replaces the leading coefficient in a polynomial by a given number.
1204  *
1205  *  @param[in] poly  polynomial to change
1206  *  @param[in] lc    new leading coefficient
1207  *  @return          changed polynomial
1208  */
1209 static upoly replace_lc(const upoly& poly, const cl_I& lc)
1210 {
1211         if ( poly.empty() ) return poly;
1212         upoly r = poly;
1213         r.back() = lc;
1214         return r;
1215 }
1216
1217 /** Calculates the bound for the modulus.
1218  *  See [Mig].
1219  */
1220 static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
1221 {
1222         cl_I maxcoeff = 0;
1223         cl_R coeff = 0;
1224         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
1225                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
1226                 if ( aa > maxcoeff ) maxcoeff = aa;
1227                 coeff = coeff + square(aa);
1228         }
1229         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1230         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1231         return ( B > maxcoeff ) ? B : maxcoeff;
1232 }
1233
1234 /** Calculates the bound for the modulus.
1235  *  See [Mig].
1236  */
1237 static inline cl_I calc_bound(const upoly& a, int maxdeg)
1238 {
1239         cl_I maxcoeff = 0;
1240         cl_R coeff = 0;
1241         for ( int i=degree(a); i>=0; --i ) {
1242                 cl_I aa = abs(a[i]);
1243                 if ( aa > maxcoeff ) maxcoeff = aa;
1244                 coeff = coeff + square(aa);
1245         }
1246         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1247         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1248         return ( B > maxcoeff ) ? B : maxcoeff;
1249 }
1250
/** Hensel lifting as used by factor_univariate().
 *
 *  The implementation follows the algorithm in chapter 6 of [GCL].
 *  Starting from a factorization modulo p, both factors are corrected
 *  modulo p, p^2, p^3, ... until either their product reproduces the
 *  polynomial exactly or the modulus has reached twice the bound returned by
 *  calc_bound().
 *
 *  @param[in]  a_   primitive univariate polynomials
 *  @param[in]  p    prime number that does not divide lcoeff(a)
 *  @param[in]  u1_  modular factor of a (mod p)
 *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
 *                   fulfilling  u1_*w1_ == a mod p
 *  @param[out] u    lifted factor; cleared (empty) if the lifting did not
 *                   end with an exact factorization
 *  @param[out] w    lifted factor, u*w = a (only meaningful if u is not empty)
 */
static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
{
        upoly a = a_;
        const cl_modint_ring& R = u1_[0].ring();

        // calc bound B: the lifting loop stops once the modulus has reached
        // twice the bound
        int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
        cl_I maxmodulus = 2*calc_bound(a, maxdeg);

        // step 1: multiply a by its leading coefficient alpha; both modular
        // factors are passed through normalize_in_field() and then multiplied
        // by alpha (reduced into the ring R again)
        cl_I alpha = lcoeff(a);
        a = a * alpha;
        umodpoly nu1 = u1_;
        normalize_in_field(nu1);
        umodpoly nw1 = w1_;
        normalize_in_field(nw1);
        upoly phi;
        phi = umodpoly_to_upoly(nu1) * alpha;
        umodpoly u1;
        umodpoly_from_upoly(u1, phi, R);
        phi = umodpoly_to_upoly(nw1) * alpha;
        umodpoly w1;
        umodpoly_from_upoly(w1, phi, R);

        // step 2: s and t with u1*s + w1*t == 1
        umodpoly s;
        umodpoly t;
        exteuclid(u1, w1, s, t);

        // step 3: integer start values with leading coefficient exactly alpha,
        // and the error e of the current factorization
        u = replace_lc(umodpoly_to_upoly(u1), alpha);
        w = replace_lc(umodpoly_to_upoly(w1), alpha);
        upoly e = a - u * w;
        cl_I modulus = p;

        // step 4: add a correction of order modulus to u and w in every pass
        while ( !e.empty() && modulus < maxmodulus ) {
                // presumably e is divisible by modulus at this point -- TODO confirm
                upoly c = e / modulus;
                phi = umodpoly_to_upoly(s) * c;
                umodpoly sigmatilde;
                umodpoly_from_upoly(sigmatilde, phi, R);
                phi = umodpoly_to_upoly(t) * c;
                umodpoly tautilde;
                umodpoly_from_upoly(tautilde, phi, R);
                // sigma is the remainder of sigmatilde on division by w1, the
                // quotient q is moved over into tau
                umodpoly r, q;
                remdiv(sigmatilde, w1, r, q);
                umodpoly sigma = r;
                phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
                umodpoly tau;
                umodpoly_from_upoly(tau, phi, R);
                u = u + umodpoly_to_upoly(tau) * modulus;
                w = w + umodpoly_to_upoly(sigma) * modulus;
                e = a - u * w;
                modulus = modulus * p;
        }

        // step 5: on success undo the multiplication by alpha from step 1
        if ( e.empty() ) {
                // g is the gcd of all coefficients of u
                cl_I g = u[0];
                for ( size_t i=1; i<u.size(); ++i ) {
                        g = gcd(g, u[i]);
                        if ( g == 1 ) break;
                }
                // move the common factor g from u over to w ...
                if ( g != 1 ) {
                        u = u / g;
                        w = w * g;
                }
                // ... and divide the extra alpha out of w
                if ( alpha != 1 ) {
                        w = w / alpha;
                }
        }
        else {
                // an empty u signals the failure to the caller
                u.clear();
        }
}
1338
/** Returns the smallest odd prime number that is larger than p.
 *
 *  The odd primes found so far are kept in a function-local static table
 *  (3, 5, 7, 11, ...) that is extended on demand, one prime at a time, until
 *  its last entry exceeds p. The table therefore never contains duplicates
 *  and stays sorted, also if p is far beyond the current end of the table.
 *
 *  @param[in] p  lower bound; it does not have to be prime itself
 *  @return       smallest odd prime q with q > p (2 is never returned)
 */
static unsigned int next_prime(unsigned int p)
{
        static std::vector<unsigned int> primes;
        if ( primes.empty() ) {
                primes.push_back(3); primes.push_back(5); primes.push_back(7);
        }

        // extend the table until it contains a prime larger than p
        while ( primes.back() <= p ) {
                // every pass starts behind the prime stored last, so a prime is
                // never tested (and stored) twice
                unsigned int candidate = primes.back() + 2;
                // trial division by the known primes up to sqrt(candidate); the
                // candidate is odd, so 2 need not be tested. The comparison is
                // written as a division to avoid overflow in primes[i]^2.
                std::size_t i = 0;
                while ( i < primes.size() && primes[i] <= candidate / primes[i] ) {
                        if ( candidate % primes[i] == 0 ) {
                                // composite: try the next odd number from scratch
                                candidate += 2;
                                i = 0;
                        }
                        else {
                                ++i;
                        }
                }
                primes.push_back(candidate);
        }

        // the table is sorted and its last entry is larger than p, so this
        // scan always stops inside the table
        std::size_t pos = 0;
        while ( primes[pos] <= p ) {
                ++pos;
        }
        return primes[pos];
}
1373
/** Manages the splitting of a vector of modular factors into two partitions.
 *
 *  Every factor i is assigned to group k[i], which is either 0 ("left") or
 *  1 ("right"). left() and right() give the products of the factors in the
 *  respective group. next() steps to another assignment; it first moves a
 *  single factor in group 1 through all positions and increases the number
 *  of factors in group 1 when these are exhausted.
 */
class factor_partition
{
public:
        /** Takes the vector of modular factors and initializes the first partition */
        factor_partition(const upvec& factors_) : factors(factors_)
        {
                n = factors.size();
                // first partition: only factor 0 is in group 1
                k.resize(n, 0);
                k[0] = 1;
                cache.resize(n-1);
                // constant polynomial 1 in the ring of the factors
                one.resize(1, factors.front()[0].ring()->one());
                len = 1;
                last = 0;
                split();
        }
        /** Group (0 or 1) that factor i belongs to in the current partition */
        int operator[](size_t i) const { return k[i]; }
        /** Total number of factors */
        size_t size() const { return n; }
        /** Number of factors in group 0 */
        size_t size_left() const { return n-len; }
        /** Number of factors in group 1 */
        size_t size_right() const { return len; }
        /** Initializes the next partition.
            Returns true, if there is one, false otherwise. */
        bool next()
        {
                if ( last == n-1 ) {
                        // the right-most 1 has arrived at the final position:
                        // walk to the left over the 1s that are packed at the end
                        int rem = len - 1;
                        int p = last - 1;
                        while ( rem ) {
                                if ( k[p] ) {
                                        --rem;
                                        --p;
                                        continue;
                                }
                                // a 0 at p with rem 1s still further to the left:
                                // find the nearest of them ...
                                last = p - 1;
                                while ( k[last] == 0 ) { --last; }
                                if ( last == 0 && n == 2*len ) return false;
                                // ... move it one position to the right and put the
                                // len-rem 1s from the end directly behind it
                                k[last++] = 0;
                                for ( size_t i=0; i<=len-rem; ++i ) {
                                        k[last] = 1;
                                        ++last;
                                }
                                fill(k.begin()+last, k.end(), 0);
                                // last is again the index of the right-most 1
                                --last;
                                split();
                                return true;
                        }
                        // all 1s are packed at the end: restart with one more
                        // factor in group 1, packed at the front
                        last = len;
                        ++len;
                        if ( len > n/2 ) return false;
                        fill(k.begin(), k.begin()+len, 1);
                        // NOTE(review): k[len] itself is not written here; it
                        // looks like it is still 0 from the previous partition
                        // whenever len <= n/2 -- confirm
                        fill(k.begin()+len+1, k.end(), 0);
                }
                else {
                        // move the right-most 1 one position to the right
                        k[last++] = 0;
                        k[last] = 1;
                }
                split();
                return true;
        }
        /** Get first partition */
        umodpoly& left() { return lr[0]; }
        /** Get second partition */
        umodpoly& right() { return lr[1]; }
private:
        /** Multiplies up lr[0] and lr[1] run by run: a run is a maximal
            sequence of neighbouring factors of the same group. The product of
            the run starting at pos with j+2 factors is stored in cache[pos][j]
            and reused by later calls. */
        void split_cached()
        {
                size_t i = 0;
                do {
                        size_t pos = i;
                        int group = k[i++];
                        // d counts the factors of the run in addition to factors[pos]
                        size_t d = 0;
                        while ( i < n && k[i] == group ) { ++d; ++i; }
                        if ( d ) {
                                if ( cache[pos].size() >= d ) {
                                        // product of this run is already known
                                        lr[group] = lr[group] * cache[pos][d-1];
                                }
                                else {
                                        if ( cache[pos].size() == 0 ) {
                                                cache[pos].push_back(factors[pos] * factors[pos+1]);
                                        }
                                        // extend the cached products by the missing factors
                                        size_t j = pos + cache[pos].size() + 1;
                                        d -= cache[pos].size();
                                        while ( d ) {
                                                umodpoly buf = cache[pos].back() * factors[j];
                                                cache[pos].push_back(buf);
                                                --d;
                                                ++j;
                                        }
                                        lr[group] = lr[group] * cache[pos].back();
                                }
                        }
                        else {
                                // run of a single factor
                                lr[group] = lr[group] * factors[pos];
                        }
                } while ( i < n );
        }
        /** Recomputes lr[0] and lr[1] for the current assignment k. The cached
            variant is used for more than six factors only. */
        void split()
        {
                lr[0] = one;
                lr[1] = one;
                if ( n > 6 ) {
                        split_cached();
                }
                else {
                        for ( size_t i=0; i<n; ++i ) {
                                lr[k[i]] = lr[k[i]] * factors[i];
                        }
                }
        }
private:
        umodpoly lr[2];                   // products of group 0 (left) and group 1 (right)
        vector< vector<umodpoly> > cache; // cache[pos][j]: factors[pos]*...*factors[pos+j+1]
        upvec factors;                    // copy of the modular factors
        umodpoly one;                     // constant polynomial 1
        size_t n;                         // number of factors
        size_t len;                       // number of factors in group 1
        size_t last;                      // index of the right-most factor in group 1
        vector<int> k;                    // k[i]: group (0 or 1) of factor i
};
1494
/** Contains a pair of univariate polynomial and its modular factors.
 *  Used by factor_univariate() as the element type of its work stack.
 */
struct ModFactors
{
        upoly poly;     // polynomial that still has to be factorized
        upvec factors;  // modular factors belonging to poly
};
1503
/** Univariate polynomial factorization.
 *
 *  Modular factorization is tried for several primes to minimize the number of
 *  modular factors. Then, Hensel lifting is performed.
 *
 *  @param[in]     poly   expanded square free univariate polynomial
 *  @param[in]     x      symbol
 *  @param[in,out] prime  output value is the prime number actually used.
 *                        NOTE: the value passed in is currently ignored, it is
 *                        overwritten with 3 so that the search for a suitable
 *                        prime always starts with the first prime after 3.
 *  @return               product of the factors found (including unit and
 *                        content), or poly itself if the modular factorization
 *                        yields at most one factor
 */
static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
{
        // split off unit and content and work on the primitive part only
        ex unit, cont, prim_ex;
        poly.unitcontprim(x, unit, cont, prim_ex);
        upoly prim;
        upoly_from_ex(prim, prim_ex, x);

        // determine proper prime and minimize number of modular factors
        prime = 3;
        unsigned int lastp = prime;
        cl_modint_ring R;
        unsigned int trials = 0;
        unsigned int minfactors = 0;
        // primes dividing the leading coefficient (times the content) are skipped
        cl_I lc = lcoeff(prim) * the<cl_I>(ex_to<numeric>(cont).to_cl_N());
        upvec factors;
        // the search ends with the first prime that does not give fewer modular
        // factors than the best prime found before
        while ( trials < 2 ) {
                umodpoly modpoly;
                // next prime that does not divide lc and keeps prim square free
                while ( true ) {
                        prime = next_prime(prime);
                        if ( !zerop(rem(lc, prime)) ) {
                                R = find_modint_ring(prime);
                                umodpoly_from_upoly(modpoly, prim, R);
                                if ( squarefree(modpoly) ) break;
                        }
                }

                // do modular factorization
                upvec trialfactors;
                factor_modular(modpoly, trialfactors);
                if ( trialfactors.size() <= 1 ) {
                        // irreducible for sure
                        return poly;
                }

                if ( minfactors == 0 || trialfactors.size() < minfactors ) {
                        // best prime so far, remember its factorization
                        factors = trialfactors;
                        minfactors = trialfactors.size();
                        lastp = prime;
                        trials = 1;
                }
                else {
                        ++trials;
                }
        }
        prime = lastp;
        R = find_modint_ring(prime);

        // lift all factor combinations
        // tocheck holds the polynomials that still have to be split, each
        // together with the modular factors belonging to it
        stack<ModFactors> tocheck;
        ModFactors mf;
        mf.poly = prim;
        mf.factors = factors;
        tocheck.push(mf);
        upoly f1, f2;
        ex result = 1;
        while ( tocheck.size() ) {
                const size_t n = tocheck.top().factors.size();
                factor_partition part(tocheck.top().factors);
                // run through the partitions until one can be lifted or none is left
                while ( true ) {
                        // call Hensel lifting
                        // f1 belongs to part.left(), f2 to part.right(); an empty
                        // f1 signals that this partition could not be lifted
                        hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
                        if ( !f1.empty() ) {
                                // successful, update the stack and the result
                                if ( part.size_left() == 1 ) {
                                        if ( part.size_right() == 1 ) {
                                                // both sides are single factors: done with this entry
                                                result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
                                                tocheck.pop();
                                                break;
                                        }
                                        // f1 is final; continue with f2 and drop the modular
                                        // factor of the left side (group 0)
                                        result *= upoly_to_ex(f1, x);
                                        tocheck.top().poly = f2;
                                        for ( size_t i=0; i<n; ++i ) {
                                                if ( part[i] == 0 ) {
                                                        tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
                                                        break;
                                                }
                                        }
                                        break;
                                }
                                else if ( part.size_right() == 1 ) {
                                        // NOTE: the inner check cannot be true here, the case
                                        // size_left() == 1 was handled by the branch above
                                        if ( part.size_left() == 1 ) {
                                                result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
                                                tocheck.pop();
                                                break;
                                        }
                                        // f2 is final; continue with f1 and drop the modular
                                        // factor of the right side (group 1)
                                        result *= upoly_to_ex(f2, x);
                                        tocheck.top().poly = f1;
                                        for ( size_t i=0; i<n; ++i ) {
                                                if ( part[i] == 1 ) {
                                                        tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
                                                        break;
                                                }
                                        }
                                        break;
                                }
                                else {
                                        // both sides consist of several modular factors:
                                        // distribute them and check f1 and f2 separately
                                        upvec newfactors1(part.size_left()), newfactors2(part.size_right());
                                        upvec::iterator i1 = newfactors1.begin(), i2 = newfactors2.begin();
                                        for ( size_t i=0; i<n; ++i ) {
                                                if ( part[i] ) {
                                                        *i2++ = tocheck.top().factors[i];
                                                }
                                                else {
                                                        *i1++ = tocheck.top().factors[i];
                                                }
                                        }
                                        tocheck.top().factors = newfactors1;
                                        tocheck.top().poly = f1;
                                        ModFactors mf;
                                        mf.factors = newfactors2;
                                        mf.poly = f2;
                                        tocheck.push(mf);
                                        break;
                                }
                        }
                        else {
                                // not successful
                                if ( !part.next() ) {
                                        // if no more combinations left, return polynomial as
                                        // irreducible
                                        result *= upoly_to_ex(tocheck.top().poly, x);
                                        tocheck.pop();
                                        break;
                                }
                        }
                }
        }

        return unit * cont * result;
}
1644
1645 /** Second interface to factor_univariate() to be used if the information about
1646  *  the prime is not needed.
1647  */
1648 static inline ex factor_univariate(const ex& poly, const ex& x)
1649 {
1650         unsigned int prime;
1651         return factor_univariate(poly, x, prime);
1652 }
1653
/** Represents an evaluation point (<symbol>==<integer>).
 */
struct EvalPoint
{
        ex x;           // the symbol
        int evalpoint;  // integer value assigned to the symbol
};
1661
#ifdef DEBUGFACTOR
/** Debug output of a list of evaluation points, each one printed as
 *  "(<symbol>==<integer>) ".
 */
ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
{
        for ( vector<EvalPoint>::const_iterator pt=v.begin(); pt!=v.end(); ++pt ) {
                o << "(" << pt->x << "==" << pt->evalpoint << ") ";
        }
        return o;
}
#endif // def DEBUGFACTOR
1671
1672 // forward declaration
1673 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
1674
1675 /** Utility function for multivariate Hensel lifting.
1676  *
1677  *  Solves the equation
1678  *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
1679  *  with deg(s_i) < deg(a_i)
1680  *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1681  *
1682  *  The implementation follows the algorithm in chapter 6 of [GCL].
1683  *
1684  *  @param[in]  a   vector of modular univariate polynomials
1685  *  @param[in]  x   symbol
1686  *  @param[in]  p   prime number
1687  *  @param[in]  k   p^k is modulus
1688  *  @return         vector of polynomials (s_i)
1689  */
1690 static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
1691 {
1692         const size_t r = a.size();
1693         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1694         upvec q(r-1);
1695         q[r-2] = a[r-1];
1696         for ( size_t j=r-2; j>=1; --j ) {
1697                 q[j-1] = a[j] * q[j];
1698         }
1699         umodpoly beta(1, R->one());
1700         upvec s;
1701         for ( size_t j=1; j<r; ++j ) {
1702                 vector<ex> mdarg(2);
1703                 mdarg[0] = umodpoly_to_ex(q[j-1], x);
1704                 mdarg[1] = umodpoly_to_ex(a[j-1], x);
1705                 vector<EvalPoint> empty;
1706                 vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
1707                 umodpoly sigma1;
1708                 umodpoly_from_ex(sigma1, exsigma[0], x, R);
1709                 umodpoly sigma2;
1710                 umodpoly_from_ex(sigma2, exsigma[1], x, R);
1711                 beta = sigma1;
1712                 s.push_back(sigma2);
1713         }
1714         s.push_back(beta);
1715         return s;
1716 }
1717
1718 /** Changes the modulus of a modular polynomial. Used by eea_lift().
1719  *
1720  *  @param[in]     R  new modular ring
1721  *  @param[in,out] a  polynomial to change (in situ)
1722  */
1723 static void change_modulus(const cl_modint_ring& R, umodpoly& a)
1724 {
1725         if ( a.empty() ) return;
1726         cl_modint_ring oldR = a[0].ring();
1727         umodpoly::iterator i = a.begin(), end = a.end();
1728         for ( ; i!=end; ++i ) {
1729                 *i = R->canonhom(oldR->retract(*i));
1730         }
1731         canonicalize(a);
1732 }
1733
1734 /** Utility function for multivariate Hensel lifting.
1735  *
1736  *  Solves  s*a + t*b == 1 mod p^k  given a,b.
1737  *
1738  *  The implementation follows the algorithm in chapter 6 of [GCL].
1739  *
1740  *  @param[in]  a   polynomial
1741  *  @param[in]  b   polynomial
1742  *  @param[in]  x   symbol
1743  *  @param[in]  p   prime number
1744  *  @param[in]  k   p^k is modulus
1745  *  @param[out] s_  output polynomial
1746  *  @param[out] t_  output polynomial
1747  */
1748 static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
1749 {
1750         cl_modint_ring R = find_modint_ring(p);
1751         umodpoly amod = a;
1752         change_modulus(R, amod);
1753         umodpoly bmod = b;
1754         change_modulus(R, bmod);
1755
1756         umodpoly smod;
1757         umodpoly tmod;
1758         exteuclid(amod, bmod, smod, tmod);
1759
1760         cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
1761         umodpoly s = smod;
1762         change_modulus(Rpk, s);
1763         umodpoly t = tmod;
1764         change_modulus(Rpk, t);
1765
1766         cl_I modulus(p);
1767         umodpoly one(1, Rpk->one());
1768         for ( size_t j=1; j<k; ++j ) {
1769                 umodpoly e = one - a * s - b * t;
1770                 reduce_coeff(e, modulus);
1771                 umodpoly c = e;
1772                 change_modulus(R, c);
1773                 umodpoly sigmabar = smod * c;
1774                 umodpoly taubar = tmod * c;
1775                 umodpoly sigma, q;
1776                 remdiv(sigmabar, bmod, sigma, q);
1777                 umodpoly tau = taubar + q * amod;
1778                 umodpoly sadd = sigma;
1779                 change_modulus(Rpk, sadd);
1780                 cl_MI modmodulus(Rpk, modulus);
1781                 s = s + sadd * modmodulus;
1782                 umodpoly tadd = tau;
1783                 change_modulus(Rpk, tadd);
1784                 t = t + tadd * modmodulus;
1785                 modulus = modulus * p;
1786         }
1787
1788         s_ = s; t_ = t;
1789 }
1790
1791 /** Utility function for multivariate Hensel lifting.
1792  *
1793  *  Solves the equation
1794  *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
1795  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1796  *
1797  *  The implementation follows the algorithm in chapter 6 of [GCL].
1798  *
1799  *  @param a  vector with univariate polynomials mod p^k
1800  *  @param x  symbol
1801  *  @param m  exponent of x^m in the equation to solve
1802  *  @param p  prime number
1803  *  @param k  p^k is modulus
1804  *  @return   vector of polynomials (s_i)
1805  */
1806 static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
1807 {
1808         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1809
1810         const size_t r = a.size();
1811         upvec result;
1812         if ( r > 2 ) {
1813                 upvec s = multiterm_eea_lift(a, x, p, k);
1814                 for ( size_t j=0; j<r; ++j ) {
1815                         umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
1816                         umodpoly buf;
1817                         rem(bmod, a[j], buf);
1818                         result.push_back(buf);
1819                 }
1820         }
1821         else {
1822                 umodpoly s, t;
1823                 eea_lift(a[1], a[0], x, p, k, s, t);
1824                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
1825                 umodpoly buf, q;
1826                 remdiv(bmod, a[0], buf, q);
1827                 result.push_back(buf);
1828                 umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
1829                 buf = t1mod + q * a[1];
1830                 result.push_back(buf);
1831         }
1832
1833         return result;
1834 }
1835
1836 /** Map used by function make_modular().
1837  *  Finds every coefficient in a polynomial and replaces it by is value in the
1838  *  given modular ring R (symmetric representation).
1839  */
1840 struct make_modular_map : public map_function {
1841         cl_modint_ring R;
1842         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
1843         ex operator()(const ex& e)
1844         {
1845                 if ( is_a<add>(e) || is_a<mul>(e) ) {
1846                         return e.map(*this);
1847                 }
1848                 else if ( is_a<numeric>(e) ) {
1849                         numeric mod(R->modulus);
1850                         numeric halfmod = (mod-1)/2;
1851                         cl_MI emod = R->canonhom(the<cl_I>(ex_to<numeric>(e).to_cl_N()));
1852                         numeric n(R->retract(emod));
1853                         if ( n > halfmod ) {
1854                                 return n-mod;
1855                         }
1856                         else {
1857                                 return n;
1858                         }
1859                 }
1860                 return e;
1861         }
1862 };
1863
1864 /** Helps mimicking modular multivariate polynomial arithmetic.
1865  *
1866  *  @param e  expression of which to make the coefficients equal to their value
1867  *            in the modular ring R (symmetric representation)
1868  *  @param R  modular ring
1869  *  @return   resulting expression
1870  */
1871 static ex make_modular(const ex& e, const cl_modint_ring& R)
1872 {
1873         make_modular_map map(R);
1874         return map(e.expand());
1875 }
1876
1877 /** Utility function for multivariate Hensel lifting.
1878  *
1879  *  Returns the polynomials s_i that fulfill
1880  *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
1881  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1882  *
1883  *  The implementation follows the algorithm in chapter 6 of [GCL].
1884  *
1885  *  @param a_  vector of multivariate factors mod p^k
1886  *  @param x   symbol (equiv. x_1 in [GCL])
1887  *  @param c   polynomial mod p^k
1888  *  @param I   vector of evaluation points
1889  *  @param d   maximum total degree of result
1890  *  @param p   prime number
1891  *  @param k   p^k is modulus
1892  *  @return    vector of polynomials (s_i)
1893  */
1894 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
1895                                     unsigned int d, unsigned int p, unsigned int k)
1896 {
1897         vector<ex> a = a_;
1898
1899         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1900         const size_t r = a.size();
1901         const size_t nu = I.size() + 1;
1902
1903         vector<ex> sigma;
1904         if ( nu > 1 ) {
1905                 ex xnu = I.back().x;
1906                 int alphanu = I.back().evalpoint;
1907
1908                 ex A = 1;
1909                 for ( size_t i=0; i<r; ++i ) {
1910                         A *= a[i];
1911                 }
1912                 vector<ex> b(r);
1913                 for ( size_t i=0; i<r; ++i ) {
1914                         b[i] = normal(A / a[i]);
1915                 }
1916
1917                 vector<ex> anew = a;
1918                 for ( size_t i=0; i<r; ++i ) {
1919                         anew[i] = anew[i].subs(xnu == alphanu);
1920                 }
1921                 ex cnew = c.subs(xnu == alphanu);
1922                 vector<EvalPoint> Inew = I;
1923                 Inew.pop_back();
1924                 sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
1925
1926                 ex buf = c;
1927                 for ( size_t i=0; i<r; ++i ) {
1928                         buf -= sigma[i] * b[i];
1929                 }
1930                 ex e = make_modular(buf, R);
1931
1932                 ex monomial = 1;
1933                 for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
1934                         monomial *= (xnu - alphanu);
1935                         monomial = expand(monomial);
1936                         ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
1937                         cm = make_modular(cm, R);
1938                         if ( !cm.is_zero() ) {
1939                                 vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
1940                                 ex buf = e;
1941                                 for ( size_t j=0; j<delta_s.size(); ++j ) {
1942                                         delta_s[j] *= monomial;
1943                                         sigma[j] += delta_s[j];
1944                                         buf -= delta_s[j] * b[j];
1945                                 }
1946                                 e = make_modular(buf, R);
1947                         }
1948                 }
1949         }
1950         else {
1951                 upvec amod;
1952                 for ( size_t i=0; i<a.size(); ++i ) {
1953                         umodpoly up;
1954                         umodpoly_from_ex(up, a[i], x, R);
1955                         amod.push_back(up);
1956                 }
1957
1958                 sigma.insert(sigma.begin(), r, 0);
1959                 size_t nterms;
1960                 ex z;
1961                 if ( is_a<add>(c) ) {
1962                         nterms = c.nops();
1963                         z = c.op(0);
1964                 }
1965                 else {
1966                         nterms = 1;
1967                         z = c;
1968                 }
1969                 for ( size_t i=0; i<nterms; ++i ) {
1970                         int m = z.degree(x);
1971                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
1972                         upvec delta_s = univar_diophant(amod, x, m, p, k);
1973                         cl_MI modcm;
1974                         cl_I poscm = cm;
1975                         while ( poscm < 0 ) {
1976                                 poscm = poscm + expt_pos(cl_I(p),k);
1977                         }
1978                         modcm = cl_MI(R, poscm);
1979                         for ( size_t j=0; j<delta_s.size(); ++j ) {
1980                                 delta_s[j] = delta_s[j] * modcm;
1981                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
1982                         }
1983                         if ( nterms > 1 ) {
1984                                 z = c.op(i+1);
1985                         }
1986                 }
1987         }
1988
1989         for ( size_t i=0; i<sigma.size(); ++i ) {
1990                 sigma[i] = make_modular(sigma[i], R);
1991         }
1992
1993         return sigma;
1994 }
1995
1996 /** Multivariate Hensel lifting.
1997  *  The implementation follows the algorithm in chapter 6 of [GCL].
1998  *  Since we don't have a data type for modular multivariate polynomials, the
1999  *  respective operations are done in a GiNaC::ex and the function
2000  *  make_modular() is then called to make the coefficient modular p^l.
2001  *
2002  *  @param a    multivariate polynomial primitive in x
2003  *  @param x    symbol (equiv. x_1 in [GCL])
2004  *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
2005  *  @param p    prime number (should not divide lcoeff(a mod I))
2006  *  @param l    p^l is the modulus of the lifted univariate field
2007  *  @param u    vector of modular (mod p^l) factors of a mod I
2008  *  @param lcU  correct leading coefficient of the univariate factors of a mod I
2009  *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
2010  *              empty if Hensel lifting did not succeed
2011  */
2012 static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
2013                           unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
2014 {
2015         const size_t nu = I.size() + 1;
2016         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
2017
2018         vector<ex> A(nu);
2019         A[nu-1] = a;
2020
2021         for ( size_t j=nu; j>=2; --j ) {
2022                 ex x = I[j-2].x;
2023                 int alpha = I[j-2].evalpoint;
2024                 A[j-2] = A[j-1].subs(x==alpha);
2025                 A[j-2] = make_modular(A[j-2], R);
2026         }
2027
2028         int maxdeg = a.degree(I.front().x);
2029         for ( size_t i=1; i<I.size(); ++i ) {
2030                 int maxdeg2 = a.degree(I[i].x);
2031                 if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
2032         }
2033
2034         const size_t n = u.size();
2035         vector<ex> U(n);
2036         for ( size_t i=0; i<n; ++i ) {
2037                 U[i] = umodpoly_to_ex(u[i], x);
2038         }
2039
2040         for ( size_t j=2; j<=nu; ++j ) {
2041                 vector<ex> U1 = U;
2042                 ex monomial = 1;
2043                 for ( size_t m=0; m<n; ++m) {
2044                         if ( lcU[m] != 1 ) {
2045                                 ex coef = lcU[m];
2046                                 for ( size_t i=j-1; i<nu-1; ++i ) {
2047                                         coef = coef.subs(I[i].x == I[i].evalpoint);
2048                                 }
2049                                 coef = make_modular(coef, R);
2050                                 int deg = U[m].degree(x);
2051                                 U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
2052                         }
2053                 }
2054                 ex Uprod = 1;
2055                 for ( size_t i=0; i<n; ++i ) {
2056                         Uprod *= U[i];
2057                 }
2058                 ex e = expand(A[j-1] - Uprod);
2059
2060                 vector<EvalPoint> newI;
2061                 for ( size_t i=1; i<=j-2; ++i ) {
2062                         newI.push_back(I[i-1]);
2063                 }
2064
2065                 ex xj = I[j-2].x;
2066                 int alphaj = I[j-2].evalpoint;
2067                 size_t deg = A[j-1].degree(xj);
2068                 for ( size_t k=1; k<=deg; ++k ) {
2069                         if ( !e.is_zero() ) {
2070                                 monomial *= (xj - alphaj);
2071                                 monomial = expand(monomial);
2072                                 ex dif = e.diff(ex_to<symbol>(xj), k);
2073                                 ex c = dif.subs(xj==alphaj) / factorial(k);
2074                                 if ( !c.is_zero() ) {
2075                                         vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
2076                                         for ( size_t i=0; i<n; ++i ) {
2077                                                 deltaU[i] *= monomial;
2078                                                 U[i] += deltaU[i];
2079                                                 U[i] = make_modular(U[i], R);
2080                                         }
2081                                         ex Uprod = 1;
2082                                         for ( size_t i=0; i<n; ++i ) {
2083                                                 Uprod *= U[i];
2084                                         }
2085                                         e = A[j-1] - Uprod;
2086                                         e = make_modular(e, R);
2087                                 }
2088                         }
2089                 }
2090         }
2091
2092         ex acand = 1;
2093         for ( size_t i=0; i<U.size(); ++i ) {
2094                 acand *= U[i];
2095         }
2096         if ( expand(a-acand).is_zero() ) {
2097                 lst res;
2098                 for ( size_t i=0; i<U.size(); ++i ) {
2099                         res.append(U[i]);
2100                 }
2101                 return res;
2102         }
2103         else {
2104                 lst res;
2105                 return lst();
2106         }
2107 }
2108
2109 /** Takes a factorized expression and puts the factors in a lst. The exponents
2110  *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
2111  *  element of the list is always the numeric coefficient.
2112  */
2113 static ex put_factors_into_lst(const ex& e)
2114 {
2115         lst result;
2116         if ( is_a<numeric>(e) ) {
2117                 result.append(e);
2118                 return result;
2119         }
2120         if ( is_a<power>(e) ) {
2121                 result.append(1);
2122                 result.append(e.op(0));
2123                 return result;
2124         }
2125         if ( is_a<symbol>(e) || is_a<add>(e) ) {
2126                 result.append(1);
2127                 result.append(e);
2128                 return result;
2129         }
2130         if ( is_a<mul>(e) ) {
2131                 ex nfac = 1;
2132                 for ( size_t i=0; i<e.nops(); ++i ) {
2133                         ex op = e.op(i);
2134                         if ( is_a<numeric>(op) ) {
2135                                 nfac = op;
2136                         }
2137                         if ( is_a<power>(op) ) {
2138                                 result.append(op.op(0));
2139                         }
2140                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
2141                                 result.append(op);
2142                         }
2143                 }
2144                 result.prepend(nfac);
2145                 return result;
2146         }
2147         throw runtime_error("put_factors_into_lst: bad term.");
2148 }
2149
2150 /** Checks a set of numbers for whether each number has a unique prime factor.
2151  *
2152  *  @param[in]  f  list of numbers to check
2153  *  @return        true: if number set is bad, false: if set is okay (has unique
2154  *                 prime factors)
2155  */
2156 static bool checkdivisors(const lst& f)
2157 {
2158         const int k = f.nops();
2159         numeric q, r;
2160         vector<numeric> d(k);
2161         d[0] = ex_to<numeric>(abs(f.op(0)));
2162         for ( int i=1; i<k; ++i ) {
2163                 q = ex_to<numeric>(abs(f.op(i)));
2164                 for ( int j=i-1; j>=0; --j ) {
2165                         r = d[j];
2166                         do {
2167                                 r = gcd(r, q);
2168                                 q = q/r;
2169                         } while ( r != 1 );
2170                         if ( q == 1 ) {
2171                                 return true;
2172                         }
2173                 }
2174                 d[i] = q;
2175         }
2176         return false;
2177 }
2178
2179 /** Generates a set of evaluation points for a multivariate polynomial.
2180  *  The set fulfills the following conditions:
2181  *  1. lcoeff(evaluated_polynomial) does not vanish
2182  *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
2183  *  3. evaluated_polynomial is square free
2184  *  See [Wan] for more details.
2185  *
2186  *  @param[in]     u        multivariate polynomial to be factored
2187  *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
2188  *  @param[in]     syms     set of symbols that appear in u
2189  *  @param[in]     f        lst containing the factors of the leading coefficient vn
2190  *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
2191  *  @param[out]    u0       returns the evaluated (univariate) polynomial
2192  *  @param[out]    a        returns the valid evaluation points. must have initial size equal
2193  *                          number of symbols-1 before calling generate_set
2194  */
2195 static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
2196                          numeric& modulus, ex& u0, vector<numeric>& a)
2197 {
2198         const ex& x = *syms.begin();
2199         while ( true ) {
2200                 ++modulus;
2201                 // generate a set of integers ...
2202                 u0 = u;
2203                 ex vna = vn;
2204                 ex vnatry;
2205                 exset::const_iterator s = syms.begin();
2206                 ++s;
2207                 for ( size_t i=0; i<a.size(); ++i ) {
2208                         do {
2209                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
2210                                 vnatry = vna.subs(*s == a[i]);
2211                                 // ... for which the leading coefficient doesn't vanish ...
2212                         } while ( vnatry == 0 );
2213                         vna = vnatry;
2214                         u0 = u0.subs(*s == a[i]);
2215                         ++s;
2216                 }
2217                 // ... for which u0 is square free ...
2218                 ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
2219                 if ( !is_a<numeric>(g) ) {
2220                         continue;
2221                 }
2222                 if ( !is_a<numeric>(vn) ) {
2223                         // ... and for which the evaluated factors have each an unique prime factor
2224                         lst fnum = f;
2225                         fnum.let_op(0) = fnum.op(0) * u0.content(x);
2226                         for ( size_t i=1; i<fnum.nops(); ++i ) {
2227                                 if ( !is_a<numeric>(fnum.op(i)) ) {
2228                                         s = syms.begin();
2229                                         ++s;
2230                                         for ( size_t j=0; j<a.size(); ++j, ++s ) {
2231                                                 fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
2232                                         }
2233                                 }
2234                         }
2235                         if ( checkdivisors(fnum) ) {
2236                                 continue;
2237                         }
2238                 }
2239                 // ok, we have a valid set now
2240                 return;
2241         }
2242 }
2243
2244 // forward declaration
2245 static ex factor_sqrfree(const ex& poly);
2246
2247 /** Multivariate factorization.
2248  *
2249  *  The implementation is based on the algorithm described in [Wan].
2250  *  An evaluation homomorphism (a set of integers) is determined that fulfills
2251  *  certain criteria. The evaluated polynomial is univariate and is factorized
2252  *  by factor_univariate(). The main work then is to find the correct leading
2253  *  coefficients of the univariate factors. They have to correspond to the
2254  *  factors of the (multivariate) leading coefficient of the input polynomial
2255  *  (as defined for a specific variable x). After that the Hensel lifting can be
2256  *  performed.
2257  *
2258  *  @param[in] poly  expanded, square free polynomial
2259  *  @param[in] syms  contains the symbols in the polynomial
2260  *  @return          factorized polynomial
2261  */
2262 static ex factor_multivariate(const ex& poly, const exset& syms)
2263 {
2264         exset::const_iterator s;
2265         const ex& x = *syms.begin();
2266
2267         // make polynomial primitive
2268         ex unit, cont, pp;
2269         poly.unitcontprim(x, unit, cont, pp);
2270         if ( !is_a<numeric>(cont) ) {
2271                 return factor_sqrfree(cont) * factor_sqrfree(pp);
2272         }
2273
2274         // factor leading coefficient
2275         ex vn = pp.collect(x).lcoeff(x);
2276         ex vnlst;
2277         if ( is_a<numeric>(vn) ) {
2278                 vnlst = lst(vn);
2279         }
2280         else {
2281                 ex vnfactors = factor(vn);
2282                 vnlst = put_factors_into_lst(vnfactors);
2283         }
2284
2285         const unsigned int maxtrials = 3;
2286         numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
2287         vector<numeric> a(syms.size()-1, 0);
2288
2289         // try now to factorize until we are successful
2290         while ( true ) {
2291
2292                 unsigned int trialcount = 0;
2293                 unsigned int prime;
2294                 int factor_count = 0;
2295                 int min_factor_count = -1;
2296                 ex u, delta;
2297                 ex ufac, ufaclst;
2298
2299                 // try several evaluation points to reduce the number of factors
2300                 while ( trialcount < maxtrials ) {
2301
2302                         // generate a set of valid evaluation points
2303                         generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
2304
2305                         ufac = factor_univariate(u, x, prime);
2306                         ufaclst = put_factors_into_lst(ufac);
2307                         factor_count = ufaclst.nops()-1;
2308                         delta = ufaclst.op(0);
2309
2310                         if ( factor_count <= 1 ) {
2311                                 // irreducible
2312                                 return poly;
2313                         }
2314                         if ( min_factor_count < 0 ) {
2315                                 // first time here
2316                                 min_factor_count = factor_count;
2317                         }
2318                         else if ( min_factor_count == factor_count ) {
2319                                 // one less to try
2320                                 ++trialcount;
2321                         }
2322                         else if ( min_factor_count > factor_count ) {
2323                                 // new minimum, reset trial counter
2324                                 min_factor_count = factor_count;
2325                                 trialcount = 0;
2326                         }
2327                 }
2328
2329                 // determine true leading coefficients for the Hensel lifting
2330                 vector<ex> C(factor_count);
2331                 if ( is_a<numeric>(vn) ) {
2332                         // easy case
2333                         for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2334                                 C[i-1] = ufaclst.op(i).lcoeff(x);
2335                         }
2336                 }
2337                 else {
2338                         // difficult case.
2339                         // we use the property of the ftilde having a unique prime factor.
2340                         // details can be found in [Wan].
2341                         // calculate ftilde
2342                         vector<numeric> ftilde(vnlst.nops()-1);
2343                         for ( size_t i=0; i<ftilde.size(); ++i ) {
2344                                 ex ft = vnlst.op(i+1);
2345                                 s = syms.begin();
2346                                 ++s;
2347                                 for ( size_t j=0; j<a.size(); ++j ) {
2348                                         ft = ft.subs(*s == a[j]);
2349                                         ++s;
2350                                 }
2351                                 ftilde[i] = ex_to<numeric>(ft);
2352                         }
2353                         // calculate D and C
2354                         vector<bool> used_flag(ftilde.size(), false);
2355                         vector<ex> D(factor_count, 1);
2356                         if ( delta == 1 ) {
2357                                 for ( int i=0; i<factor_count; ++i ) {
2358                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2359                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2360                                                 int count = 0;
2361                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2362                                                         prefac = iquo(prefac, ftilde[j]);
2363                                                         ++count;
2364                                                 }
2365                                                 if ( count ) {
2366                                                         used_flag[j] = true;
2367                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2368                                                 }
2369                                         }
2370                                         C[i] = D[i] * prefac;
2371                                 }
2372                         }
2373                         else {
2374                                 for ( int i=0; i<factor_count; ++i ) {
2375                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2376                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2377                                                 int count = 0;
2378                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2379                                                         prefac = iquo(prefac, ftilde[j]);
2380                                                         ++count;
2381                                                 }
2382                                                 while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
2383                                                         numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
2384                                                         prefac = iquo(prefac, g);
2385                                                         delta = delta / (ftilde[j]/g);
2386                                                         ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
2387                                                         ++count;
2388                                                 }
2389                                                 if ( count ) {
2390                                                         used_flag[j] = true;
2391                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2392                                                 }
2393                                         }
2394                                         C[i] = D[i] * prefac;
2395                                 }
2396                         }
2397                         // check if something went wrong
2398                         bool some_factor_unused = false;
2399                         for ( size_t i=0; i<used_flag.size(); ++i ) {
2400                                 if ( !used_flag[i] ) {
2401                                         some_factor_unused = true;
2402                                         break;
2403                                 }
2404                         }
2405                         if ( some_factor_unused ) {
2406                                 continue;
2407                         }
2408                 }
2409
2410                 // multiply the remaining content of the univariate polynomial into the
2411                 // first factor
2412                 if ( delta != 1 ) {
2413                         C[0] = C[0] * delta;
2414                         ufaclst.let_op(1) = ufaclst.op(1) * delta;
2415                 }
2416
2417                 // set up evaluation points
2418                 EvalPoint ep;
2419                 vector<EvalPoint> epv;
2420                 s = syms.begin();
2421                 ++s;
2422                 for ( size_t i=0; i<a.size(); ++i ) {
2423                         ep.x = *s++;
2424                         ep.evalpoint = a[i].to_int();
2425                         epv.push_back(ep);
2426                 }
2427
2428                 // calc bound p^l
2429                 int maxdeg = 0;
2430                 for ( int i=1; i<=factor_count; ++i ) {
2431                         if ( ufaclst.op(i).degree(x) > maxdeg ) {
2432                                 maxdeg = ufaclst[i].degree(x);
2433                         }
2434                 }
2435                 cl_I B = 2*calc_bound(u, x, maxdeg);
2436                 cl_I l = 1;
2437                 cl_I pl = prime;
2438                 while ( pl < B ) {
2439                         l = l + 1;
2440                         pl = pl * prime;
2441                 }
2442
2443                 // set up modular factors (mod p^l)
2444                 cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
2445                 upvec modfactors(ufaclst.nops()-1);
2446                 for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2447                         umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
2448                 }
2449
2450                 // try Hensel lifting
2451                 ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
2452                 if ( res != lst() ) {
2453                         ex result = cont * unit;
2454                         for ( size_t i=0; i<res.nops(); ++i ) {
2455                                 result *= res.op(i).content(x) * res.op(i).unit(x);
2456                                 result *= res.op(i).primpart(x);
2457                         }
2458                         return result;
2459                 }
2460         }
2461 }
2462
2463 /** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
2464  */
2465 struct find_symbols_map : public map_function {
2466         exset syms;
2467         ex operator()(const ex& e)
2468         {
2469                 if ( is_a<symbol>(e) ) {
2470                         syms.insert(e);
2471                         return e;
2472                 }
2473                 return e.map(*this);
2474         }
2475 };
2476
2477 /** Factorizes a polynomial that is square free. It calls either the univariate
2478  *  or the multivariate factorization functions.
2479  */
2480 static ex factor_sqrfree(const ex& poly)
2481 {
2482         // determine all symbols in poly
2483         find_symbols_map findsymbols;
2484         findsymbols(poly);
2485         if ( findsymbols.syms.size() == 0 ) {
2486                 return poly;
2487         }
2488
2489         if ( findsymbols.syms.size() == 1 ) {
2490                 // univariate case
2491                 const ex& x = *(findsymbols.syms.begin());
2492                 if ( poly.ldegree(x) > 0 ) {
2493                         // pull out direct factors
2494                         int ld = poly.ldegree(x);
2495                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
2496                         return res * pow(x,ld);
2497                 }
2498                 else {
2499                         ex res = factor_univariate(poly, x);
2500                         return res;
2501                 }
2502         }
2503
2504         // multivariate case
2505         ex res = factor_multivariate(poly, findsymbols.syms);
2506         return res;
2507 }
2508
2509 /** Map used by factor() when factor_options::all is given to access all
2510  *  subexpressions and to call factor() on them.
2511  */
2512 struct apply_factor_map : public map_function {
2513         unsigned options;
2514         apply_factor_map(unsigned options_) : options(options_) { }
2515         ex operator()(const ex& e)
2516         {
2517                 if ( e.info(info_flags::polynomial) ) {
2518                         return factor(e, options);
2519                 }
2520                 if ( is_a<add>(e) ) {
2521                         ex s1, s2;
2522                         for ( size_t i=0; i<e.nops(); ++i ) {
2523                                 if ( e.op(i).info(info_flags::polynomial) ) {
2524                                         s1 += e.op(i);
2525                                 }
2526                                 else {
2527                                         s2 += e.op(i);
2528                                 }
2529                         }
2530                         s1 = s1.eval();
2531                         s2 = s2.eval();
2532                         return factor(s1, options) + s2.map(*this);
2533                 }
2534                 return e.map(*this);
2535         }
2536 };
2537
2538 } // anonymous namespace
2539
2540 /** Interface function to the outside world. It checks the arguments, tries a
2541  *  square free factorization, and then calls factor_sqrfree to do the hard
2542  *  work.
2543  */
2544 ex factor(const ex& poly, unsigned options)
2545 {
2546         // check arguments
2547         if ( !poly.info(info_flags::polynomial) ) {
2548                 if ( options & factor_options::all ) {
2549                         options &= ~factor_options::all;
2550                         apply_factor_map factor_map(options);
2551                         return factor_map(poly);
2552                 }
2553                 return poly;
2554         }
2555
2556         // determine all symbols in poly
2557         find_symbols_map findsymbols;
2558         findsymbols(poly);
2559         if ( findsymbols.syms.size() == 0 ) {
2560                 return poly;
2561         }
2562         lst syms;
2563         exset::const_iterator i=findsymbols.syms.begin(), end=findsymbols.syms.end();
2564         for ( ; i!=end; ++i ) {
2565                 syms.append(*i);
2566         }
2567
2568         // make poly square free
2569         ex sfpoly = sqrfree(poly.expand(), syms);
2570
2571         // factorize the square free components
2572         if ( is_a<power>(sfpoly) ) {
2573                 // case: (polynomial)^exponent
2574                 const ex& base = sfpoly.op(0);
2575                 if ( !is_a<add>(base) ) {
2576                         // simple case: (monomial)^exponent
2577                         return sfpoly;
2578                 }
2579                 ex f = factor_sqrfree(base);
2580                 return pow(f, sfpoly.op(1));
2581         }
2582         if ( is_a<mul>(sfpoly) ) {
2583                 // case: multiple factors
2584                 ex res = 1;
2585                 for ( size_t i=0; i<sfpoly.nops(); ++i ) {
2586                         const ex& t = sfpoly.op(i);
2587                         if ( is_a<power>(t) ) {
2588                                 const ex& base = t.op(0);
2589                                 if ( !is_a<add>(base) ) {
2590                                         res *= t;
2591                                 }
2592                                 else {
2593                                         ex f = factor_sqrfree(base);
2594                                         res *= pow(f, t.op(1));
2595                                 }
2596                         }
2597                         else if ( is_a<add>(t) ) {
2598                                 ex f = factor_sqrfree(t);
2599                                 res *= f;
2600                         }
2601                         else {
2602                                 res *= t;
2603                         }
2604                 }
2605                 return res;
2606         }
2607         if ( is_a<symbol>(sfpoly) ) {
2608                 return poly;
2609         }
2610         // case: (polynomial)
2611         ex f = factor_sqrfree(sfpoly);
2612         return f;
2613 }
2614
2615 } // namespace GiNaC
2616
2617 #ifdef DEBUGFACTOR
2618 #include "test.h"
2619 #endif