ginac/factor.cpp

   1 /** @file factor.cpp
   2  *
   3  *  Polynomial factorization (implementation).
   4  *
   5  *  The interface function factor() at the end of this file is defined in the
   6  *  GiNaC namespace. All other utility functions and classes are defined in an
   7  *  additional anonymous namespace.
   8  *
   9  *  Factorization starts by doing a square free factorization and making the
  10  *  coefficients integer. Then, depending on the number of free variables it
  11  *  proceeds either in dedicated univariate or multivariate factorization code.
  12  *
  13  *  Univariate factorization does a modular factorization via Berlekamp's
  14  *  algorithm and distinct degree factorization. Hensel lifting is used at the
  15  *  end.
  16  *
  17  *  Multivariate factorization uses the univariate factorization (applying a
  18  *  evaluation homomorphism first) and Hensel lifting raises the answer to the
  19  *  multivariate domain. The Hensel lifting code is completely distinct from the
  20  *  code used by the univariate factorization.
  21  *
  22  *  Algorithms used can be found in
  23  *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
  24  *          P.S.Wang,
  25  *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  26  *    [GCL] Algorithms for Computer Algebra,
  27  *          K.O.Geddes, S.R.Czapor, G.Labahn,
  28  *          Springer Verlag, 1992.
  29  *    [Mig] Some Useful Bounds,
  30  *          M.Mignotte,
  31  *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
  32  *          pp. 259-263, Springer-Verlag, New York, 1982.
  33  */
  34
  35 /*
  36  *  GiNaC Copyright (C) 1999-2018 Johannes Gutenberg University Mainz, Germany
  37  *
  38  *  This program is free software; you can redistribute it and/or modify
  39  *  it under the terms of the GNU General Public License as published by
  40  *  the Free Software Foundation; either version 2 of the License, or
  41  *  (at your option) any later version.
  42  *
  43  *  This program is distributed in the hope that it will be useful,
  44  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  45  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  46  *  GNU General Public License for more details.
  47  *
  48  *  You should have received a copy of the GNU General Public License
  49  *  along with this program; if not, write to the Free Software
  50  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  51  */
  52
  53 //#define DEBUGFACTOR
  54
  55 #include "factor.h"
  56
  57 #include "ex.h"
  58 #include "numeric.h"
  59 #include "operators.h"
  60 #include "inifcns.h"
  61 #include "symbol.h"
  62 #include "relational.h"
  63 #include "power.h"
  64 #include "mul.h"
  65 #include "normal.h"
  66 #include "add.h"
  67
  68 #include <algorithm>
  69 #include <limits>
  70 #include <list>
  71 #include <vector>
  72 #include <stack>
  73 #ifdef DEBUGFACTOR
  74 #include <ostream>
  75 #endif
  76 using namespace std;
  77
  78 #include <cln/cln.h>
  79 using namespace cln;
  80
  81 namespace GiNaC {
  82
  83 #ifdef DEBUGFACTOR
  84 #define DCOUT(str) cout << #str << endl
  85 #define DCOUTVAR(var) cout << #var << ": " << var << endl
  86 #define DCOUT2(str,var) cout << #str << ": " << var << endl
  87 ostream& operator<<(ostream& o, const vector<int>& v)
  88 {
  89         auto i = v.begin(), end = v.end();
  90         while ( i != end ) {
  91                 o << *i << " ";
  92                 ++i;
  93         }
  94         return o;
  95 }
  96 static ostream& operator<<(ostream& o, const vector<cl_I>& v)
  97 {
  98         auto i = v.begin(), end = v.end();
  99         while ( i != end ) {
 100                 o << *i << "[" << i-v.begin() << "]" << " ";
 101                 ++i;
 102         }
 103         return o;
 104 }
 105 static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 106 {
 107         auto i = v.begin(), end = v.end();
 108         while ( i != end ) {
 109                 o << *i << "[" << i-v.begin() << "]" << " ";
 110                 ++i;
 111         }
 112         return o;
 113 }
 114 ostream& operator<<(ostream& o, const vector<numeric>& v)
 115 {
 116         for ( size_t i=0; i<v.size(); ++i ) {
 117                 o << v[i] << " ";
 118         }
 119         return o;
 120 }
 121 ostream& operator<<(ostream& o, const vector<vector<cl_MI>>& v)
 122 {
 123         auto i = v.begin(), end = v.end();
 124         while ( i != end ) {
 125                 o << i-v.begin() << ": " << *i << endl;
 126                 ++i;
 127         }
 128         return o;
 129 }
 130 #else
 131 #define DCOUT(str)
 132 #define DCOUTVAR(var)
 133 #define DCOUT2(str,var)
 134 #endif // def DEBUGFACTOR
 135
 136 // anonymous namespace to hide all utility functions
 137 namespace {
 138
 139 ////////////////////////////////////////////////////////////////////////////////
 140 // modular univariate polynomial code
 141
 142 typedef std::vector<cln::cl_MI> umodpoly;
 143 typedef std::vector<cln::cl_I> upoly;
 144 typedef vector<umodpoly> upvec;
 145
 146 // COPY FROM UPOLY.HPP
 147
 148 // CHANGED size_t -> int !!!
 149 template<typename T> static int degree(const T& p)
 150 {
 151         return p.size() - 1;
 152 }
 153
 154 template<typename T> static typename T::value_type lcoeff(const T& p)
 155 {
 156         return p[p.size() - 1];
 157 }
 158
 159 static bool normalize_in_field(umodpoly& a)
 160 {
 161         if (a.size() == 0)
 162                 return true;
 163         if ( lcoeff(a) == a[0].ring()->one() ) {
 164                 return true;
 165         }
 166
 167         const cln::cl_MI lc_1 = recip(lcoeff(a));
 168         for (std::size_t k = a.size(); k-- != 0; )
 169                 a[k] = a[k]*lc_1;
 170         return false;
 171 }
 172
 173 template<typename T> static void
 174 canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
 175 {
 176         if (p.empty())
 177                 return;
 178
 179         std::size_t i = p.size() - 1;
 180         // Be fast if the polynomial is already canonicalized
 181         if (!zerop(p[i]))
 182                 return;
 183
 184         if (hint < p.size())
 185                 i = hint;
 186
 187         bool is_zero = false;
 188         do {
 189                 if (!zerop(p[i])) {
 190                         ++i;
 191                         break;
 192                 }
 193                 if (i == 0) {
 194                         is_zero = true;
 195                         break;
 196                 }
 197                 --i;
 198         } while (true);
 199
 200         if (is_zero) {
 201                 p.clear();
 202                 return;
 203         }
 204
 205         p.erase(p.begin() + i, p.end());
 206 }
 207
 208 // END COPY FROM UPOLY.HPP
 209
 210 static void expt_pos(umodpoly& a, unsigned int q)
 211 {
 212         if ( a.empty() ) return;
 213         cl_MI zero = a[0].ring()->zero();
 214         int deg = degree(a);
 215         a.resize(degree(a)*q+1, zero);
 216         for ( int i=deg; i>0; --i ) {
 217                 a[i*q] = a[i];
 218                 a[i] = zero;
 219         }
 220 }
 221
 222 template<bool COND, typename T = void> struct enable_if
 223 {
 224         typedef T type;
 225 };
 226
 227 template<typename T> struct enable_if<false, T> { /* empty */ };
 228
 229 template<typename T> struct uvar_poly_p
 230 {
 231         static const bool value = false;
 232 };
 233
 234 template<> struct uvar_poly_p<upoly>
 235 {
 236         static const bool value = true;
 237 };
 238
 239 template<> struct uvar_poly_p<umodpoly>
 240 {
 241         static const bool value = true;
 242 };
 243
 244 template<typename T>
 245 // Don't define this for anything but univariate polynomials.
 246 static typename enable_if<uvar_poly_p<T>::value, T>::type
 247 operator+(const T& a, const T& b)
 248 {
 249         int sa = a.size();
 250         int sb = b.size();
 251         if ( sa >= sb ) {
 252                 T r(sa);
 253                 int i = 0;
 254                 for ( ; i<sb; ++i ) {
 255                         r[i] = a[i] + b[i];
 256                 }
 257                 for ( ; i<sa; ++i ) {
 258                         r[i] = a[i];
 259                 }
 260                 canonicalize(r);
 261                 return r;
 262         }
 263         else {
 264                 T r(sb);
 265                 int i = 0;
 266                 for ( ; i<sa; ++i ) {
 267                         r[i] = a[i] + b[i];
 268                 }
 269                 for ( ; i<sb; ++i ) {
 270                         r[i] = b[i];
 271                 }
 272                 canonicalize(r);
 273                 return r;
 274         }
 275 }
 276
 277 template<typename T>
 278 // Don't define this for anything but univariate polynomials. Otherwise
 279 // overload resolution might fail (this actually happens when compiling
 280 // GiNaC with g++ 3.4).
 281 static typename enable_if<uvar_poly_p<T>::value, T>::type
 282 operator-(const T& a, const T& b)
 283 {
 284         int sa = a.size();
 285         int sb = b.size();
 286         if ( sa >= sb ) {
 287                 T r(sa);
 288                 int i = 0;
 289                 for ( ; i<sb; ++i ) {
 290                         r[i] = a[i] - b[i];
 291                 }
 292                 for ( ; i<sa; ++i ) {
 293                         r[i] = a[i];
 294                 }
 295                 canonicalize(r);
 296                 return r;
 297         }
 298         else {
 299                 T r(sb);
 300                 int i = 0;
 301                 for ( ; i<sa; ++i ) {
 302                         r[i] = a[i] - b[i];
 303                 }
 304                 for ( ; i<sb; ++i ) {
 305                         r[i] = -b[i];
 306                 }
 307                 canonicalize(r);
 308                 return r;
 309         }
 310 }
 311
 312 static upoly operator*(const upoly& a, const upoly& b)
 313 {
 314         upoly c;
 315         if ( a.empty() || b.empty() ) return c;
 316
 317         int n = degree(a) + degree(b);
 318         c.resize(n+1, 0);
 319         for ( int i=0 ; i<=n; ++i ) {
 320                 for ( int j=0 ; j<=i; ++j ) {
 321                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 322                         c[i] = c[i] + a[j] * b[i-j];
 323                 }
 324         }
 325         canonicalize(c);
 326         return c;
 327 }
 328
 329 static umodpoly operator*(const umodpoly& a, const umodpoly& b)
 330 {
 331         umodpoly c;
 332         if ( a.empty() || b.empty() ) return c;
 333
 334         int n = degree(a) + degree(b);
 335         c.resize(n+1, a[0].ring()->zero());
 336         for ( int i=0 ; i<=n; ++i ) {
 337                 for ( int j=0 ; j<=i; ++j ) {
 338                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 339                         c[i] = c[i] + a[j] * b[i-j];
 340                 }
 341         }
 342         canonicalize(c);
 343         return c;
 344 }
 345
 346 static upoly operator*(const upoly& a, const cl_I& x)
 347 {
 348         if ( zerop(x) ) {
 349                 upoly r;
 350                 return r;
 351         }
 352         upoly r(a.size());
 353         for ( size_t i=0; i<a.size(); ++i ) {
 354                 r[i] = a[i] * x;
 355         }
 356         return r;
 357 }
 358
 359 static upoly operator/(const upoly& a, const cl_I& x)
 360 {
 361         if ( zerop(x) ) {
 362                 upoly r;
 363                 return r;
 364         }
 365         upoly r(a.size());
 366         for ( size_t i=0; i<a.size(); ++i ) {
 367                 r[i] = exquo(a[i],x);
 368         }
 369         return r;
 370 }
 371
 372 static umodpoly operator*(const umodpoly& a, const cl_MI& x)
 373 {
 374         umodpoly r(a.size());
 375         for ( size_t i=0; i<a.size(); ++i ) {
 376                 r[i] = a[i] * x;
 377         }
 378         canonicalize(r);
 379         return r;
 380 }
 381
 382 static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 383 {
 384         // assert: e is in Z[x]
 385         int deg = e.degree(x);
 386         up.resize(deg+1);
 387         int ldeg = e.ldegree(x);
 388         for ( ; deg>=ldeg; --deg ) {
 389                 up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 390         }
 391         for ( ; deg>=0; --deg ) {
 392                 up[deg] = 0;
 393         }
 394         canonicalize(up);
 395 }
 396
 397 static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 398 {
 399         int deg = degree(e);
 400         ump.resize(deg+1);
 401         for ( ; deg>=0; --deg ) {
 402                 ump[deg] = R->canonhom(e[deg]);
 403         }
 404         canonicalize(ump);
 405 }
 406
 407 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 408 {
 409         // assert: e is in Z[x]
 410         int deg = e.degree(x);
 411         ump.resize(deg+1);
 412         int ldeg = e.ldegree(x);
 413         for ( ; deg>=ldeg; --deg ) {
 414                 cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 415                 ump[deg] = R->canonhom(coeff);
 416         }
 417         for ( ; deg>=0; --deg ) {
 418                 ump[deg] = R->zero();
 419         }
 420         canonicalize(ump);
 421 }
 422
 423 #ifdef DEBUGFACTOR
 424 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 425 {
 426         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 427 }
 428 #endif
 429
 430 static ex upoly_to_ex(const upoly& a, const ex& x)
 431 {
 432         if ( a.empty() ) return 0;
 433         ex e;
 434         for ( int i=degree(a); i>=0; --i ) {
 435                 e += numeric(a[i]) * pow(x, i);
 436         }
 437         return e;
 438 }
 439
 440 static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 441 {
 442         if ( a.empty() ) return 0;
 443         cl_modint_ring R = a[0].ring();
 444         cl_I mod = R->modulus;
 445         cl_I halfmod = (mod-1) >> 1;
 446         ex e;
 447         for ( int i=degree(a); i>=0; --i ) {
 448                 cl_I n = R->retract(a[i]);
 449                 if ( n > halfmod ) {
 450                         e += numeric(n-mod) * pow(x, i);
 451                 } else {
 452                         e += numeric(n) * pow(x, i);
 453                 }
 454         }
 455         return e;
 456 }
 457
 458 static upoly umodpoly_to_upoly(const umodpoly& a)
 459 {
 460         upoly e(a.size());
 461         if ( a.empty() ) return e;
 462         cl_modint_ring R = a[0].ring();
 463         cl_I mod = R->modulus;
 464         cl_I halfmod = (mod-1) >> 1;
 465         for ( int i=degree(a); i>=0; --i ) {
 466                 cl_I n = R->retract(a[i]);
 467                 if ( n > halfmod ) {
 468                         e[i] = n-mod;
 469                 } else {
 470                         e[i] = n;
 471                 }
 472         }
 473         return e;
 474 }
 475
 476 static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
 477 {
 478         umodpoly e;
 479         if ( a.empty() ) return e;
 480         cl_modint_ring oldR = a[0].ring();
 481         size_t sa = a.size();
 482         e.resize(sa+m, R->zero());
 483         for ( size_t i=0; i<sa; ++i ) {
 484                 e[i+m] = R->canonhom(oldR->retract(a[i]));
 485         }
 486         canonicalize(e);
 487         return e;
 488 }
 489
 490 /** Divides all coefficients of the polynomial a by the integer x.
 491  *  All coefficients are supposed to be divisible by x. If they are not, the
 492  *  the<cl_I> cast will raise an exception.
 493  *
 494  *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
 495  *  @param[in]     x  integer that divides the coefficients
 496  */
 497 static void reduce_coeff(umodpoly& a, const cl_I& x)
 498 {
 499         if ( a.empty() ) return;
 500
 501         cl_modint_ring R = a[0].ring();
 502         for (auto & i : a) {
 503                 // cln cannot perform this division in the modular field
 504                 cl_I c = R->retract(i);
 505                 i = cl_MI(R, the<cl_I>(c / x));
 506         }
 507 }
 508
 509 /** Calculates remainder of a/b.
 510  *  Assertion: a and b not empty.
 511  *
 512  *  @param[in]  a  polynomial dividend
 513  *  @param[in]  b  polynomial divisor
 514  *  @param[out] r  polynomial remainder
 515  */
 516 static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 517 {
 518         int k, n;
 519         n = degree(b);
 520         k = degree(a) - n;
 521         r = a;
 522         if ( k < 0 ) return;
 523
 524         do {
 525                 cl_MI qk = div(r[n+k], b[n]);
 526                 if ( !zerop(qk) ) {
 527                         for ( int i=0; i<n; ++i ) {
 528                                 unsigned int j = n + k - 1 - i;
 529                                 r[j] = r[j] - qk * b[j-k];
 530                         }
 531                 }
 532         } while ( k-- );
 533
 534         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 535         canonicalize(r);
 536 }
 537
 538 /** Calculates quotient of a/b.
 539  *  Assertion: a and b not empty.
 540  *
 541  *  @param[in]  a  polynomial dividend
 542  *  @param[in]  b  polynomial divisor
 543  *  @param[out] q  polynomial quotient
 544  */
 545 static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 546 {
 547         int k, n;
 548         n = degree(b);
 549         k = degree(a) - n;
 550         q.clear();
 551         if ( k < 0 ) return;
 552
 553         umodpoly r = a;
 554         q.resize(k+1, a[0].ring()->zero());
 555         do {
 556                 cl_MI qk = div(r[n+k], b[n]);
 557                 if ( !zerop(qk) ) {
 558                         q[k] = qk;
 559                         for ( int i=0; i<n; ++i ) {
 560                                 unsigned int j = n + k - 1 - i;
 561                                 r[j] = r[j] - qk * b[j-k];
 562                         }
 563                 }
 564         } while ( k-- );
 565
 566         canonicalize(q);
 567 }
 568
 569 /** Calculates quotient and remainder of a/b.
 570  *  Assertion: a and b not empty.
 571  *
 572  *  @param[in]  a  polynomial dividend
 573  *  @param[in]  b  polynomial divisor
 574  *  @param[out] r  polynomial remainder
 575  *  @param[out] q  polynomial quotient
 576  */
 577 static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 578 {
 579         int k, n;
 580         n = degree(b);
 581         k = degree(a) - n;
 582         q.clear();
 583         r = a;
 584         if ( k < 0 ) return;
 585
 586         q.resize(k+1, a[0].ring()->zero());
 587         do {
 588                 cl_MI qk = div(r[n+k], b[n]);
 589                 if ( !zerop(qk) ) {
 590                         q[k] = qk;
 591                         for ( int i=0; i<n; ++i ) {
 592                                 unsigned int j = n + k - 1 - i;
 593                                 r[j] = r[j] - qk * b[j-k];
 594                         }
 595                 }
 596         } while ( k-- );
 597
 598         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 599         canonicalize(r);
 600         canonicalize(q);
 601 }
 602
 603 /** Calculates the GCD of polynomial a and b.
 604  *
 605  *  @param[in]  a  polynomial
 606  *  @param[in]  b  polynomial
 607  *  @param[out] c  GCD
 608  */
 609 static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
 610 {
 611         if ( degree(a) < degree(b) ) return gcd(b, a, c);
 612
 613         c = a;
 614         normalize_in_field(c);
 615         umodpoly d = b;
 616         normalize_in_field(d);
 617         umodpoly r;
 618         while ( !d.empty() ) {
 619                 rem(c, d, r);
 620                 c = d;
 621                 d = r;
 622         }
 623         normalize_in_field(c);
 624 }
 625
 626 /** Calculates the derivative of the polynomial a.
 627  *
 628  *  @param[in]  a  polynomial of which to take the derivative
 629  *  @param[out] d  result/derivative
 630  */
 631 static void deriv(const umodpoly& a, umodpoly& d)
 632 {
 633         d.clear();
 634         if ( a.size() <= 1 ) return;
 635
 636         d.insert(d.begin(), a.begin()+1, a.end());
 637         int max = d.size();
 638         for ( int i=1; i<max; ++i ) {
 639                 d[i] = d[i] * (i+1);
 640         }
 641         canonicalize(d);
 642 }
 643
 644 static bool unequal_one(const umodpoly& a)
 645 {
 646         if ( a.empty() ) return true;
 647         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 648 }
 649
 650 static bool equal_one(const umodpoly& a)
 651 {
 652         return ( a.size() == 1 && a[0] == a[0].ring()->one() );
 653 }
 654
 655 /** Returns true if polynomial a is square free.
 656  *
 657  *  @param[in] a  polynomial to check
 658  *  @return       true if polynomial is square free, false otherwise
 659  */
 660 static bool squarefree(const umodpoly& a)
 661 {
 662         umodpoly b;
 663         deriv(a, b);
 664         if ( b.empty() ) {
 665                 return false;
 666         }
 667         umodpoly c;
 668         gcd(a, b, c);
 669         return equal_one(c);
 670 }
 671
 672 // END modular univariate polynomial code
 673 ////////////////////////////////////////////////////////////////////////////////
 674
 675 ////////////////////////////////////////////////////////////////////////////////
 676 // modular matrix
 677
 678 typedef vector<cl_MI> mvec;
 679
 680 class modular_matrix
 681 {
 682 #ifdef DEBUGFACTOR
 683         friend ostream& operator<<(ostream& o, const modular_matrix& m);
 684 #endif
 685 public:
 686         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
 687         {
 688                 m.resize(c*r, init);
 689         }
 690         size_t rowsize() const { return r; }
 691         size_t colsize() const { return c; }
 692         cl_MI& operator()(size_t row, size_t col) { return m[row*c + col]; }
 693         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 694         void mul_col(size_t col, const cl_MI x)
 695         {
 696                 for ( size_t rc=0; rc<r; ++rc ) {
 697                         std::size_t i = c*rc + col;
 698                         m[i] = m[i] * x;
 699                 }
 700         }
 701         void sub_col(size_t col1, size_t col2, const cl_MI fac)
 702         {
 703                 for ( size_t rc=0; rc<r; ++rc ) {
 704                         std::size_t i1 = col1 + c*rc;
 705                         std::size_t i2 = col2 + c*rc;
 706                         m[i1] = m[i1] - m[i2]*fac;
 707                 }
 708         }
 709         void switch_col(size_t col1, size_t col2)
 710         {
 711                 for ( size_t rc=0; rc<r; ++rc ) {
 712                         std::size_t i1 = col1 + rc*c;
 713                         std::size_t i2 = col2 + rc*c;
 714                         std::swap(m[i1], m[i2]);
 715                 }
 716         }
 717         void mul_row(size_t row, const cl_MI x)
 718         {
 719                 for ( size_t cc=0; cc<c; ++cc ) {
 720                         std::size_t i = row*c + cc;
 721                         m[i] = m[i] * x;
 722                 }
 723         }
 724         void sub_row(size_t row1, size_t row2, const cl_MI fac)
 725         {
 726                 for ( size_t cc=0; cc<c; ++cc ) {
 727                         std::size_t i1 = row1*c + cc;
 728                         std::size_t i2 = row2*c + cc;
 729                         m[i1] = m[i1] - m[i2]*fac;
 730                 }
 731         }
 732         void switch_row(size_t row1, size_t row2)
 733         {
 734                 for ( size_t cc=0; cc<c; ++cc ) {
 735                         std::size_t i1 = row1*c + cc;
 736                         std::size_t i2 = row2*c + cc;
 737                         std::swap(m[i1], m[i2]);
 738                 }
 739         }
 740         bool is_col_zero(size_t col) const
 741         {
 742                 for ( size_t rr=0; rr<r; ++rr ) {
 743                         std::size_t i = col + rr*c;
 744                         if ( !zerop(m[i]) ) {
 745                                 return false;
 746                         }
 747                 }
 748                 return true;
 749         }
 750         bool is_row_zero(size_t row) const
 751         {
 752                 for ( size_t cc=0; cc<c; ++cc ) {
 753                         std::size_t i = row*c + cc;
 754                         if ( !zerop(m[i]) ) {
 755                                 return false;
 756                         }
 757                 }
 758                 return true;
 759         }
 760         void set_row(size_t row, const vector<cl_MI>& newrow)
 761         {
 762                 for (std::size_t i2 = 0; i2 < newrow.size(); ++i2) {
 763                         std::size_t i1 = row*c + i2;
 764                         m[i1] = newrow[i2];
 765                 }
 766         }
 767         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
 768         mvec::const_iterator row_end(size_t row) const { return m.begin()+row*c+r; }
 769 private:
 770         size_t r, c;
 771         mvec m;
 772 };
 773
 774 #ifdef DEBUGFACTOR
 775 modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 776 {
 777         const unsigned int r = m1.rowsize();
 778         const unsigned int c = m2.colsize();
 779         modular_matrix o(r,c,m1(0,0));
 780
 781         for ( size_t i=0; i<r; ++i ) {
 782                 for ( size_t j=0; j<c; ++j ) {
 783                         cl_MI buf;
 784                         buf = m1(i,0) * m2(0,j);
 785                         for ( size_t k=1; k<c; ++k ) {
 786                                 buf = buf + m1(i,k)*m2(k,j);
 787                         }
 788                         o(i,j) = buf;
 789                 }
 790         }
 791         return o;
 792 }
 793
 794 ostream& operator<<(ostream& o, const modular_matrix& m)
 795 {
 796         cl_modint_ring R = m(0,0).ring();
 797         o << "{";
 798         for ( size_t i=0; i<m.rowsize(); ++i ) {
 799                 o << "{";
 800                 for ( size_t j=0; j<m.colsize()-1; ++j ) {
 801                         o << R->retract(m(i,j)) << ",";
 802                 }
 803                 o << R->retract(m(i,m.colsize()-1)) << "}";
 804                 if ( i != m.rowsize()-1 ) {
 805                         o << ",";
 806                 }
 807         }
 808         o << "}";
 809         return o;
 810 }
 811 #endif // def DEBUGFACTOR
 812
 813 // END modular matrix
 814 ////////////////////////////////////////////////////////////////////////////////
 815
 816 /** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
 817  *
 818  *  @param[in]  a_  modular polynomial
 819  *  @param[out] Q   Q matrix
 820  */
 821 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 822 {
 823         umodpoly a = a_;
 824         normalize_in_field(a);
 825
 826         int n = degree(a);
 827         unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
 828         umodpoly r(n, a[0].ring()->zero());
 829         r[0] = a[0].ring()->one();
 830         Q.set_row(0, r);
 831         unsigned int max = (n-1) * q;
 832         for ( size_t m=1; m<=max; ++m ) {
 833                 cl_MI rn_1 = r.back();
 834                 for ( size_t i=n-1; i>0; --i ) {
 835                         r[i] = r[i-1] - (rn_1 * a[i]);
 836                 }
 837                 r[0] = -rn_1 * a[0];
 838                 if ( (m % q) == 0 ) {
 839                         Q.set_row(m/q, r);
 840                 }
 841         }
 842 }
 843
 844 /** Determine the nullspace of a matrix M-1.
 845  *
 846  *  @param[in,out] M      matrix, will be modified
 847  *  @param[out]    basis  calculated nullspace of M-1
 848  */
 849 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 850 {
 851         const size_t n = M.rowsize();
 852         const cl_MI one = M(0,0).ring()->one();
 853         for ( size_t i=0; i<n; ++i ) {
 854                 M(i,i) = M(i,i) - one;
 855         }
 856         for ( size_t r=0; r<n; ++r ) {
 857                 size_t cc = 0;
 858                 for ( ; cc<n; ++cc ) {
 859                         if ( !zerop(M(r,cc)) ) {
 860                                 if ( cc < r ) {
 861                                         if ( !zerop(M(cc,cc)) ) {
 862                                                 continue;
 863                                         }
 864                                         M.switch_col(cc, r);
 865                                 }
 866                                 else if ( cc > r ) {
 867                                         M.switch_col(cc, r);
 868                                 }
 869                                 break;
 870                         }
 871                 }
 872                 if ( cc < n ) {
 873                         M.mul_col(r, recip(M(r,r)));
 874                         for ( cc=0; cc<n; ++cc ) {
 875                                 if ( cc != r ) {
 876                                         M.sub_col(cc, r, M(r,cc));
 877                                 }
 878                         }
 879                 }
 880         }
 881
 882         for ( size_t i=0; i<n; ++i ) {
 883                 M(i,i) = M(i,i) - one;
 884         }
 885         for ( size_t i=0; i<n; ++i ) {
 886                 if ( !M.is_row_zero(i) ) {
 887                         mvec nu(M.row_begin(i), M.row_end(i));
 888                         basis.push_back(nu);
 889                 }
 890         }
 891 }
 892
 893 /** Berlekamp's modular factorization.
 894  *
 895  *  The implementation follows the algorithm in chapter 8 of [GCL].
 896  *
 897  *  @param[in]  a    modular polynomial
 898  *  @param[out] upv  vector containing modular factors. if upv was not empty the
 899  *                   new elements are added at the end
 900  */
 901 static void berlekamp(const umodpoly& a, upvec& upv)
 902 {
 903         cl_modint_ring R = a[0].ring();
 904         umodpoly one(1, R->one());
 905
 906         // find nullspace of Q matrix
 907         modular_matrix Q(degree(a), degree(a), R->zero());
 908         q_matrix(a, Q);
 909         vector<mvec> nu;
 910         nullspace(Q, nu);
 911
 912         const unsigned int k = nu.size();
 913         if ( k == 1 ) {
 914                 // irreducible
 915                 return;
 916         }
 917
 918         list<umodpoly> factors = {a};
 919         unsigned int size = 1;
 920         unsigned int r = 1;
 921         unsigned int q = cl_I_to_uint(R->modulus);
 922
 923         list<umodpoly>::iterator u = factors.begin();
 924
 925         // calculate all gcd's
 926         while ( true ) {
 927                 for ( unsigned int s=0; s<q; ++s ) {
 928                         umodpoly nur = nu[r];
 929                         nur[0] = nur[0] - cl_MI(R, s);
 930                         canonicalize(nur);
 931                         umodpoly g;
 932                         gcd(nur, *u, g);
 933                         if ( unequal_one(g) && g != *u ) {
 934                                 umodpoly uo;
 935                                 div(*u, g, uo);
 936                                 if ( equal_one(uo) ) {
 937                                         throw logic_error("berlekamp: unexpected divisor.");
 938                                 } else {
 939                                         *u = uo;
 940                                 }
 941                                 factors.push_back(g);
 942                                 size = 0;
 943                                 for (auto & i : factors) {
 944                                         if (degree(i))
 945                                                 ++size;
 946                                 }
 947                                 if ( size == k ) {
 948                                         for (auto & i : factors) {
 949                                                 upv.push_back(i);
 950                                         }
 951                                         return;
 952                                 }
 953                         }
 954                 }
 955                 if ( ++r == k ) {
 956                         r = 1;
 957                         ++u;
 958                 }
 959         }
 960 }
 961
 962 // modular square free factorization is not used at the moment so we deactivate
 963 // the code
 964 #if 0
 965
 966 /** Calculates a^(1/prime).
 967  *
 968  *  @param[in] a      polynomial
 969  *  @param[in] prime  prime number -> exponent 1/prime
 970  *  @param[in] ap     resulting polynomial
 971  */
 972 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 973 {
 974         size_t newdeg = degree(a)/prime;
 975         ap.resize(newdeg+1);
 976         ap[0] = a[0];
 977         for ( size_t i=1; i<=newdeg; ++i ) {
 978                 ap[i] = a[i*prime];
 979         }
 980 }
 981
 982 /** Modular square free factorization.
 983  *
 984  *  @param[in]  a        polynomial
 985  *  @param[out] factors  modular factors
 986  *  @param[out] mult     corresponding multiplicities (exponents)
 987  */
 988 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 989 {
 990         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
 991         int i = 1;
 992         umodpoly b;
 993         deriv(a, b);
 994         if ( b.size() ) {
 995                 umodpoly c;
 996                 gcd(a, b, c);
 997                 umodpoly w;
 998                 div(a, c, w);
 999                 while ( unequal_one(w) ) {
1000                         umodpoly y;
1001                         gcd(w, c, y);
1002                         umodpoly z;
1003                         div(w, y, z);
1004                         factors.push_back(z);
1005                         mult.push_back(i);
1006                         ++i;
1007                         w = y;
1008                         umodpoly buf;
1009                         div(c, y, buf);
1010                         c = buf;
1011                 }
1012                 if ( unequal_one(c) ) {
1013                         umodpoly cp;
1014                         expt_1_over_p(c, prime, cp);
1015                         size_t previ = mult.size();
1016                         modsqrfree(cp, factors, mult);
1017                         for ( size_t i=previ; i<mult.size(); ++i ) {
1018                                 mult[i] *= prime;
1019                         }
1020                 }
1021         } else {
1022                 umodpoly ap;
1023                 expt_1_over_p(a, prime, ap);
1024                 size_t previ = mult.size();
1025                 modsqrfree(ap, factors, mult);
1026                 for ( size_t i=previ; i<mult.size(); ++i ) {
1027                         mult[i] *= prime;
1028                 }
1029         }
1030 }
1031
1032 #endif // deactivation of square free factorization
1033
1034 /** Distinct degree factorization (DDF).
1035  *
1036  *  The implementation follows the algorithm in chapter 8 of [GCL].
1037  *
1038  *  @param[in]  a_         modular polynomial
1039  *  @param[out] degrees    vector containing the degrees of the factors of the
1040  *                         corresponding polynomials in ddfactors.
1041  *  @param[out] ddfactors  vector containing polynomials which factors have the
1042  *                         degree given in degrees.
1043  */
1044 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
1045 {
1046         umodpoly a = a_;
1047
1048         cl_modint_ring R = a[0].ring();
1049         int q = cl_I_to_int(R->modulus);
1050         int nhalf = degree(a)/2;
1051
1052         int i = 1;
1053         umodpoly w(2);
1054         w[0] = R->zero();
1055         w[1] = R->one();
1056         umodpoly x = w;
1057
1058         while ( i <= nhalf ) {
1059                 expt_pos(w, q);
1060                 umodpoly buf;
1061                 rem(w, a, buf);
1062                 w = buf;
1063                 umodpoly wx = w - x;
1064                 gcd(a, wx, buf);
1065                 if ( unequal_one(buf) ) {
1066                         degrees.push_back(i);
1067                         ddfactors.push_back(buf);
1068                 }
1069                 if ( unequal_one(buf) ) {
1070                         umodpoly buf2;
1071                         div(a, buf, buf2);
1072                         a = buf2;
1073                         nhalf = degree(a)/2;
1074                         rem(w, a, buf);
1075                         w = buf;
1076                 }
1077                 ++i;
1078         }
1079         if ( unequal_one(a) ) {
1080                 degrees.push_back(degree(a));
1081                 ddfactors.push_back(a);
1082         }
1083 }
1084
1085 /** Modular same degree factorization.
1086  *  Same degree factorization is a kind of misnomer. It performs distinct degree
1087  *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
1088  *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
1089  *  degree.
1090  *
1091  *  @param[in]  a    modular polynomial
1092  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1093  *                   new elements are added at the end
1094  */
1095 static void same_degree_factor(const umodpoly& a, upvec& upv)
1096 {
1097         cl_modint_ring R = a[0].ring();
1098
1099         vector<int> degrees;
1100         upvec ddfactors;
1101         distinct_degree_factor(a, degrees, ddfactors);
1102
1103         for ( size_t i=0; i<degrees.size(); ++i ) {
1104                 if ( degrees[i] == degree(ddfactors[i]) ) {
1105                         upv.push_back(ddfactors[i]);
1106                 } else {
1107                         berlekamp(ddfactors[i], upv);
1108                 }
1109         }
1110 }
1111
1112 // Yes, we can (choose).
1113 #define USE_SAME_DEGREE_FACTOR
1114
1115 /** Modular univariate factorization.
1116  *
1117  *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
1118  *  and same degree factorization (SDF). SDF seems to be slightly faster in
1119  *  almost all cases so it is activated as default.
1120  *
1121  *  @param[in]  p    modular polynomial
1122  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1123  *                   new elements are added at the end
1124  */
1125 static void factor_modular(const umodpoly& p, upvec& upv)
1126 {
1127 #ifdef USE_SAME_DEGREE_FACTOR
1128         same_degree_factor(p, upv);
1129 #else
1130         berlekamp(p, upv);
1131 #endif
1132 }
1133
1134 /** Calculates modular polynomials s and t such that a*s+b*t==1.
1135  *  Assertion: a and b are relatively prime and not zero.
1136  *
1137  *  @param[in]  a  polynomial
1138  *  @param[in]  b  polynomial
1139  *  @param[out] s  polynomial
1140  *  @param[out] t  polynomial
1141  */
1142 static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
1143 {
1144         if ( degree(a) < degree(b) ) {
1145                 exteuclid(b, a, t, s);
1146                 return;
1147         }
1148
1149         umodpoly one(1, a[0].ring()->one());
1150         umodpoly c = a; normalize_in_field(c);
1151         umodpoly d = b; normalize_in_field(d);
1152         s = one;
1153         t.clear();
1154         umodpoly d1;
1155         umodpoly d2 = one;
1156         umodpoly q;
1157         while ( true ) {
1158                 div(c, d, q);
1159                 umodpoly r = c - q * d;
1160                 umodpoly r1 = s - q * d1;
1161                 umodpoly r2 = t - q * d2;
1162                 c = d;
1163                 s = d1;
1164                 t = d2;
1165                 if ( r.empty() ) break;
1166                 d = r;
1167                 d1 = r1;
1168                 d2 = r2;
1169         }
1170         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
1171         for (auto & i : s) {
1172                 i = i * fac;
1173         }
1174         canonicalize(s);
1175         fac = recip(lcoeff(b) * lcoeff(c));
1176         for (auto & i : t) {
1177                 i = i * fac;
1178         }
1179         canonicalize(t);
1180 }
1181
1182 /** Replaces the leading coefficient in a polynomial by a given number.
1183  *
1184  *  @param[in] poly  polynomial to change
1185  *  @param[in] lc    new leading coefficient
1186  *  @return          changed polynomial
1187  */
1188 static upoly replace_lc(const upoly& poly, const cl_I& lc)
1189 {
1190         if ( poly.empty() ) return poly;
1191         upoly r = poly;
1192         r.back() = lc;
1193         return r;
1194 }
1195
1196 /** Calculates the bound for the modulus.
1197  *  See [Mig].
1198  */
1199 static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
1200 {
1201         cl_I maxcoeff = 0;
1202         cl_R coeff = 0;
1203         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
1204                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
1205                 if ( aa > maxcoeff ) maxcoeff = aa;
1206                 coeff = coeff + square(aa);
1207         }
1208         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1209         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1210         return ( B > maxcoeff ) ? B : maxcoeff;
1211 }
1212
1213 /** Calculates the bound for the modulus.
1214  *  See [Mig].
1215  */
1216 static inline cl_I calc_bound(const upoly& a, int maxdeg)
1217 {
1218         cl_I maxcoeff = 0;
1219         cl_R coeff = 0;
1220         for ( int i=degree(a); i>=0; --i ) {
1221                 cl_I aa = abs(a[i]);
1222                 if ( aa > maxcoeff ) maxcoeff = aa;
1223                 coeff = coeff + square(aa);
1224         }
1225         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1226         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1227         return ( B > maxcoeff ) ? B : maxcoeff;
1228 }
1229
1230 /** Hensel lifting as used by factor_univariate().
1231  *
1232  *  The implementation follows the algorithm in chapter 6 of [GCL].
1233  *
1234  *  @param[in]  a_   primitive univariate polynomials
1235  *  @param[in]  p    prime number that does not divide lcoeff(a)
1236  *  @param[in]  u1_  modular factor of a (mod p)
1237  *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
1238  *                   fulfilling  u1_*w1_ == a mod p
1239  *  @param[out] u    lifted factor
1240  *  @param[out] w    lifted factor, u*w = a
1241  */
1242 static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
1243 {
1244         upoly a = a_;
1245         const cl_modint_ring& R = u1_[0].ring();
1246
1247         // calc bound B
1248         int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
1249         cl_I maxmodulus = 2*calc_bound(a, maxdeg);
1250
1251         // step 1
1252         cl_I alpha = lcoeff(a);
1253         a = a * alpha;
1254         umodpoly nu1 = u1_;
1255         normalize_in_field(nu1);
1256         umodpoly nw1 = w1_;
1257         normalize_in_field(nw1);
1258         upoly phi;
1259         phi = umodpoly_to_upoly(nu1) * alpha;
1260         umodpoly u1;
1261         umodpoly_from_upoly(u1, phi, R);
1262         phi = umodpoly_to_upoly(nw1) * alpha;
1263         umodpoly w1;
1264         umodpoly_from_upoly(w1, phi, R);
1265
1266         // step 2
1267         umodpoly s;
1268         umodpoly t;
1269         exteuclid(u1, w1, s, t);
1270
1271         // step 3
1272         u = replace_lc(umodpoly_to_upoly(u1), alpha);
1273         w = replace_lc(umodpoly_to_upoly(w1), alpha);
1274         upoly e = a - u * w;
1275         cl_I modulus = p;
1276
1277         // step 4
1278         while ( !e.empty() && modulus < maxmodulus ) {
1279                 upoly c = e / modulus;
1280                 phi = umodpoly_to_upoly(s) * c;
1281                 umodpoly sigmatilde;
1282                 umodpoly_from_upoly(sigmatilde, phi, R);
1283                 phi = umodpoly_to_upoly(t) * c;
1284                 umodpoly tautilde;
1285                 umodpoly_from_upoly(tautilde, phi, R);
1286                 umodpoly r, q;
1287                 remdiv(sigmatilde, w1, r, q);
1288                 umodpoly sigma = r;
1289                 phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
1290                 umodpoly tau;
1291                 umodpoly_from_upoly(tau, phi, R);
1292                 u = u + umodpoly_to_upoly(tau) * modulus;
1293                 w = w + umodpoly_to_upoly(sigma) * modulus;
1294                 e = a - u * w;
1295                 modulus = modulus * p;
1296         }
1297
1298         // step 5
1299         if ( e.empty() ) {
1300                 cl_I g = u[0];
1301                 for ( size_t i=1; i<u.size(); ++i ) {
1302                         g = gcd(g, u[i]);
1303                         if ( g == 1 ) break;
1304                 }
1305                 if ( g != 1 ) {
1306                         u = u / g;
1307                         w = w * g;
1308                 }
1309                 if ( alpha != 1 ) {
1310                         w = w / alpha;
1311                 }
1312         } else {
1313                 u.clear();
1314         }
1315 }
1316
1317 /** Returns a new prime number.
1318  *
1319  *  @param[in] p  prime number
1320  *  @return       next prime number after p
1321  */
1322 static unsigned int next_prime(unsigned int p)
1323 {
1324         static vector<unsigned int> primes;
1325         if (primes.empty()) {
1326                 primes = {3, 5, 7};
1327         }
1328         if ( p >= primes.back() ) {
1329                 unsigned int candidate = primes.back() + 2;
1330                 while ( true ) {
1331                         size_t n = primes.size()/2;
1332                         for ( size_t i=0; i<n; ++i ) {
1333                                 if (candidate % primes[i])
1334                                         continue;
1335                                 candidate += 2;
1336                                 i=-1;
1337                         }
1338                         primes.push_back(candidate);
1339                         if (candidate > p)
1340                                 break;
1341                 }
1342                 return candidate;
1343         }
1344         for (auto & it : primes) {
1345                 if ( it > p ) {
1346                         return it;
1347                 }
1348         }
1349         throw logic_error("next_prime: should not reach this point!");
1350 }
1351
1352 /** Manages the splitting a vector of of modular factors into two partitions.
1353  */
1354 class factor_partition
1355 {
1356 public:
1357         /** Takes the vector of modular factors and initializes the first partition */
1358         factor_partition(const upvec& factors_) : factors(factors_)
1359         {
1360                 n = factors.size();
1361                 k.resize(n, 0);
1362                 k[0] = 1;
1363                 cache.resize(n-1);
1364                 one.resize(1, factors.front()[0].ring()->one());
1365                 len = 1;
1366                 last = 0;
1367                 split();
1368         }
1369         int operator[](size_t i) const { return k[i]; }
1370         size_t size() const { return n; }
1371         size_t size_left() const { return n-len; }
1372         size_t size_right() const { return len; }
1373         /** Initializes the next partition.
1374             Returns true, if there is one, false otherwise. */
1375         bool next()
1376         {
1377                 if ( last == n-1 ) {
1378                         int rem = len - 1;
1379                         int p = last - 1;
1380                         while ( rem ) {
1381                                 if ( k[p] ) {
1382                                         --rem;
1383                                         --p;
1384                                         continue;
1385                                 }
1386                                 last = p - 1;
1387                                 while ( k[last] == 0 ) { --last; }
1388                                 if ( last == 0 && n == 2*len ) return false;
1389                                 k[last++] = 0;
1390                                 for ( size_t i=0; i<=len-rem; ++i ) {
1391                                         k[last] = 1;
1392                                         ++last;
1393                                 }
1394                                 fill(k.begin()+last, k.end(), 0);
1395                                 --last;
1396                                 split();
1397                                 return true;
1398                         }
1399                         last = len;
1400                         ++len;
1401                         if ( len > n/2 ) return false;
1402                         fill(k.begin(), k.begin()+len, 1);
1403                         fill(k.begin()+len+1, k.end(), 0);
1404                 } else {
1405                         k[last++] = 0;
1406                         k[last] = 1;
1407                 }
1408                 split();
1409                 return true;
1410         }
1411         /** Get first partition */
1412         umodpoly& left() { return lr[0]; }
1413         /** Get second partition */
1414         umodpoly& right() { return lr[1]; }
1415 private:
1416         void split_cached()
1417         {
1418                 size_t i = 0;
1419                 do {
1420                         size_t pos = i;
1421                         int group = k[i++];
1422                         size_t d = 0;
1423                         while ( i < n && k[i] == group ) { ++d; ++i; }
1424                         if ( d ) {
1425                                 if ( cache[pos].size() >= d ) {
1426                                         lr[group] = lr[group] * cache[pos][d-1];
1427                                 } else {
1428                                         if ( cache[pos].size() == 0 ) {
1429                                                 cache[pos].push_back(factors[pos] * factors[pos+1]);
1430                                         }
1431                                         size_t j = pos + cache[pos].size() + 1;
1432                                         d -= cache[pos].size();
1433                                         while ( d ) {
1434                                                 umodpoly buf = cache[pos].back() * factors[j];
1435                                                 cache[pos].push_back(buf);
1436                                                 --d;
1437                                                 ++j;
1438                                         }
1439                                         lr[group] = lr[group] * cache[pos].back();
1440                                 }
1441                         } else {
1442                                 lr[group] = lr[group] * factors[pos];
1443                         }
1444                 } while ( i < n );
1445         }
1446         void split()
1447         {
1448                 lr[0] = one;
1449                 lr[1] = one;
1450                 if ( n > 6 ) {
1451                         split_cached();
1452                 } else {
1453                         for ( size_t i=0; i<n; ++i ) {
1454                                 lr[k[i]] = lr[k[i]] * factors[i];
1455                         }
1456                 }
1457         }
1458 private:
1459         umodpoly lr[2];
1460         vector<vector<umodpoly>> cache;
1461         upvec factors;
1462         umodpoly one;
1463         size_t n;
1464         size_t len;
1465         size_t last;
1466         vector<int> k;
1467 };
1468
1469 /** Contains a pair of univariate polynomial and its modular factors.
1470  *  Used by factor_univariate().
1471  */
1472 struct ModFactors
1473 {
1474         upoly poly;
1475         upvec factors;
1476 };
1477
1478 /** Univariate polynomial factorization.
1479  *
1480  *  Modular factorization is tried for several primes to minimize the number of
1481  *  modular factors. Then, Hensel lifting is performed.
1482  *
1483  *  @param[in]     poly   expanded square free univariate polynomial
1484  *  @param[in]     x      symbol
1485  *  @param[in,out] prime  prime number to start trying modular factorization with,
1486  *                        output value is the prime number actually used
1487  */
1488 static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
1489 {
1490         ex unit, cont, prim_ex;
1491         poly.unitcontprim(x, unit, cont, prim_ex);
1492         upoly prim;
1493         upoly_from_ex(prim, prim_ex, x);
1494
1495         // determine proper prime and minimize number of modular factors
1496         prime = 3;
1497         unsigned int lastp = prime;
1498         cl_modint_ring R;
1499         unsigned int trials = 0;
1500         unsigned int minfactors = 0;
1501
1502         const numeric& cont_n = ex_to<numeric>(cont);
1503         cl_I i_cont;
1504         if (cont_n.is_integer()) {
1505                 i_cont = the<cl_I>(cont_n.to_cl_N());
1506         } else {
1507                 // poly \in Q[x] => poly = q ipoly, ipoly \in Z[x], q \in Q
1508                 // factor(poly) \equiv q factor(ipoly)
1509                 i_cont = cl_I(1);
1510         }
1511         cl_I lc = lcoeff(prim)*i_cont;
1512         upvec factors;
1513         while ( trials < 2 ) {
1514                 umodpoly modpoly;
1515                 while ( true ) {
1516                         prime = next_prime(prime);
1517                         if ( !zerop(rem(lc, prime)) ) {
1518                                 R = find_modint_ring(prime);
1519                                 umodpoly_from_upoly(modpoly, prim, R);
1520                                 if ( squarefree(modpoly) ) break;
1521                         }
1522                 }
1523
1524                 // do modular factorization
1525                 upvec trialfactors;
1526                 factor_modular(modpoly, trialfactors);
1527                 if ( trialfactors.size() <= 1 ) {
1528                         // irreducible for sure
1529                         return poly;
1530                 }
1531
1532                 if ( minfactors == 0 || trialfactors.size() < minfactors ) {
1533                         factors = trialfactors;
1534                         minfactors = trialfactors.size();
1535                         lastp = prime;
1536                         trials = 1;
1537                 } else {
1538                         ++trials;
1539                 }
1540         }
1541         prime = lastp;
1542         R = find_modint_ring(prime);
1543
1544         // lift all factor combinations
1545         stack<ModFactors> tocheck;
1546         ModFactors mf;
1547         mf.poly = prim;
1548         mf.factors = factors;
1549         tocheck.push(mf);
1550         upoly f1, f2;
1551         ex result = 1;
1552         while ( tocheck.size() ) {
1553                 const size_t n = tocheck.top().factors.size();
1554                 factor_partition part(tocheck.top().factors);
1555                 while ( true ) {
1556                         // call Hensel lifting
1557                         hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
1558                         if ( !f1.empty() ) {
1559                                 // successful, update the stack and the result
1560                                 if ( part.size_left() == 1 ) {
1561                                         if ( part.size_right() == 1 ) {
1562                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1563                                                 tocheck.pop();
1564                                                 break;
1565                                         }
1566                                         result *= upoly_to_ex(f1, x);
1567                                         tocheck.top().poly = f2;
1568                                         for ( size_t i=0; i<n; ++i ) {
1569                                                 if ( part[i] == 0 ) {
1570                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1571                                                         break;
1572                                                 }
1573                                         }
1574                                         break;
1575                                 }
1576                                 else if ( part.size_right() == 1 ) {
1577                                         if ( part.size_left() == 1 ) {
1578                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1579                                                 tocheck.pop();
1580                                                 break;
1581                                         }
1582                                         result *= upoly_to_ex(f2, x);
1583                                         tocheck.top().poly = f1;
1584                                         for ( size_t i=0; i<n; ++i ) {
1585                                                 if ( part[i] == 1 ) {
1586                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1587                                                         break;
1588                                                 }
1589                                         }
1590                                         break;
1591                                 } else {
1592                                         upvec newfactors1(part.size_left()), newfactors2(part.size_right());
1593                                         auto i1 = newfactors1.begin(), i2 = newfactors2.begin();
1594                                         for ( size_t i=0; i<n; ++i ) {
1595                                                 if ( part[i] ) {
1596                                                         *i2++ = tocheck.top().factors[i];
1597                                                 } else {
1598                                                         *i1++ = tocheck.top().factors[i];
1599                                                 }
1600                                         }
1601                                         tocheck.top().factors = newfactors1;
1602                                         tocheck.top().poly = f1;
1603                                         ModFactors mf;
1604                                         mf.factors = newfactors2;
1605                                         mf.poly = f2;
1606                                         tocheck.push(mf);
1607                                         break;
1608                                 }
1609                         } else {
1610                                 // not successful
1611                                 if ( !part.next() ) {
1612                                         // if no more combinations left, return polynomial as
1613                                         // irreducible
1614                                         result *= upoly_to_ex(tocheck.top().poly, x);
1615                                         tocheck.pop();
1616                                         break;
1617                                 }
1618                         }
1619                 }
1620         }
1621
1622         return unit * cont * result;
1623 }
1624
1625 /** Second interface to factor_univariate() to be used if the information about
1626  *  the prime is not needed.
1627  */
1628 static inline ex factor_univariate(const ex& poly, const ex& x)
1629 {
1630         unsigned int prime;
1631         return factor_univariate(poly, x, prime);
1632 }
1633
1634 /** Represents an evaluation point (<symbol>==<integer>).
1635  */
1636 struct EvalPoint
1637 {
1638         ex x;
1639         int evalpoint;
1640 };
1641
1642 #ifdef DEBUGFACTOR
1643 ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
1644 {
1645         for ( size_t i=0; i<v.size(); ++i ) {
1646                 o << "(" << v[i].x << "==" << v[i].evalpoint << ") ";
1647         }
1648         return o;
1649 }
1650 #endif // def DEBUGFACTOR
1651
1652 // forward declaration
1653 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
1654
1655 /** Utility function for multivariate Hensel lifting.
1656  *
1657  *  Solves the equation
1658  *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
1659  *  with deg(s_i) < deg(a_i)
1660  *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1661  *
1662  *  The implementation follows the algorithm in chapter 6 of [GCL].
1663  *
1664  *  @param[in]  a   vector of modular univariate polynomials
1665  *  @param[in]  x   symbol
1666  *  @param[in]  p   prime number
1667  *  @param[in]  k   p^k is modulus
1668  *  @return         vector of polynomials (s_i)
1669  */
1670 static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
1671 {
1672         const size_t r = a.size();
1673         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1674         upvec q(r-1);
1675         q[r-2] = a[r-1];
1676         for ( size_t j=r-2; j>=1; --j ) {
1677                 q[j-1] = a[j] * q[j];
1678         }
1679         umodpoly beta(1, R->one());
1680         upvec s;
1681         for ( size_t j=1; j<r; ++j ) {
1682                 vector<ex> mdarg(2);
1683                 mdarg[0] = umodpoly_to_ex(q[j-1], x);
1684                 mdarg[1] = umodpoly_to_ex(a[j-1], x);
1685                 vector<EvalPoint> empty;
1686                 vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
1687                 umodpoly sigma1;
1688                 umodpoly_from_ex(sigma1, exsigma[0], x, R);
1689                 umodpoly sigma2;
1690                 umodpoly_from_ex(sigma2, exsigma[1], x, R);
1691                 beta = sigma1;
1692                 s.push_back(sigma2);
1693         }
1694         s.push_back(beta);
1695         return s;
1696 }
1697
1698 /** Changes the modulus of a modular polynomial. Used by eea_lift().
1699  *
1700  *  @param[in]     R  new modular ring
1701  *  @param[in,out] a  polynomial to change (in situ)
1702  */
1703 static void change_modulus(const cl_modint_ring& R, umodpoly& a)
1704 {
1705         if ( a.empty() ) return;
1706         cl_modint_ring oldR = a[0].ring();
1707         for (auto & i : a) {
1708                 i = R->canonhom(oldR->retract(i));
1709         }
1710         canonicalize(a);
1711 }
1712
1713 /** Utility function for multivariate Hensel lifting.
1714  *
1715  *  Solves  s*a + t*b == 1 mod p^k  given a,b.
1716  *
1717  *  The implementation follows the algorithm in chapter 6 of [GCL].
1718  *
1719  *  @param[in]  a   polynomial
1720  *  @param[in]  b   polynomial
1721  *  @param[in]  x   symbol
1722  *  @param[in]  p   prime number
1723  *  @param[in]  k   p^k is modulus
1724  *  @param[out] s_  output polynomial
1725  *  @param[out] t_  output polynomial
1726  */
1727 static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
1728 {
1729         cl_modint_ring R = find_modint_ring(p);
1730         umodpoly amod = a;
1731         change_modulus(R, amod);
1732         umodpoly bmod = b;
1733         change_modulus(R, bmod);
1734
1735         umodpoly smod;
1736         umodpoly tmod;
1737         exteuclid(amod, bmod, smod, tmod);
1738
1739         cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
1740         umodpoly s = smod;
1741         change_modulus(Rpk, s);
1742         umodpoly t = tmod;
1743         change_modulus(Rpk, t);
1744
1745         cl_I modulus(p);
1746         umodpoly one(1, Rpk->one());
1747         for ( size_t j=1; j<k; ++j ) {
1748                 umodpoly e = one - a * s - b * t;
1749                 reduce_coeff(e, modulus);
1750                 umodpoly c = e;
1751                 change_modulus(R, c);
1752                 umodpoly sigmabar = smod * c;
1753                 umodpoly taubar = tmod * c;
1754                 umodpoly sigma, q;
1755                 remdiv(sigmabar, bmod, sigma, q);
1756                 umodpoly tau = taubar + q * amod;
1757                 umodpoly sadd = sigma;
1758                 change_modulus(Rpk, sadd);
1759                 cl_MI modmodulus(Rpk, modulus);
1760                 s = s + sadd * modmodulus;
1761                 umodpoly tadd = tau;
1762                 change_modulus(Rpk, tadd);
1763                 t = t + tadd * modmodulus;
1764                 modulus = modulus * p;
1765         }
1766
1767         s_ = s; t_ = t;
1768 }
1769
1770 /** Utility function for multivariate Hensel lifting.
1771  *
1772  *  Solves the equation
1773  *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
1774  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1775  *
1776  *  The implementation follows the algorithm in chapter 6 of [GCL].
1777  *
1778  *  @param a  vector with univariate polynomials mod p^k
1779  *  @param x  symbol
1780  *  @param m  exponent of x^m in the equation to solve
1781  *  @param p  prime number
1782  *  @param k  p^k is modulus
1783  *  @return   vector of polynomials (s_i)
1784  */
1785 static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
1786 {
1787         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1788
1789         const size_t r = a.size();
1790         upvec result;
1791         if ( r > 2 ) {
1792                 upvec s = multiterm_eea_lift(a, x, p, k);
1793                 for ( size_t j=0; j<r; ++j ) {
1794                         umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
1795                         umodpoly buf;
1796                         rem(bmod, a[j], buf);
1797                         result.push_back(buf);
1798                 }
1799         } else {
1800                 umodpoly s, t;
1801                 eea_lift(a[1], a[0], x, p, k, s, t);
1802                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
1803                 umodpoly buf, q;
1804                 remdiv(bmod, a[0], buf, q);
1805                 result.push_back(buf);
1806                 umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
1807                 buf = t1mod + q * a[1];
1808                 result.push_back(buf);
1809         }
1810
1811         return result;
1812 }
1813
1814 /** Map used by function make_modular().
1815  *  Finds every coefficient in a polynomial and replaces it by is value in the
1816  *  given modular ring R (symmetric representation).
1817  */
1818 struct make_modular_map : public map_function {
1819         cl_modint_ring R;
1820         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
1821         ex operator()(const ex& e) override
1822         {
1823                 if ( is_a<add>(e) || is_a<mul>(e) ) {
1824                         return e.map(*this);
1825                 }
1826                 else if ( is_a<numeric>(e) ) {
1827                         numeric mod(R->modulus);
1828                         numeric halfmod = (mod-1)/2;
1829                         cl_MI emod = R->canonhom(the<cl_I>(ex_to<numeric>(e).to_cl_N()));
1830                         numeric n(R->retract(emod));
1831                         if ( n > halfmod ) {
1832                                 return n-mod;
1833                         } else {
1834                                 return n;
1835                         }
1836                 }
1837                 return e;
1838         }
1839 };
1840
1841 /** Helps mimicking modular multivariate polynomial arithmetic.
1842  *
1843  *  @param e  expression of which to make the coefficients equal to their value
1844  *            in the modular ring R (symmetric representation)
1845  *  @param R  modular ring
1846  *  @return   resulting expression
1847  */
1848 static ex make_modular(const ex& e, const cl_modint_ring& R)
1849 {
1850         make_modular_map map(R);
1851         return map(e.expand());
1852 }
1853
1854 /** Utility function for multivariate Hensel lifting.
1855  *
1856  *  Returns the polynomials s_i that fulfill
1857  *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
1858  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1859  *
1860  *  The implementation follows the algorithm in chapter 6 of [GCL].
1861  *
1862  *  @param a_  vector of multivariate factors mod p^k
1863  *  @param x   symbol (equiv. x_1 in [GCL])
1864  *  @param c   polynomial mod p^k
1865  *  @param I   vector of evaluation points
1866  *  @param d   maximum total degree of result
1867  *  @param p   prime number
1868  *  @param k   p^k is modulus
1869  *  @return    vector of polynomials (s_i)
1870  */
1871 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
1872                                     unsigned int d, unsigned int p, unsigned int k)
1873 {
1874         vector<ex> a = a_;
1875
1876         const cl_I modulus = expt_pos(cl_I(p),k);
1877         const cl_modint_ring R = find_modint_ring(modulus);
1878         const size_t r = a.size();
1879         const size_t nu = I.size() + 1;
1880
1881         vector<ex> sigma;
1882         if ( nu > 1 ) {
1883                 ex xnu = I.back().x;
1884                 int alphanu = I.back().evalpoint;
1885
1886                 ex A = 1;
1887                 for ( size_t i=0; i<r; ++i ) {
1888                         A *= a[i];
1889                 }
1890                 vector<ex> b(r);
1891                 for ( size_t i=0; i<r; ++i ) {
1892                         b[i] = normal(A / a[i]);
1893                 }
1894
1895                 vector<ex> anew = a;
1896                 for ( size_t i=0; i<r; ++i ) {
1897                         anew[i] = anew[i].subs(xnu == alphanu);
1898                 }
1899                 ex cnew = c.subs(xnu == alphanu);
1900                 vector<EvalPoint> Inew = I;
1901                 Inew.pop_back();
1902                 sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
1903
1904                 ex buf = c;
1905                 for ( size_t i=0; i<r; ++i ) {
1906                         buf -= sigma[i] * b[i];
1907                 }
1908                 ex e = make_modular(buf, R);
1909
1910                 ex monomial = 1;
1911                 for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
1912                         monomial *= (xnu - alphanu);
1913                         monomial = expand(monomial);
1914                         ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
1915                         cm = make_modular(cm, R);
1916                         if ( !cm.is_zero() ) {
1917                                 vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
1918                                 ex buf = e;
1919                                 for ( size_t j=0; j<delta_s.size(); ++j ) {
1920                                         delta_s[j] *= monomial;
1921                                         sigma[j] += delta_s[j];
1922                                         buf -= delta_s[j] * b[j];
1923                                 }
1924                                 e = make_modular(buf, R);
1925                         }
1926                 }
1927         } else {
1928                 upvec amod;
1929                 for ( size_t i=0; i<a.size(); ++i ) {
1930                         umodpoly up;
1931                         umodpoly_from_ex(up, a[i], x, R);
1932                         amod.push_back(up);
1933                 }
1934
1935                 sigma.insert(sigma.begin(), r, 0);
1936                 size_t nterms;
1937                 ex z;
1938                 if ( is_a<add>(c) ) {
1939                         nterms = c.nops();
1940                         z = c.op(0);
1941                 } else {
1942                         nterms = 1;
1943                         z = c;
1944                 }
1945                 for ( size_t i=0; i<nterms; ++i ) {
1946                         int m = z.degree(x);
1947                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
1948                         upvec delta_s = univar_diophant(amod, x, m, p, k);
1949                         cl_MI modcm;
1950                         cl_I poscm = plusp(cm) ? cm : mod(cm, modulus);
1951                         modcm = cl_MI(R, poscm);
1952                         for ( size_t j=0; j<delta_s.size(); ++j ) {
1953                                 delta_s[j] = delta_s[j] * modcm;
1954                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
1955                         }
1956                         if ( nterms > 1 ) {
1957                                 z = c.op(i+1);
1958                         }
1959                 }
1960         }
1961
1962         for ( size_t i=0; i<sigma.size(); ++i ) {
1963                 sigma[i] = make_modular(sigma[i], R);
1964         }
1965
1966         return sigma;
1967 }
1968
1969 /** Multivariate Hensel lifting.
1970  *  The implementation follows the algorithm in chapter 6 of [GCL].
1971  *  Since we don't have a data type for modular multivariate polynomials, the
1972  *  respective operations are done in a GiNaC::ex and the function
1973  *  make_modular() is then called to make the coefficient modular p^l.
1974  *
1975  *  @param a    multivariate polynomial primitive in x
1976  *  @param x    symbol (equiv. x_1 in [GCL])
1977  *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
1978  *  @param p    prime number (should not divide lcoeff(a mod I))
1979  *  @param l    p^l is the modulus of the lifted univariate field
1980  *  @param u    vector of modular (mod p^l) factors of a mod I
1981  *  @param lcU  correct leading coefficient of the univariate factors of a mod I
1982  *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
1983  *              empty if Hensel lifting did not succeed
1984  */
1985 static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
1986                           unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
1987 {
1988         const size_t nu = I.size() + 1;
1989         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
1990
1991         vector<ex> A(nu);
1992         A[nu-1] = a;
1993
1994         for ( size_t j=nu; j>=2; --j ) {
1995                 ex x = I[j-2].x;
1996                 int alpha = I[j-2].evalpoint;
1997                 A[j-2] = A[j-1].subs(x==alpha);
1998                 A[j-2] = make_modular(A[j-2], R);
1999         }
2000
2001         int maxdeg = a.degree(I.front().x);
2002         for ( size_t i=1; i<I.size(); ++i ) {
2003                 int maxdeg2 = a.degree(I[i].x);
2004                 if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
2005         }
2006
2007         const size_t n = u.size();
2008         vector<ex> U(n);
2009         for ( size_t i=0; i<n; ++i ) {
2010                 U[i] = umodpoly_to_ex(u[i], x);
2011         }
2012
2013         for ( size_t j=2; j<=nu; ++j ) {
2014                 vector<ex> U1 = U;
2015                 ex monomial = 1;
2016                 for ( size_t m=0; m<n; ++m) {
2017                         if ( lcU[m] != 1 ) {
2018                                 ex coef = lcU[m];
2019                                 for ( size_t i=j-1; i<nu-1; ++i ) {
2020                                         coef = coef.subs(I[i].x == I[i].evalpoint);
2021                                 }
2022                                 coef = make_modular(coef, R);
2023                                 int deg = U[m].degree(x);
2024                                 U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
2025                         }
2026                 }
2027                 ex Uprod = 1;
2028                 for ( size_t i=0; i<n; ++i ) {
2029                         Uprod *= U[i];
2030                 }
2031                 ex e = expand(A[j-1] - Uprod);
2032
2033                 vector<EvalPoint> newI;
2034                 for ( size_t i=1; i<=j-2; ++i ) {
2035                         newI.push_back(I[i-1]);
2036                 }
2037
2038                 ex xj = I[j-2].x;
2039                 int alphaj = I[j-2].evalpoint;
2040                 size_t deg = A[j-1].degree(xj);
2041                 for ( size_t k=1; k<=deg; ++k ) {
2042                         if ( !e.is_zero() ) {
2043                                 monomial *= (xj - alphaj);
2044                                 monomial = expand(monomial);
2045                                 ex dif = e.diff(ex_to<symbol>(xj), k);
2046                                 ex c = dif.subs(xj==alphaj) / factorial(k);
2047                                 if ( !c.is_zero() ) {
2048                                         vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
2049                                         for ( size_t i=0; i<n; ++i ) {
2050                                                 deltaU[i] *= monomial;
2051                                                 U[i] += deltaU[i];
2052                                                 U[i] = make_modular(U[i], R);
2053                                         }
2054                                         ex Uprod = 1;
2055                                         for ( size_t i=0; i<n; ++i ) {
2056                                                 Uprod *= U[i];
2057                                         }
2058                                         e = A[j-1] - Uprod;
2059                                         e = make_modular(e, R);
2060                                 }
2061                         }
2062                 }
2063         }
2064
2065         ex acand = 1;
2066         for ( size_t i=0; i<U.size(); ++i ) {
2067                 acand *= U[i];
2068         }
2069         if ( expand(a-acand).is_zero() ) {
2070                 return lst(U.begin(), U.end());
2071         } else {
2072                 return lst{};
2073         }
2074 }
2075
2076 /** Takes a factorized expression and puts the factors in a lst. The exponents
2077  *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
2078  *  element of the list is always the numeric coefficient.
2079  */
2080 static ex put_factors_into_lst(const ex& e)
2081 {
2082         lst result;
2083         if ( is_a<numeric>(e) ) {
2084                 result.append(e);
2085                 return result;
2086         }
2087         if ( is_a<power>(e) ) {
2088                 result.append(1);
2089                 result.append(e.op(0));
2090                 return result;
2091         }
2092         if ( is_a<symbol>(e) || is_a<add>(e) ) {
2093                 ex icont(e.integer_content());
2094                 result.append(icont);
2095                 result.append(e/icont);
2096                 return result;
2097         }
2098         if ( is_a<mul>(e) ) {
2099                 ex nfac = 1;
2100                 for ( size_t i=0; i<e.nops(); ++i ) {
2101                         ex op = e.op(i);
2102                         if ( is_a<numeric>(op) ) {
2103                                 nfac = op;
2104                         }
2105                         if ( is_a<power>(op) ) {
2106                                 result.append(op.op(0));
2107                         }
2108                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
2109                                 result.append(op);
2110                         }
2111                 }
2112                 result.prepend(nfac);
2113                 return result;
2114         }
2115         throw runtime_error("put_factors_into_lst: bad term.");
2116 }
2117
2118 /** Checks a set of numbers for whether each number has a unique prime factor.
2119  *
2120  *  @param[in]  f  list of numbers to check
2121  *  @return        true: if number set is bad, false: if set is okay (has unique
2122  *                 prime factors)
2123  */
2124 static bool checkdivisors(const lst& f)
2125 {
2126         const int k = f.nops();
2127         numeric q, r;
2128         vector<numeric> d(k);
2129         d[0] = ex_to<numeric>(abs(f.op(0)));
2130         for ( int i=1; i<k; ++i ) {
2131                 q = ex_to<numeric>(abs(f.op(i)));
2132                 for ( int j=i-1; j>=0; --j ) {
2133                         r = d[j];
2134                         do {
2135                                 r = gcd(r, q);
2136                                 q = q/r;
2137                         } while ( r != 1 );
2138                         if ( q == 1 ) {
2139                                 return true;
2140                         }
2141                 }
2142                 d[i] = q;
2143         }
2144         return false;
2145 }
2146
2147 /** Generates a set of evaluation points for a multivariate polynomial.
2148  *  The set fulfills the following conditions:
2149  *  1. lcoeff(evaluated_polynomial) does not vanish
2150  *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
2151  *  3. evaluated_polynomial is square free
2152  *  See [Wan] for more details.
2153  *
2154  *  @param[in]     u        multivariate polynomial to be factored
2155  *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
2156  *  @param[in]     syms     set of symbols that appear in u
2157  *  @param[in]     f        lst containing the factors of the leading coefficient vn
2158  *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
2159  *  @param[out]    u0       returns the evaluated (univariate) polynomial
2160  *  @param[out]    a        returns the valid evaluation points. must have initial size equal
2161  *                          number of symbols-1 before calling generate_set
2162  */
2163 static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
2164                          numeric& modulus, ex& u0, vector<numeric>& a)
2165 {
2166         const ex& x = *syms.begin();
2167         while ( true ) {
2168                 ++modulus;
2169                 // generate a set of integers ...
2170                 u0 = u;
2171                 ex vna = vn;
2172                 ex vnatry;
2173                 exset::const_iterator s = syms.begin();
2174                 ++s;
2175                 for ( size_t i=0; i<a.size(); ++i ) {
2176                         do {
2177                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
2178                                 vnatry = vna.subs(*s == a[i]);
2179                                 // ... for which the leading coefficient doesn't vanish ...
2180                         } while ( vnatry == 0 );
2181                         vna = vnatry;
2182                         u0 = u0.subs(*s == a[i]);
2183                         ++s;
2184                 }
2185                 // ... for which u0 is square free ...
2186                 ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
2187                 if ( !is_a<numeric>(g) ) {
2188                         continue;
2189                 }
2190                 if ( !is_a<numeric>(vn) ) {
2191                         // ... and for which the evaluated factors have each an unique prime factor
2192                         lst fnum = f;
2193                         fnum.let_op(0) = fnum.op(0) * u0.content(x);
2194                         for ( size_t i=1; i<fnum.nops(); ++i ) {
2195                                 if ( !is_a<numeric>(fnum.op(i)) ) {
2196                                         s = syms.begin();
2197                                         ++s;
2198                                         for ( size_t j=0; j<a.size(); ++j, ++s ) {
2199                                                 fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
2200                                         }
2201                                 }
2202                         }
2203                         if ( checkdivisors(fnum) ) {
2204                                 continue;
2205                         }
2206                 }
2207                 // ok, we have a valid set now
2208                 return;
2209         }
2210 }
2211
2212 // forward declaration
2213 static ex factor_sqrfree(const ex& poly);
2214
2215 /** Multivariate factorization.
2216  *
2217  *  The implementation is based on the algorithm described in [Wan].
2218  *  An evaluation homomorphism (a set of integers) is determined that fulfills
2219  *  certain criteria. The evaluated polynomial is univariate and is factorized
2220  *  by factor_univariate(). The main work then is to find the correct leading
2221  *  coefficients of the univariate factors. They have to correspond to the
2222  *  factors of the (multivariate) leading coefficient of the input polynomial
2223  *  (as defined for a specific variable x). After that the Hensel lifting can be
2224  *  performed.
2225  *
2226  *  @param[in] poly  expanded, square free polynomial
2227  *  @param[in] syms  contains the symbols in the polynomial
2228  *  @return          factorized polynomial
2229  */
2230 static ex factor_multivariate(const ex& poly, const exset& syms)
2231 {
2232         exset::const_iterator s;
2233         const ex& x = *syms.begin();
2234
2235         // make polynomial primitive
2236         ex unit, cont, pp;
2237         poly.unitcontprim(x, unit, cont, pp);
2238         if ( !is_a<numeric>(cont) ) {
2239                 return factor_sqrfree(cont) * factor_sqrfree(pp);
2240         }
2241
2242         // factor leading coefficient
2243         ex vn = pp.collect(x).lcoeff(x);
2244         ex vnlst;
2245         if ( is_a<numeric>(vn) ) {
2246                 vnlst = lst{vn};
2247         }
2248         else {
2249                 ex vnfactors = factor(vn);
2250                 vnlst = put_factors_into_lst(vnfactors);
2251         }
2252
2253         const unsigned int maxtrials = 3;
2254         numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
2255         vector<numeric> a(syms.size()-1, 0);
2256
2257         // try now to factorize until we are successful
2258         while ( true ) {
2259
2260                 unsigned int trialcount = 0;
2261                 unsigned int prime;
2262                 int factor_count = 0;
2263                 int min_factor_count = -1;
2264                 ex u, delta;
2265                 ex ufac, ufaclst;
2266
2267                 // try several evaluation points to reduce the number of factors
2268                 while ( trialcount < maxtrials ) {
2269
2270                         // generate a set of valid evaluation points
2271                         generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
2272
2273                         ufac = factor_univariate(u, x, prime);
2274                         ufaclst = put_factors_into_lst(ufac);
2275                         factor_count = ufaclst.nops()-1;
2276                         delta = ufaclst.op(0);
2277
2278                         if ( factor_count <= 1 ) {
2279                                 // irreducible
2280                                 return poly;
2281                         }
2282                         if ( min_factor_count < 0 ) {
2283                                 // first time here
2284                                 min_factor_count = factor_count;
2285                         }
2286                         else if ( min_factor_count == factor_count ) {
2287                                 // one less to try
2288                                 ++trialcount;
2289                         }
2290                         else if ( min_factor_count > factor_count ) {
2291                                 // new minimum, reset trial counter
2292                                 min_factor_count = factor_count;
2293                                 trialcount = 0;
2294                         }
2295                 }
2296
2297                 // determine true leading coefficients for the Hensel lifting
2298                 vector<ex> C(factor_count);
2299                 if ( is_a<numeric>(vn) ) {
2300                         // easy case
2301                         for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2302                                 C[i-1] = ufaclst.op(i).lcoeff(x);
2303                         }
2304                 } else {
2305                         // difficult case.
2306                         // we use the property of the ftilde having a unique prime factor.
2307                         // details can be found in [Wan].
2308                         // calculate ftilde
2309                         vector<numeric> ftilde(vnlst.nops()-1);
2310                         for ( size_t i=0; i<ftilde.size(); ++i ) {
2311                                 ex ft = vnlst.op(i+1);
2312                                 s = syms.begin();
2313                                 ++s;
2314                                 for ( size_t j=0; j<a.size(); ++j ) {
2315                                         ft = ft.subs(*s == a[j]);
2316                                         ++s;
2317                                 }
2318                                 ftilde[i] = ex_to<numeric>(ft);
2319                         }
2320                         // calculate D and C
2321                         vector<bool> used_flag(ftilde.size(), false);
2322                         vector<ex> D(factor_count, 1);
2323                         if ( delta == 1 ) {
2324                                 for ( int i=0; i<factor_count; ++i ) {
2325                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2326                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2327                                                 int count = 0;
2328                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2329                                                         prefac = iquo(prefac, ftilde[j]);
2330                                                         ++count;
2331                                                 }
2332                                                 if ( count ) {
2333                                                         used_flag[j] = true;
2334                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2335                                                 }
2336                                         }
2337                                         C[i] = D[i] * prefac;
2338                                 }
2339                         } else {
2340                                 for ( int i=0; i<factor_count; ++i ) {
2341                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2342                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2343                                                 int count = 0;
2344                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2345                                                         prefac = iquo(prefac, ftilde[j]);
2346                                                         ++count;
2347                                                 }
2348                                                 while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
2349                                                         numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
2350                                                         prefac = iquo(prefac, g);
2351                                                         delta = delta / (ftilde[j]/g);
2352                                                         ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
2353                                                         ++count;
2354                                                 }
2355                                                 if ( count ) {
2356                                                         used_flag[j] = true;
2357                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2358                                                 }
2359                                         }
2360                                         C[i] = D[i] * prefac;
2361                                 }
2362                         }
2363                         // check if something went wrong
2364                         bool some_factor_unused = false;
2365                         for ( size_t i=0; i<used_flag.size(); ++i ) {
2366                                 if ( !used_flag[i] ) {
2367                                         some_factor_unused = true;
2368                                         break;
2369                                 }
2370                         }
2371                         if ( some_factor_unused ) {
2372                                 continue;
2373                         }
2374                 }
2375
2376                 // multiply the remaining content of the univariate polynomial into the
2377                 // first factor
2378                 if ( delta != 1 ) {
2379                         C[0] = C[0] * delta;
2380                         ufaclst.let_op(1) = ufaclst.op(1) * delta;
2381                 }
2382
2383                 // set up evaluation points
2384                 EvalPoint ep;
2385                 vector<EvalPoint> epv;
2386                 s = syms.begin();
2387                 ++s;
2388                 for ( size_t i=0; i<a.size(); ++i ) {
2389                         ep.x = *s++;
2390                         ep.evalpoint = a[i].to_int();
2391                         epv.push_back(ep);
2392                 }
2393
2394                 // calc bound p^l
2395                 int maxdeg = 0;
2396                 for ( int i=1; i<=factor_count; ++i ) {
2397                         if ( ufaclst.op(i).degree(x) > maxdeg ) {
2398                                 maxdeg = ufaclst[i].degree(x);
2399                         }
2400                 }
2401                 cl_I B = 2*calc_bound(u, x, maxdeg);
2402                 cl_I l = 1;
2403                 cl_I pl = prime;
2404                 while ( pl < B ) {
2405                         l = l + 1;
2406                         pl = pl * prime;
2407                 }
2408
2409                 // set up modular factors (mod p^l)
2410                 cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
2411                 upvec modfactors(ufaclst.nops()-1);
2412                 for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2413                         umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
2414                 }
2415
2416                 // try Hensel lifting
2417                 ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
2418                 if ( res != lst{} ) {
2419                         ex result = cont * unit;
2420                         for ( size_t i=0; i<res.nops(); ++i ) {
2421                                 result *= res.op(i).content(x) * res.op(i).unit(x);
2422                                 result *= res.op(i).primpart(x);
2423                         }
2424                         return result;
2425                 }
2426         }
2427 }
2428
2429 /** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
2430  */
2431 struct find_symbols_map : public map_function {
2432         exset syms;
2433         ex operator()(const ex& e) override
2434         {
2435                 if ( is_a<symbol>(e) ) {
2436                         syms.insert(e);
2437                         return e;
2438                 }
2439                 return e.map(*this);
2440         }
2441 };
2442
2443 /** Factorizes a polynomial that is square free. It calls either the univariate
2444  *  or the multivariate factorization functions.
2445  */
2446 static ex factor_sqrfree(const ex& poly)
2447 {
2448         // determine all symbols in poly
2449         find_symbols_map findsymbols;
2450         findsymbols(poly);
2451         if ( findsymbols.syms.size() == 0 ) {
2452                 return poly;
2453         }
2454
2455         if ( findsymbols.syms.size() == 1 ) {
2456                 // univariate case
2457                 const ex& x = *(findsymbols.syms.begin());
2458                 if ( poly.ldegree(x) > 0 ) {
2459                         // pull out direct factors
2460                         int ld = poly.ldegree(x);
2461                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
2462                         return res * pow(x,ld);
2463                 } else {
2464                         ex res = factor_univariate(poly, x);
2465                         return res;
2466                 }
2467         }
2468
2469         // multivariate case
2470         ex res = factor_multivariate(poly, findsymbols.syms);
2471         return res;
2472 }
2473
2474 /** Map used by factor() when factor_options::all is given to access all
2475  *  subexpressions and to call factor() on them.
2476  */
2477 struct apply_factor_map : public map_function {
2478         unsigned options;
2479         apply_factor_map(unsigned options_) : options(options_) { }
2480         ex operator()(const ex& e) override
2481         {
2482                 if ( e.info(info_flags::polynomial) ) {
2483                         return factor(e, options);
2484                 }
2485                 if ( is_a<add>(e) ) {
2486                         ex s1, s2;
2487                         for ( size_t i=0; i<e.nops(); ++i ) {
2488                                 if ( e.op(i).info(info_flags::polynomial) ) {
2489                                         s1 += e.op(i);
2490                                 } else {
2491                                         s2 += e.op(i);
2492                                 }
2493                         }
2494                         return factor(s1, options) + s2.map(*this);
2495                 }
2496                 return e.map(*this);
2497         }
2498 };
2499
2500 /** Iterate through explicit factors of e, call yield(f, k) for
2501  *  each factor of the form f^k.
2502  *
2503  *  Note that this function doesn't factor e itself, it only
2504  *  iterates through the factors already explicitly present.
2505  */
2506 template <typename F> void
2507 factor_iter(const ex &e, F yield)
2508 {
2509         if (is_a<mul>(e)) {
2510                 for (const auto &f : e) {
2511                         if (is_a<power>(f)) {
2512                                 yield(f.op(0), f.op(1));
2513                         } else {
2514                                 yield(f, ex(1));
2515                         }
2516                 }
2517         } else {
2518                 if (is_a<power>(e)) {
2519                         yield(e.op(0), e.op(1));
2520                 } else {
2521                         yield(e, ex(1));
2522                 }
2523         }
2524 }
2525
2526 /** This function factorizes a polynomial. It checks the arguments,
2527  *  tries a square free factorization, and then calls factor_sqrfree
2528  *  to do the hard work.
2529  *
2530  *  This function expands its argument, so for polynomials with
2531  *  explicit factors it's better to call it on each one separately
2532  *  (or use factor() which does just that).
2533  */
2534 static ex factor1(const ex& poly, unsigned options)
2535 {
2536         // check arguments
2537         if ( !poly.info(info_flags::polynomial) ) {
2538                 if ( options & factor_options::all ) {
2539                         options &= ~factor_options::all;
2540                         apply_factor_map factor_map(options);
2541                         return factor_map(poly);
2542                 }
2543                 return poly;
2544         }
2545
2546         // determine all symbols in poly
2547         find_symbols_map findsymbols;
2548         findsymbols(poly);
2549         if ( findsymbols.syms.size() == 0 ) {
2550                 return poly;
2551         }
2552         lst syms;
2553         for (auto & i : findsymbols.syms ) {
2554                 syms.append(i);
2555         }
2556
2557         // make poly square free
2558         ex sfpoly = sqrfree(poly.expand(), syms);
2559
2560         // factorize the square free components
2561         ex res = 1;
2562         factor_iter(sfpoly,
2563                 [&](const ex &f, const ex &k) {
2564                         if ( is_a<add>(f) ) {
2565                                 res *= pow(factor_sqrfree(f), k);
2566                         } else {
2567                                 // simple case: (monomial)^exponent
2568                                 res *= pow(f, k);
2569                         }
2570                 });
2571         return res;
2572 }
2573
2574 } // anonymous namespace
2575
2576 /** Interface function to the outside world. It uses factor1()
2577  *  on each of the explicitly present factors of poly.
2578  */
2579 ex factor(const ex& poly, unsigned options)
2580 {
2581         ex result = 1;
2582         factor_iter(poly,
2583                 [&](const ex &f1, const ex &k1) {
2584                         factor_iter(factor1(f1, options),
2585                                 [&](const ex &f2, const ex &k2) {
2586                                         result *= pow(f2, k1*k2);
2587                                 });
2588                 });
2589         return result;
2590 }
2591
2592 } // namespace GiNaC
2593
2594 #ifdef DEBUGFACTOR
2595 #include "test.h"
2596 #endif