ginac/factor.cpp

   1 /** @file factor.cpp
   2  *
   3  *  Polynomial factorization (implementation).
   4  *
   5  *  The interface function factor() at the end of this file is defined in the
   6  *  GiNaC namespace. All other utility functions and classes are defined in an
   7  *  additional anonymous namespace.
   8  *
   9  *  Factorization starts by doing a square free factorization and making the
  10  *  coefficients integer. Then, depending on the number of free variables it
  11  *  proceeds either in dedicated univariate or multivariate factorization code.
  12  *
  13  *  Univariate factorization does a modular factorization via Berlekamp's
  14  *  algorithm and distinct degree factorization. Hensel lifting is used at the
  15  *  end.
  16  *
  17  *  Multivariate factorization uses the univariate factorization (applying an
  18  *  evaluation homomorphism first) and Hensel lifting raises the answer to the
  19  *  multivariate domain. The Hensel lifting code is completely distinct from the
  20  *  code used by the univariate factorization.
  21  *
  22  *  Algorithms used can be found in
  23  *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
  24  *          P.S.Wang,
  25  *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  26  *    [GCL] Algorithms for Computer Algebra,
  27  *          K.O.Geddes, S.R.Czapor, G.Labahn,
  28  *          Springer Verlag, 1992.
  29  *    [Mig] Some Useful Bounds,
  30  *          M.Mignotte,
  31  *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
  32  *          pp. 259-263, Springer-Verlag, New York, 1982.
  33  */
  34
  35 /*
  36  *  GiNaC Copyright (C) 1999-2015 Johannes Gutenberg University Mainz, Germany
  37  *
  38  *  This program is free software; you can redistribute it and/or modify
  39  *  it under the terms of the GNU General Public License as published by
  40  *  the Free Software Foundation; either version 2 of the License, or
  41  *  (at your option) any later version.
  42  *
  43  *  This program is distributed in the hope that it will be useful,
  44  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  45  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  46  *  GNU General Public License for more details.
  47  *
  48  *  You should have received a copy of the GNU General Public License
  49  *  along with this program; if not, write to the Free Software
  50  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  51  */
  52
  53 //#define DEBUGFACTOR
  54
  55 #include "factor.h"
  56
  57 #include "ex.h"
  58 #include "numeric.h"
  59 #include "operators.h"
  60 #include "inifcns.h"
  61 #include "symbol.h"
  62 #include "relational.h"
  63 #include "power.h"
  64 #include "mul.h"
  65 #include "normal.h"
  66 #include "add.h"
  67
  68 #include <algorithm>
  69 #include <cmath>
  70 #include <limits>
  71 #include <list>
  72 #include <vector>
  73 #include <stack>
  74 #ifdef DEBUGFACTOR
  75 #include <ostream>
  76 #endif
  77 using namespace std;
  78
  79 #include <cln/cln.h>
  80 using namespace cln;
  81
  82 namespace GiNaC {
  83
  84 #ifdef DEBUGFACTOR
  85 #define DCOUT(str) cout << #str << endl
  86 #define DCOUTVAR(var) cout << #var << ": " << var << endl
  87 #define DCOUT2(str,var) cout << #str << ": " << var << endl
  88 ostream& operator<<(ostream& o, const vector<int>& v)
  89 {
  90         auto i = v.begin(), end = v.end();
  91         while ( i != end ) {
  92                 o << *i << " ";
  93                 ++i;
  94         }
  95         return o;
  96 }
  97 static ostream& operator<<(ostream& o, const vector<cl_I>& v)
  98 {
  99         auto i = v.begin(), end = v.end();
 100         while ( i != end ) {
 101                 o << *i << "[" << i-v.begin() << "]" << " ";
 102                 ++i;
 103         }
 104         return o;
 105 }
 106 static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 107 {
 108         auto i = v.begin(), end = v.end();
 109         while ( i != end ) {
 110                 o << *i << "[" << i-v.begin() << "]" << " ";
 111                 ++i;
 112         }
 113         return o;
 114 }
 115 ostream& operator<<(ostream& o, const vector<numeric>& v)
 116 {
 117         for ( size_t i=0; i<v.size(); ++i ) {
 118                 o << v[i] << " ";
 119         }
 120         return o;
 121 }
 122 ostream& operator<<(ostream& o, const vector<vector<cl_MI>>& v)
 123 {
 124         auto i = v.begin(), end = v.end();
 125         while ( i != end ) {
 126                 o << i-v.begin() << ": " << *i << endl;
 127                 ++i;
 128         }
 129         return o;
 130 }
 131 #else
 132 #define DCOUT(str)
 133 #define DCOUTVAR(var)
 134 #define DCOUT2(str,var)
 135 #endif // def DEBUGFACTOR
 136
 137 // anonymous namespace to hide all utility functions
 138 namespace {
 139
 140 ////////////////////////////////////////////////////////////////////////////////
 141 // modular univariate polynomial code
 142
 143 typedef std::vector<cln::cl_MI> umodpoly;
 144 typedef std::vector<cln::cl_I> upoly;
 145 typedef vector<umodpoly> upvec;
 146
 147 // COPY FROM UPOLY.HPP
 148
 149 // CHANGED size_t -> int !!!
 150 template<typename T> static int degree(const T& p)
 151 {
 152         return p.size() - 1;
 153 }
 154
 155 template<typename T> static typename T::value_type lcoeff(const T& p)
 156 {
 157         return p[p.size() - 1];
 158 }
 159
 160 static bool normalize_in_field(umodpoly& a)
 161 {
 162         if (a.size() == 0)
 163                 return true;
 164         if ( lcoeff(a) == a[0].ring()->one() ) {
 165                 return true;
 166         }
 167
 168         const cln::cl_MI lc_1 = recip(lcoeff(a));
 169         for (std::size_t k = a.size(); k-- != 0; )
 170                 a[k] = a[k]*lc_1;
 171         return false;
 172 }
 173
 174 template<typename T> static void
 175 canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
 176 {
 177         if (p.empty())
 178                 return;
 179
 180         std::size_t i = p.size() - 1;
 181         // Be fast if the polynomial is already canonicalized
 182         if (!zerop(p[i]))
 183                 return;
 184
 185         if (hint < p.size())
 186                 i = hint;
 187
 188         bool is_zero = false;
 189         do {
 190                 if (!zerop(p[i])) {
 191                         ++i;
 192                         break;
 193                 }
 194                 if (i == 0) {
 195                         is_zero = true;
 196                         break;
 197                 }
 198                 --i;
 199         } while (true);
 200
 201         if (is_zero) {
 202                 p.clear();
 203                 return;
 204         }
 205
 206         p.erase(p.begin() + i, p.end());
 207 }
 208
 209 // END COPY FROM UPOLY.HPP
 210
 211 static void expt_pos(umodpoly& a, unsigned int q)
 212 {
 213         if ( a.empty() ) return;
 214         cl_MI zero = a[0].ring()->zero();
 215         int deg = degree(a);
 216         a.resize(degree(a)*q+1, zero);
 217         for ( int i=deg; i>0; --i ) {
 218                 a[i*q] = a[i];
 219                 a[i] = zero;
 220         }
 221 }
 222
 223 template<bool COND, typename T = void> struct enable_if
 224 {
 225         typedef T type;
 226 };
 227
 228 template<typename T> struct enable_if<false, T> { /* empty */ };
 229
 230 template<typename T> struct uvar_poly_p
 231 {
 232         static const bool value = false;
 233 };
 234
 235 template<> struct uvar_poly_p<upoly>
 236 {
 237         static const bool value = true;
 238 };
 239
 240 template<> struct uvar_poly_p<umodpoly>
 241 {
 242         static const bool value = true;
 243 };
 244
 245 template<typename T>
 246 // Don't define this for anything but univariate polynomials.
 247 static typename enable_if<uvar_poly_p<T>::value, T>::type
 248 operator+(const T& a, const T& b)
 249 {
 250         int sa = a.size();
 251         int sb = b.size();
 252         if ( sa >= sb ) {
 253                 T r(sa);
 254                 int i = 0;
 255                 for ( ; i<sb; ++i ) {
 256                         r[i] = a[i] + b[i];
 257                 }
 258                 for ( ; i<sa; ++i ) {
 259                         r[i] = a[i];
 260                 }
 261                 canonicalize(r);
 262                 return r;
 263         }
 264         else {
 265                 T r(sb);
 266                 int i = 0;
 267                 for ( ; i<sa; ++i ) {
 268                         r[i] = a[i] + b[i];
 269                 }
 270                 for ( ; i<sb; ++i ) {
 271                         r[i] = b[i];
 272                 }
 273                 canonicalize(r);
 274                 return r;
 275         }
 276 }
 277
 278 template<typename T>
 279 // Don't define this for anything but univariate polynomials. Otherwise
 280 // overload resolution might fail (this actually happens when compiling
 281 // GiNaC with g++ 3.4).
 282 static typename enable_if<uvar_poly_p<T>::value, T>::type
 283 operator-(const T& a, const T& b)
 284 {
 285         int sa = a.size();
 286         int sb = b.size();
 287         if ( sa >= sb ) {
 288                 T r(sa);
 289                 int i = 0;
 290                 for ( ; i<sb; ++i ) {
 291                         r[i] = a[i] - b[i];
 292                 }
 293                 for ( ; i<sa; ++i ) {
 294                         r[i] = a[i];
 295                 }
 296                 canonicalize(r);
 297                 return r;
 298         }
 299         else {
 300                 T r(sb);
 301                 int i = 0;
 302                 for ( ; i<sa; ++i ) {
 303                         r[i] = a[i] - b[i];
 304                 }
 305                 for ( ; i<sb; ++i ) {
 306                         r[i] = -b[i];
 307                 }
 308                 canonicalize(r);
 309                 return r;
 310         }
 311 }
 312
 313 static upoly operator*(const upoly& a, const upoly& b)
 314 {
 315         upoly c;
 316         if ( a.empty() || b.empty() ) return c;
 317
 318         int n = degree(a) + degree(b);
 319         c.resize(n+1, 0);
 320         for ( int i=0 ; i<=n; ++i ) {
 321                 for ( int j=0 ; j<=i; ++j ) {
 322                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 323                         c[i] = c[i] + a[j] * b[i-j];
 324                 }
 325         }
 326         canonicalize(c);
 327         return c;
 328 }
 329
 330 static umodpoly operator*(const umodpoly& a, const umodpoly& b)
 331 {
 332         umodpoly c;
 333         if ( a.empty() || b.empty() ) return c;
 334
 335         int n = degree(a) + degree(b);
 336         c.resize(n+1, a[0].ring()->zero());
 337         for ( int i=0 ; i<=n; ++i ) {
 338                 for ( int j=0 ; j<=i; ++j ) {
 339                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 340                         c[i] = c[i] + a[j] * b[i-j];
 341                 }
 342         }
 343         canonicalize(c);
 344         return c;
 345 }
 346
 347 static upoly operator*(const upoly& a, const cl_I& x)
 348 {
 349         if ( zerop(x) ) {
 350                 upoly r;
 351                 return r;
 352         }
 353         upoly r(a.size());
 354         for ( size_t i=0; i<a.size(); ++i ) {
 355                 r[i] = a[i] * x;
 356         }
 357         return r;
 358 }
 359
 360 static upoly operator/(const upoly& a, const cl_I& x)
 361 {
 362         if ( zerop(x) ) {
 363                 upoly r;
 364                 return r;
 365         }
 366         upoly r(a.size());
 367         for ( size_t i=0; i<a.size(); ++i ) {
 368                 r[i] = exquo(a[i],x);
 369         }
 370         return r;
 371 }
 372
 373 static umodpoly operator*(const umodpoly& a, const cl_MI& x)
 374 {
 375         umodpoly r(a.size());
 376         for ( size_t i=0; i<a.size(); ++i ) {
 377                 r[i] = a[i] * x;
 378         }
 379         canonicalize(r);
 380         return r;
 381 }
 382
 383 static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 384 {
 385         // assert: e is in Z[x]
 386         int deg = e.degree(x);
 387         up.resize(deg+1);
 388         int ldeg = e.ldegree(x);
 389         for ( ; deg>=ldeg; --deg ) {
 390                 up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 391         }
 392         for ( ; deg>=0; --deg ) {
 393                 up[deg] = 0;
 394         }
 395         canonicalize(up);
 396 }
 397
 398 static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 399 {
 400         int deg = degree(e);
 401         ump.resize(deg+1);
 402         for ( ; deg>=0; --deg ) {
 403                 ump[deg] = R->canonhom(e[deg]);
 404         }
 405         canonicalize(ump);
 406 }
 407
 408 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 409 {
 410         // assert: e is in Z[x]
 411         int deg = e.degree(x);
 412         ump.resize(deg+1);
 413         int ldeg = e.ldegree(x);
 414         for ( ; deg>=ldeg; --deg ) {
 415                 cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 416                 ump[deg] = R->canonhom(coeff);
 417         }
 418         for ( ; deg>=0; --deg ) {
 419                 ump[deg] = R->zero();
 420         }
 421         canonicalize(ump);
 422 }
 423
 424 #ifdef DEBUGFACTOR
 425 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 426 {
 427         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 428 }
 429 #endif
 430
 431 static ex upoly_to_ex(const upoly& a, const ex& x)
 432 {
 433         if ( a.empty() ) return 0;
 434         ex e;
 435         for ( int i=degree(a); i>=0; --i ) {
 436                 e += numeric(a[i]) * pow(x, i);
 437         }
 438         return e;
 439 }
 440
 441 static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 442 {
 443         if ( a.empty() ) return 0;
 444         cl_modint_ring R = a[0].ring();
 445         cl_I mod = R->modulus;
 446         cl_I halfmod = (mod-1) >> 1;
 447         ex e;
 448         for ( int i=degree(a); i>=0; --i ) {
 449                 cl_I n = R->retract(a[i]);
 450                 if ( n > halfmod ) {
 451                         e += numeric(n-mod) * pow(x, i);
 452                 } else {
 453                         e += numeric(n) * pow(x, i);
 454                 }
 455         }
 456         return e;
 457 }
 458
 459 static upoly umodpoly_to_upoly(const umodpoly& a)
 460 {
 461         upoly e(a.size());
 462         if ( a.empty() ) return e;
 463         cl_modint_ring R = a[0].ring();
 464         cl_I mod = R->modulus;
 465         cl_I halfmod = (mod-1) >> 1;
 466         for ( int i=degree(a); i>=0; --i ) {
 467                 cl_I n = R->retract(a[i]);
 468                 if ( n > halfmod ) {
 469                         e[i] = n-mod;
 470                 } else {
 471                         e[i] = n;
 472                 }
 473         }
 474         return e;
 475 }
 476
 477 static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
 478 {
 479         umodpoly e;
 480         if ( a.empty() ) return e;
 481         cl_modint_ring oldR = a[0].ring();
 482         size_t sa = a.size();
 483         e.resize(sa+m, R->zero());
 484         for ( size_t i=0; i<sa; ++i ) {
 485                 e[i+m] = R->canonhom(oldR->retract(a[i]));
 486         }
 487         canonicalize(e);
 488         return e;
 489 }
 490
 491 /** Divides all coefficients of the polynomial a by the integer x.
 492  *  All coefficients are supposed to be divisible by x. If they are not, the
 493  *  the<cl_I> cast will raise an exception.
 494  *
 495  *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
 496  *  @param[in]     x  integer that divides the coefficients
 497  */
 498 static void reduce_coeff(umodpoly& a, const cl_I& x)
 499 {
 500         if ( a.empty() ) return;
 501
 502         cl_modint_ring R = a[0].ring();
 503         for (auto & i : a) {
 504                 // cln cannot perform this division in the modular field
 505                 cl_I c = R->retract(i);
 506                 i = cl_MI(R, the<cl_I>(c / x));
 507         }
 508 }
 509
 510 /** Calculates remainder of a/b.
 511  *  Assertion: a and b not empty.
 512  *
 513  *  @param[in]  a  polynomial dividend
 514  *  @param[in]  b  polynomial divisor
 515  *  @param[out] r  polynomial remainder
 516  */
 517 static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 518 {
 519         int k, n;
 520         n = degree(b);
 521         k = degree(a) - n;
 522         r = a;
 523         if ( k < 0 ) return;
 524
 525         do {
 526                 cl_MI qk = div(r[n+k], b[n]);
 527                 if ( !zerop(qk) ) {
 528                         for ( int i=0; i<n; ++i ) {
 529                                 unsigned int j = n + k - 1 - i;
 530                                 r[j] = r[j] - qk * b[j-k];
 531                         }
 532                 }
 533         } while ( k-- );
 534
 535         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 536         canonicalize(r);
 537 }
 538
 539 /** Calculates quotient of a/b.
 540  *  Assertion: a and b not empty.
 541  *
 542  *  @param[in]  a  polynomial dividend
 543  *  @param[in]  b  polynomial divisor
 544  *  @param[out] q  polynomial quotient
 545  */
 546 static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 547 {
 548         int k, n;
 549         n = degree(b);
 550         k = degree(a) - n;
 551         q.clear();
 552         if ( k < 0 ) return;
 553
 554         umodpoly r = a;
 555         q.resize(k+1, a[0].ring()->zero());
 556         do {
 557                 cl_MI qk = div(r[n+k], b[n]);
 558                 if ( !zerop(qk) ) {
 559                         q[k] = qk;
 560                         for ( int i=0; i<n; ++i ) {
 561                                 unsigned int j = n + k - 1 - i;
 562                                 r[j] = r[j] - qk * b[j-k];
 563                         }
 564                 }
 565         } while ( k-- );
 566
 567         canonicalize(q);
 568 }
 569
 570 /** Calculates quotient and remainder of a/b.
 571  *  Assertion: a and b not empty.
 572  *
 573  *  @param[in]  a  polynomial dividend
 574  *  @param[in]  b  polynomial divisor
 575  *  @param[out] r  polynomial remainder
 576  *  @param[out] q  polynomial quotient
 577  */
 578 static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 579 {
 580         int k, n;
 581         n = degree(b);
 582         k = degree(a) - n;
 583         q.clear();
 584         r = a;
 585         if ( k < 0 ) return;
 586
 587         q.resize(k+1, a[0].ring()->zero());
 588         do {
 589                 cl_MI qk = div(r[n+k], b[n]);
 590                 if ( !zerop(qk) ) {
 591                         q[k] = qk;
 592                         for ( int i=0; i<n; ++i ) {
 593                                 unsigned int j = n + k - 1 - i;
 594                                 r[j] = r[j] - qk * b[j-k];
 595                         }
 596                 }
 597         } while ( k-- );
 598
 599         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 600         canonicalize(r);
 601         canonicalize(q);
 602 }
 603
 604 /** Calculates the GCD of polynomial a and b.
 605  *
 606  *  @param[in]  a  polynomial
 607  *  @param[in]  b  polynomial
 608  *  @param[out] c  GCD
 609  */
 610 static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
 611 {
 612         if ( degree(a) < degree(b) ) return gcd(b, a, c);
 613
 614         c = a;
 615         normalize_in_field(c);
 616         umodpoly d = b;
 617         normalize_in_field(d);
 618         umodpoly r;
 619         while ( !d.empty() ) {
 620                 rem(c, d, r);
 621                 c = d;
 622                 d = r;
 623         }
 624         normalize_in_field(c);
 625 }
 626
 627 /** Calculates the derivative of the polynomial a.
 628  *
 629  *  @param[in]  a  polynomial of which to take the derivative
 630  *  @param[out] d  result/derivative
 631  */
 632 static void deriv(const umodpoly& a, umodpoly& d)
 633 {
 634         d.clear();
 635         if ( a.size() <= 1 ) return;
 636
 637         d.insert(d.begin(), a.begin()+1, a.end());
 638         int max = d.size();
 639         for ( int i=1; i<max; ++i ) {
 640                 d[i] = d[i] * (i+1);
 641         }
 642         canonicalize(d);
 643 }
 644
 645 static bool unequal_one(const umodpoly& a)
 646 {
 647         if ( a.empty() ) return true;
 648         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 649 }
 650
 651 static bool equal_one(const umodpoly& a)
 652 {
 653         return ( a.size() == 1 && a[0] == a[0].ring()->one() );
 654 }
 655
 656 /** Returns true if polynomial a is square free.
 657  *
 658  *  @param[in] a  polynomial to check
 659  *  @return       true if polynomial is square free, false otherwise
 660  */
 661 static bool squarefree(const umodpoly& a)
 662 {
 663         umodpoly b;
 664         deriv(a, b);
 665         if ( b.empty() ) {
 666                 return false;
 667         }
 668         umodpoly c;
 669         gcd(a, b, c);
 670         return equal_one(c);
 671 }
 672
 673 // END modular univariate polynomial code
 674 ////////////////////////////////////////////////////////////////////////////////
 675
 676 ////////////////////////////////////////////////////////////////////////////////
 677 // modular matrix
 678
 679 typedef vector<cl_MI> mvec;
 680
 681 class modular_matrix
 682 {
 683 #ifdef DEBUGFACTOR
 684         friend ostream& operator<<(ostream& o, const modular_matrix& m);
 685 #endif
 686 public:
 687         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
 688         {
 689                 m.resize(c*r, init);
 690         }
 691         size_t rowsize() const { return r; }
 692         size_t colsize() const { return c; }
 693         cl_MI& operator()(size_t row, size_t col) { return m[row*c + col]; }
 694         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 695         void mul_col(size_t col, const cl_MI x)
 696         {
 697                 for ( size_t rc=0; rc<r; ++rc ) {
 698                         std::size_t i = c*rc + col;
 699                         m[i] = m[i] * x;
 700                 }
 701         }
 702         void sub_col(size_t col1, size_t col2, const cl_MI fac)
 703         {
 704                 for ( size_t rc=0; rc<r; ++rc ) {
 705                         std::size_t i1 = col1 + c*rc;
 706                         std::size_t i2 = col2 + c*rc;
 707                         m[i1] = m[i1] - m[i2]*fac;
 708                 }
 709         }
 710         void switch_col(size_t col1, size_t col2)
 711         {
 712                 for ( size_t rc=0; rc<r; ++rc ) {
 713                         std::size_t i1 = col1 + rc*c;
 714                         std::size_t i2 = col2 + rc*c;
 715                         std::swap(m[i1], m[i2]);
 716                 }
 717         }
 718         void mul_row(size_t row, const cl_MI x)
 719         {
 720                 for ( size_t cc=0; cc<c; ++cc ) {
 721                         std::size_t i = row*c + cc;
 722                         m[i] = m[i] * x;
 723                 }
 724         }
 725         void sub_row(size_t row1, size_t row2, const cl_MI fac)
 726         {
 727                 for ( size_t cc=0; cc<c; ++cc ) {
 728                         std::size_t i1 = row1*c + cc;
 729                         std::size_t i2 = row2*c + cc;
 730                         m[i1] = m[i1] - m[i2]*fac;
 731                 }
 732         }
 733         void switch_row(size_t row1, size_t row2)
 734         {
 735                 for ( size_t cc=0; cc<c; ++cc ) {
 736                         std::size_t i1 = row1*c + cc;
 737                         std::size_t i2 = row2*c + cc;
 738                         std::swap(m[i1], m[i2]);
 739                 }
 740         }
 741         bool is_col_zero(size_t col) const
 742         {
 743                 for ( size_t rr=0; rr<r; ++rr ) {
 744                         std::size_t i = col + rr*c;
 745                         if ( !zerop(m[i]) ) {
 746                                 return false;
 747                         }
 748                 }
 749                 return true;
 750         }
 751         bool is_row_zero(size_t row) const
 752         {
 753                 for ( size_t cc=0; cc<c; ++cc ) {
 754                         std::size_t i = row*c + cc;
 755                         if ( !zerop(m[i]) ) {
 756                                 return false;
 757                         }
 758                 }
 759                 return true;
 760         }
 761         void set_row(size_t row, const vector<cl_MI>& newrow)
 762         {
 763                 for (std::size_t i2 = 0; i2 < newrow.size(); ++i2) {
 764                         std::size_t i1 = row*c + i2;
 765                         m[i1] = newrow[i2];
 766                 }
 767         }
 768         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
 769         mvec::const_iterator row_end(size_t row) const { return m.begin()+row*c+r; }
 770 private:
 771         size_t r, c;
 772         mvec m;
 773 };
 774
 775 #ifdef DEBUGFACTOR
 776 modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 777 {
 778         const unsigned int r = m1.rowsize();
 779         const unsigned int c = m2.colsize();
 780         modular_matrix o(r,c,m1(0,0));
 781
 782         for ( size_t i=0; i<r; ++i ) {
 783                 for ( size_t j=0; j<c; ++j ) {
 784                         cl_MI buf;
 785                         buf = m1(i,0) * m2(0,j);
 786                         for ( size_t k=1; k<c; ++k ) {
 787                                 buf = buf + m1(i,k)*m2(k,j);
 788                         }
 789                         o(i,j) = buf;
 790                 }
 791         }
 792         return o;
 793 }
 794
 795 ostream& operator<<(ostream& o, const modular_matrix& m)
 796 {
 797         cl_modint_ring R = m(0,0).ring();
 798         o << "{";
 799         for ( size_t i=0; i<m.rowsize(); ++i ) {
 800                 o << "{";
 801                 for ( size_t j=0; j<m.colsize()-1; ++j ) {
 802                         o << R->retract(m(i,j)) << ",";
 803                 }
 804                 o << R->retract(m(i,m.colsize()-1)) << "}";
 805                 if ( i != m.rowsize()-1 ) {
 806                         o << ",";
 807                 }
 808         }
 809         o << "}";
 810         return o;
 811 }
 812 #endif // def DEBUGFACTOR
 813
 814 // END modular matrix
 815 ////////////////////////////////////////////////////////////////////////////////
 816
 817 /** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
 818  *
 819  *  @param[in]  a_  modular polynomial
 820  *  @param[out] Q   Q matrix
 821  */
 822 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 823 {
 824         umodpoly a = a_;
 825         normalize_in_field(a);
 826
 827         int n = degree(a);
 828         unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
 829         umodpoly r(n, a[0].ring()->zero());
 830         r[0] = a[0].ring()->one();
 831         Q.set_row(0, r);
 832         unsigned int max = (n-1) * q;
 833         for ( size_t m=1; m<=max; ++m ) {
 834                 cl_MI rn_1 = r.back();
 835                 for ( size_t i=n-1; i>0; --i ) {
 836                         r[i] = r[i-1] - (rn_1 * a[i]);
 837                 }
 838                 r[0] = -rn_1 * a[0];
 839                 if ( (m % q) == 0 ) {
 840                         Q.set_row(m/q, r);
 841                 }
 842         }
 843 }
 844
 845 /** Determine the nullspace of a matrix M-1.
 846  *
 847  *  @param[in,out] M      matrix, will be modified
 848  *  @param[out]    basis  calculated nullspace of M-1
 849  */
 850 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 851 {
 852         const size_t n = M.rowsize();
 853         const cl_MI one = M(0,0).ring()->one();
 854         for ( size_t i=0; i<n; ++i ) {
 855                 M(i,i) = M(i,i) - one;
 856         }
 857         for ( size_t r=0; r<n; ++r ) {
 858                 size_t cc = 0;
 859                 for ( ; cc<n; ++cc ) {
 860                         if ( !zerop(M(r,cc)) ) {
 861                                 if ( cc < r ) {
 862                                         if ( !zerop(M(cc,cc)) ) {
 863                                                 continue;
 864                                         }
 865                                         M.switch_col(cc, r);
 866                                 }
 867                                 else if ( cc > r ) {
 868                                         M.switch_col(cc, r);
 869                                 }
 870                                 break;
 871                         }
 872                 }
 873                 if ( cc < n ) {
 874                         M.mul_col(r, recip(M(r,r)));
 875                         for ( cc=0; cc<n; ++cc ) {
 876                                 if ( cc != r ) {
 877                                         M.sub_col(cc, r, M(r,cc));
 878                                 }
 879                         }
 880                 }
 881         }
 882
 883         for ( size_t i=0; i<n; ++i ) {
 884                 M(i,i) = M(i,i) - one;
 885         }
 886         for ( size_t i=0; i<n; ++i ) {
 887                 if ( !M.is_row_zero(i) ) {
 888                         mvec nu(M.row_begin(i), M.row_end(i));
 889                         basis.push_back(nu);
 890                 }
 891         }
 892 }
 893
 894 /** Berlekamp's modular factorization.
 895  *
 896  *  The implementation follows the algorithm in chapter 8 of [GCL].
 897  *
 898  *  @param[in]  a    modular polynomial
 899  *  @param[out] upv  vector containing modular factors. if upv was not empty the
 900  *                   new elements are added at the end
 901  */
 902 static void berlekamp(const umodpoly& a, upvec& upv)
 903 {
 904         cl_modint_ring R = a[0].ring();
 905         umodpoly one(1, R->one());
 906
 907         // find nullspace of Q matrix
 908         modular_matrix Q(degree(a), degree(a), R->zero());
 909         q_matrix(a, Q);
 910         vector<mvec> nu;
 911         nullspace(Q, nu);
 912
 913         const unsigned int k = nu.size();
 914         if ( k == 1 ) {
 915                 // irreducible
 916                 return;
 917         }
 918
 919         list<umodpoly> factors = {a};
 920         unsigned int size = 1;
 921         unsigned int r = 1;
 922         unsigned int q = cl_I_to_uint(R->modulus);
 923
 924         list<umodpoly>::iterator u = factors.begin();
 925
 926         // calculate all gcd's
 927         while ( true ) {
 928                 for ( unsigned int s=0; s<q; ++s ) {
 929                         umodpoly nur = nu[r];
 930                         nur[0] = nur[0] - cl_MI(R, s);
 931                         canonicalize(nur);
 932                         umodpoly g;
 933                         gcd(nur, *u, g);
 934                         if ( unequal_one(g) && g != *u ) {
 935                                 umodpoly uo;
 936                                 div(*u, g, uo);
 937                                 if ( equal_one(uo) ) {
 938                                         throw logic_error("berlekamp: unexpected divisor.");
 939                                 } else {
 940                                         *u = uo;
 941                                 }
 942                                 factors.push_back(g);
 943                                 size = 0;
 944                                 for (auto & i : factors) {
 945                                         if (degree(i))
 946                                                 ++size;
 947                                 }
 948                                 if ( size == k ) {
 949                                         for (auto & i : factors) {
 950                                                 upv.push_back(i);
 951                                         }
 952                                         return;
 953                                 }
 954                         }
 955                 }
 956                 if ( ++r == k ) {
 957                         r = 1;
 958                         ++u;
 959                 }
 960         }
 961 }
 962
 963 // modular square free factorization is not used at the moment so we deactivate
 964 // the code
 965 #if 0
 966
 967 /** Calculates a^(1/prime).
 968  *
 969  *  @param[in] a      polynomial
 970  *  @param[in] prime  prime number -> exponent 1/prime
 971  *  @param[in] ap     resulting polynomial
 972  */
 973 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 974 {
 975         size_t newdeg = degree(a)/prime;
 976         ap.resize(newdeg+1);
 977         ap[0] = a[0];
 978         for ( size_t i=1; i<=newdeg; ++i ) {
 979                 ap[i] = a[i*prime];
 980         }
 981 }
 982
 983 /** Modular square free factorization.
 984  *
 985  *  @param[in]  a        polynomial
 986  *  @param[out] factors  modular factors
 987  *  @param[out] mult     corresponding multiplicities (exponents)
 988  */
 989 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 990 {
 991         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
 992         int i = 1;
 993         umodpoly b;
 994         deriv(a, b);
 995         if ( b.size() ) {
 996                 umodpoly c;
 997                 gcd(a, b, c);
 998                 umodpoly w;
 999                 div(a, c, w);
1000                 while ( unequal_one(w) ) {
1001                         umodpoly y;
1002                         gcd(w, c, y);
1003                         umodpoly z;
1004                         div(w, y, z);
1005                         factors.push_back(z);
1006                         mult.push_back(i);
1007                         ++i;
1008                         w = y;
1009                         umodpoly buf;
1010                         div(c, y, buf);
1011                         c = buf;
1012                 }
1013                 if ( unequal_one(c) ) {
1014                         umodpoly cp;
1015                         expt_1_over_p(c, prime, cp);
1016                         size_t previ = mult.size();
1017                         modsqrfree(cp, factors, mult);
1018                         for ( size_t i=previ; i<mult.size(); ++i ) {
1019                                 mult[i] *= prime;
1020                         }
1021                 }
1022         } else {
1023                 umodpoly ap;
1024                 expt_1_over_p(a, prime, ap);
1025                 size_t previ = mult.size();
1026                 modsqrfree(ap, factors, mult);
1027                 for ( size_t i=previ; i<mult.size(); ++i ) {
1028                         mult[i] *= prime;
1029                 }
1030         }
1031 }
1032
1033 #endif // deactivation of square free factorization
1034
1035 /** Distinct degree factorization (DDF).
1036  *
1037  *  The implementation follows the algorithm in chapter 8 of [GCL].
1038  *
1039  *  @param[in]  a_         modular polynomial
1040  *  @param[out] degrees    vector containing the degrees of the factors of the
1041  *                         corresponding polynomials in ddfactors.
1042  *  @param[out] ddfactors  vector containing polynomials which factors have the
1043  *                         degree given in degrees.
1044  */
1045 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
1046 {
1047         umodpoly a = a_;
1048
1049         cl_modint_ring R = a[0].ring();
1050         int q = cl_I_to_int(R->modulus);
1051         int nhalf = degree(a)/2;
1052
1053         int i = 1;
1054         umodpoly w(2);
1055         w[0] = R->zero();
1056         w[1] = R->one();
1057         umodpoly x = w;
1058
1059         while ( i <= nhalf ) {
1060                 expt_pos(w, q);
1061                 umodpoly buf;
1062                 rem(w, a, buf);
1063                 w = buf;
1064                 umodpoly wx = w - x;
1065                 gcd(a, wx, buf);
1066                 if ( unequal_one(buf) ) {
1067                         degrees.push_back(i);
1068                         ddfactors.push_back(buf);
1069                 }
1070                 if ( unequal_one(buf) ) {
1071                         umodpoly buf2;
1072                         div(a, buf, buf2);
1073                         a = buf2;
1074                         nhalf = degree(a)/2;
1075                         rem(w, a, buf);
1076                         w = buf;
1077                 }
1078                 ++i;
1079         }
1080         if ( unequal_one(a) ) {
1081                 degrees.push_back(degree(a));
1082                 ddfactors.push_back(a);
1083         }
1084 }
1085
1086 /** Modular same degree factorization.
1087  *  Same degree factorization is a kind of misnomer. It performs distinct degree
1088  *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
1089  *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
1090  *  degree.
1091  *
1092  *  @param[in]  a    modular polynomial
1093  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1094  *                   new elements are added at the end
1095  */
1096 static void same_degree_factor(const umodpoly& a, upvec& upv)
1097 {
1098         cl_modint_ring R = a[0].ring();
1099
1100         vector<int> degrees;
1101         upvec ddfactors;
1102         distinct_degree_factor(a, degrees, ddfactors);
1103
1104         for ( size_t i=0; i<degrees.size(); ++i ) {
1105                 if ( degrees[i] == degree(ddfactors[i]) ) {
1106                         upv.push_back(ddfactors[i]);
1107                 } else {
1108                         berlekamp(ddfactors[i], upv);
1109                 }
1110         }
1111 }
1112
1113 // Yes, we can (choose).
1114 #define USE_SAME_DEGREE_FACTOR
1115
1116 /** Modular univariate factorization.
1117  *
1118  *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
1119  *  and same degree factorization (SDF). SDF seems to be slightly faster in
1120  *  almost all cases so it is activated as default.
1121  *
1122  *  @param[in]  p    modular polynomial
1123  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1124  *                   new elements are added at the end
1125  */
1126 static void factor_modular(const umodpoly& p, upvec& upv)
1127 {
1128 #ifdef USE_SAME_DEGREE_FACTOR
1129         same_degree_factor(p, upv);
1130 #else
1131         berlekamp(p, upv);
1132 #endif
1133 }
1134
1135 /** Calculates modular polynomials s and t such that a*s+b*t==1.
1136  *  Assertion: a and b are relatively prime and not zero.
1137  *
1138  *  @param[in]  a  polynomial
1139  *  @param[in]  b  polynomial
1140  *  @param[out] s  polynomial
1141  *  @param[out] t  polynomial
1142  */
1143 static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
1144 {
1145         if ( degree(a) < degree(b) ) {
1146                 exteuclid(b, a, t, s);
1147                 return;
1148         }
1149
1150         umodpoly one(1, a[0].ring()->one());
1151         umodpoly c = a; normalize_in_field(c);
1152         umodpoly d = b; normalize_in_field(d);
1153         s = one;
1154         t.clear();
1155         umodpoly d1;
1156         umodpoly d2 = one;
1157         umodpoly q;
1158         while ( true ) {
1159                 div(c, d, q);
1160                 umodpoly r = c - q * d;
1161                 umodpoly r1 = s - q * d1;
1162                 umodpoly r2 = t - q * d2;
1163                 c = d;
1164                 s = d1;
1165                 t = d2;
1166                 if ( r.empty() ) break;
1167                 d = r;
1168                 d1 = r1;
1169                 d2 = r2;
1170         }
1171         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
1172         for (auto & i : s) {
1173                 i = i * fac;
1174         }
1175         canonicalize(s);
1176         fac = recip(lcoeff(b) * lcoeff(c));
1177         for (auto & i : t) {
1178                 i = i * fac;
1179         }
1180         canonicalize(t);
1181 }
1182
1183 /** Replaces the leading coefficient in a polynomial by a given number.
1184  *
1185  *  @param[in] poly  polynomial to change
1186  *  @param[in] lc    new leading coefficient
1187  *  @return          changed polynomial
1188  */
1189 static upoly replace_lc(const upoly& poly, const cl_I& lc)
1190 {
1191         if ( poly.empty() ) return poly;
1192         upoly r = poly;
1193         r.back() = lc;
1194         return r;
1195 }
1196
1197 /** Calculates the bound for the modulus.
1198  *  See [Mig].
1199  */
1200 static inline cl_I calc_bound(const ex& a, const ex& x, int maxdeg)
1201 {
1202         cl_I maxcoeff = 0;
1203         cl_R coeff = 0;
1204         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
1205                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
1206                 if ( aa > maxcoeff ) maxcoeff = aa;
1207                 coeff = coeff + square(aa);
1208         }
1209         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1210         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1211         return ( B > maxcoeff ) ? B : maxcoeff;
1212 }
1213
1214 /** Calculates the bound for the modulus.
1215  *  See [Mig].
1216  */
1217 static inline cl_I calc_bound(const upoly& a, int maxdeg)
1218 {
1219         cl_I maxcoeff = 0;
1220         cl_R coeff = 0;
1221         for ( int i=degree(a); i>=0; --i ) {
1222                 cl_I aa = abs(a[i]);
1223                 if ( aa > maxcoeff ) maxcoeff = aa;
1224                 coeff = coeff + square(aa);
1225         }
1226         cl_I coeffnorm = ceiling1(the<cl_R>(cln::sqrt(coeff)));
1227         cl_I B = coeffnorm * expt_pos(cl_I(2), cl_I(maxdeg));
1228         return ( B > maxcoeff ) ? B : maxcoeff;
1229 }
1230
1231 /** Hensel lifting as used by factor_univariate().
1232  *
1233  *  The implementation follows the algorithm in chapter 6 of [GCL].
1234  *
1235  *  @param[in]  a_   primitive univariate polynomials
1236  *  @param[in]  p    prime number that does not divide lcoeff(a)
1237  *  @param[in]  u1_  modular factor of a (mod p)
1238  *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
1239  *                   fulfilling  u1_*w1_ == a mod p
1240  *  @param[out] u    lifted factor
1241  *  @param[out] w    lifted factor, u*w = a
1242  */
1243 static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
1244 {
1245         upoly a = a_;
1246         const cl_modint_ring& R = u1_[0].ring();
1247
1248         // calc bound B
1249         int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
1250         cl_I maxmodulus = 2*calc_bound(a, maxdeg);
1251
1252         // step 1
1253         cl_I alpha = lcoeff(a);
1254         a = a * alpha;
1255         umodpoly nu1 = u1_;
1256         normalize_in_field(nu1);
1257         umodpoly nw1 = w1_;
1258         normalize_in_field(nw1);
1259         upoly phi;
1260         phi = umodpoly_to_upoly(nu1) * alpha;
1261         umodpoly u1;
1262         umodpoly_from_upoly(u1, phi, R);
1263         phi = umodpoly_to_upoly(nw1) * alpha;
1264         umodpoly w1;
1265         umodpoly_from_upoly(w1, phi, R);
1266
1267         // step 2
1268         umodpoly s;
1269         umodpoly t;
1270         exteuclid(u1, w1, s, t);
1271
1272         // step 3
1273         u = replace_lc(umodpoly_to_upoly(u1), alpha);
1274         w = replace_lc(umodpoly_to_upoly(w1), alpha);
1275         upoly e = a - u * w;
1276         cl_I modulus = p;
1277
1278         // step 4
1279         while ( !e.empty() && modulus < maxmodulus ) {
1280                 upoly c = e / modulus;
1281                 phi = umodpoly_to_upoly(s) * c;
1282                 umodpoly sigmatilde;
1283                 umodpoly_from_upoly(sigmatilde, phi, R);
1284                 phi = umodpoly_to_upoly(t) * c;
1285                 umodpoly tautilde;
1286                 umodpoly_from_upoly(tautilde, phi, R);
1287                 umodpoly r, q;
1288                 remdiv(sigmatilde, w1, r, q);
1289                 umodpoly sigma = r;
1290                 phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
1291                 umodpoly tau;
1292                 umodpoly_from_upoly(tau, phi, R);
1293                 u = u + umodpoly_to_upoly(tau) * modulus;
1294                 w = w + umodpoly_to_upoly(sigma) * modulus;
1295                 e = a - u * w;
1296                 modulus = modulus * p;
1297         }
1298
1299         // step 5
1300         if ( e.empty() ) {
1301                 cl_I g = u[0];
1302                 for ( size_t i=1; i<u.size(); ++i ) {
1303                         g = gcd(g, u[i]);
1304                         if ( g == 1 ) break;
1305                 }
1306                 if ( g != 1 ) {
1307                         u = u / g;
1308                         w = w * g;
1309                 }
1310                 if ( alpha != 1 ) {
1311                         w = w / alpha;
1312                 }
1313         } else {
1314                 u.clear();
1315         }
1316 }
1317
/** Returns a new prime number.
 *
 *  A table of the odd primes found so far (initially 3, 5, 7) is kept between
 *  calls and extended on demand. The prime 2 is never returned: for p < 3 the
 *  result is 3.
 *
 *  The table is extended one prime at a time until a prime greater than p has
 *  been stored, so p may lie arbitrarily far beyond the current table. There
 *  is no protection against running past the largest unsigned int.
 *
 *  @param[in] p  prime number (any non-negative number is accepted)
 *  @return       next prime number after p
 */
static unsigned int next_prime(unsigned int p)
{
        // all odd primes found so far, in ascending order and without gaps
        static std::vector<unsigned int> primes = {3, 5, 7};

        if ( p < primes.back() ) {
                // the answer is already in the table
                for (auto & it : primes) {
                        if ( it > p ) {
                                return it;
                        }
                }
                throw std::logic_error("next_prime: should not reach this point!");
        }

        unsigned int candidate = primes.back();
        do {
                // advance to the next odd number without a divisor in the table
                bool composite;
                do {
                        candidate += 2;
                        composite = false;
                        for (auto & it : primes) {
                                if ( it > candidate / it ) {
                                        // it*it > candidate: no larger divisor needs testing
                                        break;
                                }
                                if ( candidate % it == 0 ) {
                                        composite = true;
                                        break;
                                }
                        }
                } while ( composite );
                primes.push_back(candidate);
        } while ( candidate <= p );
        return candidate;
}
1352
1353 /** Manages the splitting a vector of of modular factors into two partitions.
1354  */
1355 class factor_partition
1356 {
1357 public:
1358         /** Takes the vector of modular factors and initializes the first partition */
1359         factor_partition(const upvec& factors_) : factors(factors_)
1360         {
1361                 n = factors.size();
1362                 k.resize(n, 0);
1363                 k[0] = 1;
1364                 cache.resize(n-1);
1365                 one.resize(1, factors.front()[0].ring()->one());
1366                 len = 1;
1367                 last = 0;
1368                 split();
1369         }
1370         int operator[](size_t i) const { return k[i]; }
1371         size_t size() const { return n; }
1372         size_t size_left() const { return n-len; }
1373         size_t size_right() const { return len; }
1374         /** Initializes the next partition.
1375             Returns true, if there is one, false otherwise. */
1376         bool next()
1377         {
1378                 if ( last == n-1 ) {
1379                         int rem = len - 1;
1380                         int p = last - 1;
1381                         while ( rem ) {
1382                                 if ( k[p] ) {
1383                                         --rem;
1384                                         --p;
1385                                         continue;
1386                                 }
1387                                 last = p - 1;
1388                                 while ( k[last] == 0 ) { --last; }
1389                                 if ( last == 0 && n == 2*len ) return false;
1390                                 k[last++] = 0;
1391                                 for ( size_t i=0; i<=len-rem; ++i ) {
1392                                         k[last] = 1;
1393                                         ++last;
1394                                 }
1395                                 fill(k.begin()+last, k.end(), 0);
1396                                 --last;
1397                                 split();
1398                                 return true;
1399                         }
1400                         last = len;
1401                         ++len;
1402                         if ( len > n/2 ) return false;
1403                         fill(k.begin(), k.begin()+len, 1);
1404                         fill(k.begin()+len+1, k.end(), 0);
1405                 } else {
1406                         k[last++] = 0;
1407                         k[last] = 1;
1408                 }
1409                 split();
1410                 return true;
1411         }
1412         /** Get first partition */
1413         umodpoly& left() { return lr[0]; }
1414         /** Get second partition */
1415         umodpoly& right() { return lr[1]; }
1416 private:
1417         void split_cached()
1418         {
1419                 size_t i = 0;
1420                 do {
1421                         size_t pos = i;
1422                         int group = k[i++];
1423                         size_t d = 0;
1424                         while ( i < n && k[i] == group ) { ++d; ++i; }
1425                         if ( d ) {
1426                                 if ( cache[pos].size() >= d ) {
1427                                         lr[group] = lr[group] * cache[pos][d-1];
1428                                 } else {
1429                                         if ( cache[pos].size() == 0 ) {
1430                                                 cache[pos].push_back(factors[pos] * factors[pos+1]);
1431                                         }
1432                                         size_t j = pos + cache[pos].size() + 1;
1433                                         d -= cache[pos].size();
1434                                         while ( d ) {
1435                                                 umodpoly buf = cache[pos].back() * factors[j];
1436                                                 cache[pos].push_back(buf);
1437                                                 --d;
1438                                                 ++j;
1439                                         }
1440                                         lr[group] = lr[group] * cache[pos].back();
1441                                 }
1442                         } else {
1443                                 lr[group] = lr[group] * factors[pos];
1444                         }
1445                 } while ( i < n );
1446         }
1447         void split()
1448         {
1449                 lr[0] = one;
1450                 lr[1] = one;
1451                 if ( n > 6 ) {
1452                         split_cached();
1453                 } else {
1454                         for ( size_t i=0; i<n; ++i ) {
1455                                 lr[k[i]] = lr[k[i]] * factors[i];
1456                         }
1457                 }
1458         }
1459 private:
1460         umodpoly lr[2];
1461         vector<vector<umodpoly>> cache;
1462         upvec factors;
1463         umodpoly one;
1464         size_t n;
1465         size_t len;
1466         size_t last;
1467         vector<int> k;
1468 };
1469
1470 /** Contains a pair of univariate polynomial and its modular factors.
1471  *  Used by factor_univariate().
1472  */
1473 struct ModFactors
1474 {
1475         upoly poly;
1476         upvec factors;
1477 };
1478
1479 /** Univariate polynomial factorization.
1480  *
1481  *  Modular factorization is tried for several primes to minimize the number of
1482  *  modular factors. Then, Hensel lifting is performed.
1483  *
1484  *  @param[in]     poly   expanded square free univariate polynomial
1485  *  @param[in]     x      symbol
1486  *  @param[in,out] prime  prime number to start trying modular factorization with,
1487  *                        output value is the prime number actually used
1488  */
1489 static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
1490 {
1491         ex unit, cont, prim_ex;
1492         poly.unitcontprim(x, unit, cont, prim_ex);
1493         upoly prim;
1494         upoly_from_ex(prim, prim_ex, x);
1495
1496         // determine proper prime and minimize number of modular factors
1497         prime = 3;
1498         unsigned int lastp = prime;
1499         cl_modint_ring R;
1500         unsigned int trials = 0;
1501         unsigned int minfactors = 0;
1502
1503         const numeric& cont_n = ex_to<numeric>(cont);
1504         cl_I i_cont;
1505         if (cont_n.is_integer()) {
1506                 i_cont = the<cl_I>(cont_n.to_cl_N());
1507         } else {
1508                 // poly \in Q[x] => poly = q ipoly, ipoly \in Z[x], q \in Q
1509                 // factor(poly) \equiv q factor(ipoly)
1510                 i_cont = cl_I(1);
1511         }
1512         cl_I lc = lcoeff(prim)*i_cont;
1513         upvec factors;
1514         while ( trials < 2 ) {
1515                 umodpoly modpoly;
1516                 while ( true ) {
1517                         prime = next_prime(prime);
1518                         if ( !zerop(rem(lc, prime)) ) {
1519                                 R = find_modint_ring(prime);
1520                                 umodpoly_from_upoly(modpoly, prim, R);
1521                                 if ( squarefree(modpoly) ) break;
1522                         }
1523                 }
1524
1525                 // do modular factorization
1526                 upvec trialfactors;
1527                 factor_modular(modpoly, trialfactors);
1528                 if ( trialfactors.size() <= 1 ) {
1529                         // irreducible for sure
1530                         return poly;
1531                 }
1532
1533                 if ( minfactors == 0 || trialfactors.size() < minfactors ) {
1534                         factors = trialfactors;
1535                         minfactors = trialfactors.size();
1536                         lastp = prime;
1537                         trials = 1;
1538                 } else {
1539                         ++trials;
1540                 }
1541         }
1542         prime = lastp;
1543         R = find_modint_ring(prime);
1544
1545         // lift all factor combinations
1546         stack<ModFactors> tocheck;
1547         ModFactors mf;
1548         mf.poly = prim;
1549         mf.factors = factors;
1550         tocheck.push(mf);
1551         upoly f1, f2;
1552         ex result = 1;
1553         while ( tocheck.size() ) {
1554                 const size_t n = tocheck.top().factors.size();
1555                 factor_partition part(tocheck.top().factors);
1556                 while ( true ) {
1557                         // call Hensel lifting
1558                         hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
1559                         if ( !f1.empty() ) {
1560                                 // successful, update the stack and the result
1561                                 if ( part.size_left() == 1 ) {
1562                                         if ( part.size_right() == 1 ) {
1563                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1564                                                 tocheck.pop();
1565                                                 break;
1566                                         }
1567                                         result *= upoly_to_ex(f1, x);
1568                                         tocheck.top().poly = f2;
1569                                         for ( size_t i=0; i<n; ++i ) {
1570                                                 if ( part[i] == 0 ) {
1571                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1572                                                         break;
1573                                                 }
1574                                         }
1575                                         break;
1576                                 }
1577                                 else if ( part.size_right() == 1 ) {
1578                                         if ( part.size_left() == 1 ) {
1579                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1580                                                 tocheck.pop();
1581                                                 break;
1582                                         }
1583                                         result *= upoly_to_ex(f2, x);
1584                                         tocheck.top().poly = f1;
1585                                         for ( size_t i=0; i<n; ++i ) {
1586                                                 if ( part[i] == 1 ) {
1587                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1588                                                         break;
1589                                                 }
1590                                         }
1591                                         break;
1592                                 } else {
1593                                         upvec newfactors1(part.size_left()), newfactors2(part.size_right());
1594                                         auto i1 = newfactors1.begin(), i2 = newfactors2.begin();
1595                                         for ( size_t i=0; i<n; ++i ) {
1596                                                 if ( part[i] ) {
1597                                                         *i2++ = tocheck.top().factors[i];
1598                                                 } else {
1599                                                         *i1++ = tocheck.top().factors[i];
1600                                                 }
1601                                         }
1602                                         tocheck.top().factors = newfactors1;
1603                                         tocheck.top().poly = f1;
1604                                         ModFactors mf;
1605                                         mf.factors = newfactors2;
1606                                         mf.poly = f2;
1607                                         tocheck.push(mf);
1608                                         break;
1609                                 }
1610                         } else {
1611                                 // not successful
1612                                 if ( !part.next() ) {
1613                                         // if no more combinations left, return polynomial as
1614                                         // irreducible
1615                                         result *= upoly_to_ex(tocheck.top().poly, x);
1616                                         tocheck.pop();
1617                                         break;
1618                                 }
1619                         }
1620                 }
1621         }
1622
1623         return unit * cont * result;
1624 }
1625
1626 /** Second interface to factor_univariate() to be used if the information about
1627  *  the prime is not needed.
1628  */
1629 static inline ex factor_univariate(const ex& poly, const ex& x)
1630 {
1631         unsigned int prime;
1632         return factor_univariate(poly, x, prime);
1633 }
1634
1635 /** Represents an evaluation point (<symbol>==<integer>).
1636  */
1637 struct EvalPoint
1638 {
1639         ex x;
1640         int evalpoint;
1641 };
1642
#ifdef DEBUGFACTOR
/** Debugging output: writes every evaluation point as "(<symbol>==<integer>) ". */
ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
{
        for ( const EvalPoint& pt : v ) {
                o << "(" << pt.x << "==" << pt.evalpoint << ") ";
        }
        return o;
}
#endif // def DEBUGFACTOR
1652
1653 // forward declaration
1654 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
1655
1656 /** Utility function for multivariate Hensel lifting.
1657  *
1658  *  Solves the equation
1659  *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
1660  *  with deg(s_i) < deg(a_i)
1661  *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1662  *
1663  *  The implementation follows the algorithm in chapter 6 of [GCL].
1664  *
1665  *  @param[in]  a   vector of modular univariate polynomials
1666  *  @param[in]  x   symbol
1667  *  @param[in]  p   prime number
1668  *  @param[in]  k   p^k is modulus
1669  *  @return         vector of polynomials (s_i)
1670  */
1671 static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
1672 {
1673         const size_t r = a.size();
1674         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1675         upvec q(r-1);
1676         q[r-2] = a[r-1];
1677         for ( size_t j=r-2; j>=1; --j ) {
1678                 q[j-1] = a[j] * q[j];
1679         }
1680         umodpoly beta(1, R->one());
1681         upvec s;
1682         for ( size_t j=1; j<r; ++j ) {
1683                 vector<ex> mdarg(2);
1684                 mdarg[0] = umodpoly_to_ex(q[j-1], x);
1685                 mdarg[1] = umodpoly_to_ex(a[j-1], x);
1686                 vector<EvalPoint> empty;
1687                 vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
1688                 umodpoly sigma1;
1689                 umodpoly_from_ex(sigma1, exsigma[0], x, R);
1690                 umodpoly sigma2;
1691                 umodpoly_from_ex(sigma2, exsigma[1], x, R);
1692                 beta = sigma1;
1693                 s.push_back(sigma2);
1694         }
1695         s.push_back(beta);
1696         return s;
1697 }
1698
1699 /** Changes the modulus of a modular polynomial. Used by eea_lift().
1700  *
1701  *  @param[in]     R  new modular ring
1702  *  @param[in,out] a  polynomial to change (in situ)
1703  */
1704 static void change_modulus(const cl_modint_ring& R, umodpoly& a)
1705 {
1706         if ( a.empty() ) return;
1707         cl_modint_ring oldR = a[0].ring();
1708         for (auto & i : a) {
1709                 i = R->canonhom(oldR->retract(i));
1710         }
1711         canonicalize(a);
1712 }
1713
1714 /** Utility function for multivariate Hensel lifting.
1715  *
1716  *  Solves  s*a + t*b == 1 mod p^k  given a,b.
1717  *
1718  *  The implementation follows the algorithm in chapter 6 of [GCL].
1719  *
1720  *  @param[in]  a   polynomial
1721  *  @param[in]  b   polynomial
1722  *  @param[in]  x   symbol
1723  *  @param[in]  p   prime number
1724  *  @param[in]  k   p^k is modulus
1725  *  @param[out] s_  output polynomial
1726  *  @param[out] t_  output polynomial
1727  */
1728 static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
1729 {
1730         cl_modint_ring R = find_modint_ring(p);
1731         umodpoly amod = a;
1732         change_modulus(R, amod);
1733         umodpoly bmod = b;
1734         change_modulus(R, bmod);
1735
1736         umodpoly smod;
1737         umodpoly tmod;
1738         exteuclid(amod, bmod, smod, tmod);
1739
1740         cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
1741         umodpoly s = smod;
1742         change_modulus(Rpk, s);
1743         umodpoly t = tmod;
1744         change_modulus(Rpk, t);
1745
1746         cl_I modulus(p);
1747         umodpoly one(1, Rpk->one());
1748         for ( size_t j=1; j<k; ++j ) {
1749                 umodpoly e = one - a * s - b * t;
1750                 reduce_coeff(e, modulus);
1751                 umodpoly c = e;
1752                 change_modulus(R, c);
1753                 umodpoly sigmabar = smod * c;
1754                 umodpoly taubar = tmod * c;
1755                 umodpoly sigma, q;
1756                 remdiv(sigmabar, bmod, sigma, q);
1757                 umodpoly tau = taubar + q * amod;
1758                 umodpoly sadd = sigma;
1759                 change_modulus(Rpk, sadd);
1760                 cl_MI modmodulus(Rpk, modulus);
1761                 s = s + sadd * modmodulus;
1762                 umodpoly tadd = tau;
1763                 change_modulus(Rpk, tadd);
1764                 t = t + tadd * modmodulus;
1765                 modulus = modulus * p;
1766         }
1767
1768         s_ = s; t_ = t;
1769 }
1770
1771 /** Utility function for multivariate Hensel lifting.
1772  *
1773  *  Solves the equation
1774  *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
1775  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1776  *
1777  *  The implementation follows the algorithm in chapter 6 of [GCL].
1778  *
1779  *  @param a  vector with univariate polynomials mod p^k
1780  *  @param x  symbol
1781  *  @param m  exponent of x^m in the equation to solve
1782  *  @param p  prime number
1783  *  @param k  p^k is modulus
1784  *  @return   vector of polynomials (s_i)
1785  */
1786 static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
1787 {
1788         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1789
1790         const size_t r = a.size();
1791         upvec result;
1792         if ( r > 2 ) {
1793                 upvec s = multiterm_eea_lift(a, x, p, k);
1794                 for ( size_t j=0; j<r; ++j ) {
1795                         umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
1796                         umodpoly buf;
1797                         rem(bmod, a[j], buf);
1798                         result.push_back(buf);
1799                 }
1800         } else {
1801                 umodpoly s, t;
1802                 eea_lift(a[1], a[0], x, p, k, s, t);
1803                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
1804                 umodpoly buf, q;
1805                 remdiv(bmod, a[0], buf, q);
1806                 result.push_back(buf);
1807                 umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
1808                 buf = t1mod + q * a[1];
1809                 result.push_back(buf);
1810         }
1811
1812         return result;
1813 }
1814
1815 /** Map used by function make_modular().
1816  *  Finds every coefficient in a polynomial and replaces it by is value in the
1817  *  given modular ring R (symmetric representation).
1818  */
1819 struct make_modular_map : public map_function {
1820         cl_modint_ring R;
1821         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
1822         ex operator()(const ex& e) override
1823         {
1824                 if ( is_a<add>(e) || is_a<mul>(e) ) {
1825                         return e.map(*this);
1826                 }
1827                 else if ( is_a<numeric>(e) ) {
1828                         numeric mod(R->modulus);
1829                         numeric halfmod = (mod-1)/2;
1830                         cl_MI emod = R->canonhom(the<cl_I>(ex_to<numeric>(e).to_cl_N()));
1831                         numeric n(R->retract(emod));
1832                         if ( n > halfmod ) {
1833                                 return n-mod;
1834                         } else {
1835                                 return n;
1836                         }
1837                 }
1838                 return e;
1839         }
1840 };
1841
1842 /** Helps mimicking modular multivariate polynomial arithmetic.
1843  *
1844  *  @param e  expression of which to make the coefficients equal to their value
1845  *            in the modular ring R (symmetric representation)
1846  *  @param R  modular ring
1847  *  @return   resulting expression
1848  */
1849 static ex make_modular(const ex& e, const cl_modint_ring& R)
1850 {
1851         make_modular_map map(R);
1852         return map(e.expand());
1853 }
1854
1855 /** Utility function for multivariate Hensel lifting.
1856  *
1857  *  Returns the polynomials s_i that fulfill
1858  *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
1859  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1860  *
1861  *  The implementation follows the algorithm in chapter 6 of [GCL].
1862  *
1863  *  @param a_  vector of multivariate factors mod p^k
1864  *  @param x   symbol (equiv. x_1 in [GCL])
1865  *  @param c   polynomial mod p^k
1866  *  @param I   vector of evaluation points
1867  *  @param d   maximum total degree of result
1868  *  @param p   prime number
1869  *  @param k   p^k is modulus
1870  *  @return    vector of polynomials (s_i)
1871  */
1872 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
1873                                     unsigned int d, unsigned int p, unsigned int k)
1874 {
1875         vector<ex> a = a_;
1876
1877         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1878         const size_t r = a.size();
1879         const size_t nu = I.size() + 1;
1880
1881         vector<ex> sigma;
1882         if ( nu > 1 ) {
1883                 ex xnu = I.back().x;
1884                 int alphanu = I.back().evalpoint;
1885
1886                 ex A = 1;
1887                 for ( size_t i=0; i<r; ++i ) {
1888                         A *= a[i];
1889                 }
1890                 vector<ex> b(r);
1891                 for ( size_t i=0; i<r; ++i ) {
1892                         b[i] = normal(A / a[i]);
1893                 }
1894
1895                 vector<ex> anew = a;
1896                 for ( size_t i=0; i<r; ++i ) {
1897                         anew[i] = anew[i].subs(xnu == alphanu);
1898                 }
1899                 ex cnew = c.subs(xnu == alphanu);
1900                 vector<EvalPoint> Inew = I;
1901                 Inew.pop_back();
1902                 sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
1903
1904                 ex buf = c;
1905                 for ( size_t i=0; i<r; ++i ) {
1906                         buf -= sigma[i] * b[i];
1907                 }
1908                 ex e = make_modular(buf, R);
1909
1910                 ex monomial = 1;
1911                 for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
1912                         monomial *= (xnu - alphanu);
1913                         monomial = expand(monomial);
1914                         ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
1915                         cm = make_modular(cm, R);
1916                         if ( !cm.is_zero() ) {
1917                                 vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
1918                                 ex buf = e;
1919                                 for ( size_t j=0; j<delta_s.size(); ++j ) {
1920                                         delta_s[j] *= monomial;
1921                                         sigma[j] += delta_s[j];
1922                                         buf -= delta_s[j] * b[j];
1923                                 }
1924                                 e = make_modular(buf, R);
1925                         }
1926                 }
1927         } else {
1928                 upvec amod;
1929                 for ( size_t i=0; i<a.size(); ++i ) {
1930                         umodpoly up;
1931                         umodpoly_from_ex(up, a[i], x, R);
1932                         amod.push_back(up);
1933                 }
1934
1935                 sigma.insert(sigma.begin(), r, 0);
1936                 size_t nterms;
1937                 ex z;
1938                 if ( is_a<add>(c) ) {
1939                         nterms = c.nops();
1940                         z = c.op(0);
1941                 } else {
1942                         nterms = 1;
1943                         z = c;
1944                 }
1945                 for ( size_t i=0; i<nterms; ++i ) {
1946                         int m = z.degree(x);
1947                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
1948                         upvec delta_s = univar_diophant(amod, x, m, p, k);
1949                         cl_MI modcm;
1950                         cl_I poscm = cm;
1951                         while ( poscm < 0 ) {
1952                                 poscm = poscm + expt_pos(cl_I(p),k);
1953                         }
1954                         modcm = cl_MI(R, poscm);
1955                         for ( size_t j=0; j<delta_s.size(); ++j ) {
1956                                 delta_s[j] = delta_s[j] * modcm;
1957                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
1958                         }
1959                         if ( nterms > 1 ) {
1960                                 z = c.op(i+1);
1961                         }
1962                 }
1963         }
1964
1965         for ( size_t i=0; i<sigma.size(); ++i ) {
1966                 sigma[i] = make_modular(sigma[i], R);
1967         }
1968
1969         return sigma;
1970 }
1971
1972 /** Multivariate Hensel lifting.
1973  *  The implementation follows the algorithm in chapter 6 of [GCL].
1974  *  Since we don't have a data type for modular multivariate polynomials, the
1975  *  respective operations are done in a GiNaC::ex and the function
1976  *  make_modular() is then called to make the coefficient modular p^l.
1977  *
1978  *  @param a    multivariate polynomial primitive in x
1979  *  @param x    symbol (equiv. x_1 in [GCL])
1980  *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
1981  *  @param p    prime number (should not divide lcoeff(a mod I))
1982  *  @param l    p^l is the modulus of the lifted univariate field
1983  *  @param u    vector of modular (mod p^l) factors of a mod I
1984  *  @param lcU  correct leading coefficient of the univariate factors of a mod I
1985  *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
1986  *              empty if Hensel lifting did not succeed
1987  */
1988 static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
1989                           unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
1990 {
1991         const size_t nu = I.size() + 1;
1992         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
1993
1994         vector<ex> A(nu);
1995         A[nu-1] = a;
1996
1997         for ( size_t j=nu; j>=2; --j ) {
1998                 ex x = I[j-2].x;
1999                 int alpha = I[j-2].evalpoint;
2000                 A[j-2] = A[j-1].subs(x==alpha);
2001                 A[j-2] = make_modular(A[j-2], R);
2002         }
2003
2004         int maxdeg = a.degree(I.front().x);
2005         for ( size_t i=1; i<I.size(); ++i ) {
2006                 int maxdeg2 = a.degree(I[i].x);
2007                 if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
2008         }
2009
2010         const size_t n = u.size();
2011         vector<ex> U(n);
2012         for ( size_t i=0; i<n; ++i ) {
2013                 U[i] = umodpoly_to_ex(u[i], x);
2014         }
2015
2016         for ( size_t j=2; j<=nu; ++j ) {
2017                 vector<ex> U1 = U;
2018                 ex monomial = 1;
2019                 for ( size_t m=0; m<n; ++m) {
2020                         if ( lcU[m] != 1 ) {
2021                                 ex coef = lcU[m];
2022                                 for ( size_t i=j-1; i<nu-1; ++i ) {
2023                                         coef = coef.subs(I[i].x == I[i].evalpoint);
2024                                 }
2025                                 coef = make_modular(coef, R);
2026                                 int deg = U[m].degree(x);
2027                                 U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
2028                         }
2029                 }
2030                 ex Uprod = 1;
2031                 for ( size_t i=0; i<n; ++i ) {
2032                         Uprod *= U[i];
2033                 }
2034                 ex e = expand(A[j-1] - Uprod);
2035
2036                 vector<EvalPoint> newI;
2037                 for ( size_t i=1; i<=j-2; ++i ) {
2038                         newI.push_back(I[i-1]);
2039                 }
2040
2041                 ex xj = I[j-2].x;
2042                 int alphaj = I[j-2].evalpoint;
2043                 size_t deg = A[j-1].degree(xj);
2044                 for ( size_t k=1; k<=deg; ++k ) {
2045                         if ( !e.is_zero() ) {
2046                                 monomial *= (xj - alphaj);
2047                                 monomial = expand(monomial);
2048                                 ex dif = e.diff(ex_to<symbol>(xj), k);
2049                                 ex c = dif.subs(xj==alphaj) / factorial(k);
2050                                 if ( !c.is_zero() ) {
2051                                         vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
2052                                         for ( size_t i=0; i<n; ++i ) {
2053                                                 deltaU[i] *= monomial;
2054                                                 U[i] += deltaU[i];
2055                                                 U[i] = make_modular(U[i], R);
2056                                         }
2057                                         ex Uprod = 1;
2058                                         for ( size_t i=0; i<n; ++i ) {
2059                                                 Uprod *= U[i];
2060                                         }
2061                                         e = A[j-1] - Uprod;
2062                                         e = make_modular(e, R);
2063                                 }
2064                         }
2065                 }
2066         }
2067
2068         ex acand = 1;
2069         for ( size_t i=0; i<U.size(); ++i ) {
2070                 acand *= U[i];
2071         }
2072         if ( expand(a-acand).is_zero() ) {
2073                 lst res;
2074                 for ( size_t i=0; i<U.size(); ++i ) {
2075                         res.append(U[i]);
2076                 }
2077                 return res;
2078         } else {
2079                 lst res;
2080                 return lst{};
2081         }
2082 }
2083
2084 /** Takes a factorized expression and puts the factors in a lst. The exponents
2085  *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
2086  *  element of the list is always the numeric coefficient.
2087  */
2088 static ex put_factors_into_lst(const ex& e)
2089 {
2090         lst result;
2091         if ( is_a<numeric>(e) ) {
2092                 result.append(e);
2093                 return result;
2094         }
2095         if ( is_a<power>(e) ) {
2096                 result.append(1);
2097                 result.append(e.op(0));
2098                 return result;
2099         }
2100         if ( is_a<symbol>(e) || is_a<add>(e) ) {
2101                 ex icont(e.integer_content());
2102                 result.append(icont);
2103                 result.append(e/icont);
2104                 return result;
2105         }
2106         if ( is_a<mul>(e) ) {
2107                 ex nfac = 1;
2108                 for ( size_t i=0; i<e.nops(); ++i ) {
2109                         ex op = e.op(i);
2110                         if ( is_a<numeric>(op) ) {
2111                                 nfac = op;
2112                         }
2113                         if ( is_a<power>(op) ) {
2114                                 result.append(op.op(0));
2115                         }
2116                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
2117                                 result.append(op);
2118                         }
2119                 }
2120                 result.prepend(nfac);
2121                 return result;
2122         }
2123         throw runtime_error("put_factors_into_lst: bad term.");
2124 }
2125
2126 /** Checks a set of numbers for whether each number has a unique prime factor.
2127  *
2128  *  @param[in]  f  list of numbers to check
2129  *  @return        true: if number set is bad, false: if set is okay (has unique
2130  *                 prime factors)
2131  */
2132 static bool checkdivisors(const lst& f)
2133 {
2134         const int k = f.nops();
2135         numeric q, r;
2136         vector<numeric> d(k);
2137         d[0] = ex_to<numeric>(abs(f.op(0)));
2138         for ( int i=1; i<k; ++i ) {
2139                 q = ex_to<numeric>(abs(f.op(i)));
2140                 for ( int j=i-1; j>=0; --j ) {
2141                         r = d[j];
2142                         do {
2143                                 r = gcd(r, q);
2144                                 q = q/r;
2145                         } while ( r != 1 );
2146                         if ( q == 1 ) {
2147                                 return true;
2148                         }
2149                 }
2150                 d[i] = q;
2151         }
2152         return false;
2153 }
2154
2155 /** Generates a set of evaluation points for a multivariate polynomial.
2156  *  The set fulfills the following conditions:
2157  *  1. lcoeff(evaluated_polynomial) does not vanish
2158  *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
2159  *  3. evaluated_polynomial is square free
2160  *  See [Wan] for more details.
2161  *
2162  *  @param[in]     u        multivariate polynomial to be factored
2163  *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
2164  *  @param[in]     syms     set of symbols that appear in u
2165  *  @param[in]     f        lst containing the factors of the leading coefficient vn
2166  *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
2167  *  @param[out]    u0       returns the evaluated (univariate) polynomial
2168  *  @param[out]    a        returns the valid evaluation points. must have initial size equal
2169  *                          number of symbols-1 before calling generate_set
2170  */
2171 static void generate_set(const ex& u, const ex& vn, const exset& syms, const lst& f,
2172                          numeric& modulus, ex& u0, vector<numeric>& a)
2173 {
2174         const ex& x = *syms.begin();
2175         while ( true ) {
2176                 ++modulus;
2177                 // generate a set of integers ...
2178                 u0 = u;
2179                 ex vna = vn;
2180                 ex vnatry;
2181                 exset::const_iterator s = syms.begin();
2182                 ++s;
2183                 for ( size_t i=0; i<a.size(); ++i ) {
2184                         do {
2185                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
2186                                 vnatry = vna.subs(*s == a[i]);
2187                                 // ... for which the leading coefficient doesn't vanish ...
2188                         } while ( vnatry == 0 );
2189                         vna = vnatry;
2190                         u0 = u0.subs(*s == a[i]);
2191                         ++s;
2192                 }
2193                 // ... for which u0 is square free ...
2194                 ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
2195                 if ( !is_a<numeric>(g) ) {
2196                         continue;
2197                 }
2198                 if ( !is_a<numeric>(vn) ) {
2199                         // ... and for which the evaluated factors have each an unique prime factor
2200                         lst fnum = f;
2201                         fnum.let_op(0) = fnum.op(0) * u0.content(x);
2202                         for ( size_t i=1; i<fnum.nops(); ++i ) {
2203                                 if ( !is_a<numeric>(fnum.op(i)) ) {
2204                                         s = syms.begin();
2205                                         ++s;
2206                                         for ( size_t j=0; j<a.size(); ++j, ++s ) {
2207                                                 fnum.let_op(i) = fnum.op(i).subs(*s == a[j]);
2208                                         }
2209                                 }
2210                         }
2211                         if ( checkdivisors(fnum) ) {
2212                                 continue;
2213                         }
2214                 }
2215                 // ok, we have a valid set now
2216                 return;
2217         }
2218 }
2219
2220 // forward declaration
2221 static ex factor_sqrfree(const ex& poly);
2222
2223 /** Multivariate factorization.
2224  *
2225  *  The implementation is based on the algorithm described in [Wan].
2226  *  An evaluation homomorphism (a set of integers) is determined that fulfills
2227  *  certain criteria. The evaluated polynomial is univariate and is factorized
2228  *  by factor_univariate(). The main work then is to find the correct leading
2229  *  coefficients of the univariate factors. They have to correspond to the
2230  *  factors of the (multivariate) leading coefficient of the input polynomial
2231  *  (as defined for a specific variable x). After that the Hensel lifting can be
2232  *  performed.
2233  *
2234  *  @param[in] poly  expanded, square free polynomial
2235  *  @param[in] syms  contains the symbols in the polynomial
2236  *  @return          factorized polynomial
2237  */
2238 static ex factor_multivariate(const ex& poly, const exset& syms)
2239 {
2240         exset::const_iterator s;
2241         const ex& x = *syms.begin();
2242
2243         // make polynomial primitive
2244         ex unit, cont, pp;
2245         poly.unitcontprim(x, unit, cont, pp);
2246         if ( !is_a<numeric>(cont) ) {
2247                 return factor_sqrfree(cont) * factor_sqrfree(pp);
2248         }
2249
2250         // factor leading coefficient
2251         ex vn = pp.collect(x).lcoeff(x);
2252         ex vnlst;
2253         if ( is_a<numeric>(vn) ) {
2254                 vnlst = lst{vn};
2255         }
2256         else {
2257                 ex vnfactors = factor(vn);
2258                 vnlst = put_factors_into_lst(vnfactors);
2259         }
2260
2261         const unsigned int maxtrials = 3;
2262         numeric modulus = (vnlst.nops() > 3) ? vnlst.nops() : 3;
2263         vector<numeric> a(syms.size()-1, 0);
2264
2265         // try now to factorize until we are successful
2266         while ( true ) {
2267
2268                 unsigned int trialcount = 0;
2269                 unsigned int prime;
2270                 int factor_count = 0;
2271                 int min_factor_count = -1;
2272                 ex u, delta;
2273                 ex ufac, ufaclst;
2274
2275                 // try several evaluation points to reduce the number of factors
2276                 while ( trialcount < maxtrials ) {
2277
2278                         // generate a set of valid evaluation points
2279                         generate_set(pp, vn, syms, ex_to<lst>(vnlst), modulus, u, a);
2280
2281                         ufac = factor_univariate(u, x, prime);
2282                         ufaclst = put_factors_into_lst(ufac);
2283                         factor_count = ufaclst.nops()-1;
2284                         delta = ufaclst.op(0);
2285
2286                         if ( factor_count <= 1 ) {
2287                                 // irreducible
2288                                 return poly;
2289                         }
2290                         if ( min_factor_count < 0 ) {
2291                                 // first time here
2292                                 min_factor_count = factor_count;
2293                         }
2294                         else if ( min_factor_count == factor_count ) {
2295                                 // one less to try
2296                                 ++trialcount;
2297                         }
2298                         else if ( min_factor_count > factor_count ) {
2299                                 // new minimum, reset trial counter
2300                                 min_factor_count = factor_count;
2301                                 trialcount = 0;
2302                         }
2303                 }
2304
2305                 // determine true leading coefficients for the Hensel lifting
2306                 vector<ex> C(factor_count);
2307                 if ( is_a<numeric>(vn) ) {
2308                         // easy case
2309                         for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2310                                 C[i-1] = ufaclst.op(i).lcoeff(x);
2311                         }
2312                 } else {
2313                         // difficult case.
2314                         // we use the property of the ftilde having a unique prime factor.
2315                         // details can be found in [Wan].
2316                         // calculate ftilde
2317                         vector<numeric> ftilde(vnlst.nops()-1);
2318                         for ( size_t i=0; i<ftilde.size(); ++i ) {
2319                                 ex ft = vnlst.op(i+1);
2320                                 s = syms.begin();
2321                                 ++s;
2322                                 for ( size_t j=0; j<a.size(); ++j ) {
2323                                         ft = ft.subs(*s == a[j]);
2324                                         ++s;
2325                                 }
2326                                 ftilde[i] = ex_to<numeric>(ft);
2327                         }
2328                         // calculate D and C
2329                         vector<bool> used_flag(ftilde.size(), false);
2330                         vector<ex> D(factor_count, 1);
2331                         if ( delta == 1 ) {
2332                                 for ( int i=0; i<factor_count; ++i ) {
2333                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2334                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2335                                                 int count = 0;
2336                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2337                                                         prefac = iquo(prefac, ftilde[j]);
2338                                                         ++count;
2339                                                 }
2340                                                 if ( count ) {
2341                                                         used_flag[j] = true;
2342                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2343                                                 }
2344                                         }
2345                                         C[i] = D[i] * prefac;
2346                                 }
2347                         } else {
2348                                 for ( int i=0; i<factor_count; ++i ) {
2349                                         numeric prefac = ex_to<numeric>(ufaclst.op(i+1).lcoeff(x));
2350                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2351                                                 int count = 0;
2352                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2353                                                         prefac = iquo(prefac, ftilde[j]);
2354                                                         ++count;
2355                                                 }
2356                                                 while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
2357                                                         numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
2358                                                         prefac = iquo(prefac, g);
2359                                                         delta = delta / (ftilde[j]/g);
2360                                                         ufaclst.let_op(i+1) = ufaclst.op(i+1) * (ftilde[j]/g);
2361                                                         ++count;
2362                                                 }
2363                                                 if ( count ) {
2364                                                         used_flag[j] = true;
2365                                                         D[i] = D[i] * pow(vnlst.op(j+1), count);
2366                                                 }
2367                                         }
2368                                         C[i] = D[i] * prefac;
2369                                 }
2370                         }
2371                         // check if something went wrong
2372                         bool some_factor_unused = false;
2373                         for ( size_t i=0; i<used_flag.size(); ++i ) {
2374                                 if ( !used_flag[i] ) {
2375                                         some_factor_unused = true;
2376                                         break;
2377                                 }
2378                         }
2379                         if ( some_factor_unused ) {
2380                                 continue;
2381                         }
2382                 }
2383
2384                 // multiply the remaining content of the univariate polynomial into the
2385                 // first factor
2386                 if ( delta != 1 ) {
2387                         C[0] = C[0] * delta;
2388                         ufaclst.let_op(1) = ufaclst.op(1) * delta;
2389                 }
2390
2391                 // set up evaluation points
2392                 EvalPoint ep;
2393                 vector<EvalPoint> epv;
2394                 s = syms.begin();
2395                 ++s;
2396                 for ( size_t i=0; i<a.size(); ++i ) {
2397                         ep.x = *s++;
2398                         ep.evalpoint = a[i].to_int();
2399                         epv.push_back(ep);
2400                 }
2401
2402                 // calc bound p^l
2403                 int maxdeg = 0;
2404                 for ( int i=1; i<=factor_count; ++i ) {
2405                         if ( ufaclst.op(i).degree(x) > maxdeg ) {
2406                                 maxdeg = ufaclst[i].degree(x);
2407                         }
2408                 }
2409                 cl_I B = 2*calc_bound(u, x, maxdeg);
2410                 cl_I l = 1;
2411                 cl_I pl = prime;
2412                 while ( pl < B ) {
2413                         l = l + 1;
2414                         pl = pl * prime;
2415                 }
2416
2417                 // set up modular factors (mod p^l)
2418                 cl_modint_ring R = find_modint_ring(expt_pos(cl_I(prime),l));
2419                 upvec modfactors(ufaclst.nops()-1);
2420                 for ( size_t i=1; i<ufaclst.nops(); ++i ) {
2421                         umodpoly_from_ex(modfactors[i-1], ufaclst.op(i), x, R);
2422                 }
2423
2424                 // try Hensel lifting
2425                 ex res = hensel_multivar(pp, x, epv, prime, l, modfactors, C);
2426                 if ( res != lst{} ) {
2427                         ex result = cont * unit;
2428                         for ( size_t i=0; i<res.nops(); ++i ) {
2429                                 result *= res.op(i).content(x) * res.op(i).unit(x);
2430                                 result *= res.op(i).primpart(x);
2431                         }
2432                         return result;
2433                 }
2434         }
2435 }
2436
2437 /** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
2438  */
2439 struct find_symbols_map : public map_function {
2440         exset syms;
2441         ex operator()(const ex& e) override
2442         {
2443                 if ( is_a<symbol>(e) ) {
2444                         syms.insert(e);
2445                         return e;
2446                 }
2447                 return e.map(*this);
2448         }
2449 };
2450
2451 /** Factorizes a polynomial that is square free. It calls either the univariate
2452  *  or the multivariate factorization functions.
2453  */
2454 static ex factor_sqrfree(const ex& poly)
2455 {
2456         // determine all symbols in poly
2457         find_symbols_map findsymbols;
2458         findsymbols(poly);
2459         if ( findsymbols.syms.size() == 0 ) {
2460                 return poly;
2461         }
2462
2463         if ( findsymbols.syms.size() == 1 ) {
2464                 // univariate case
2465                 const ex& x = *(findsymbols.syms.begin());
2466                 if ( poly.ldegree(x) > 0 ) {
2467                         // pull out direct factors
2468                         int ld = poly.ldegree(x);
2469                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
2470                         return res * pow(x,ld);
2471                 } else {
2472                         ex res = factor_univariate(poly, x);
2473                         return res;
2474                 }
2475         }
2476
2477         // multivariate case
2478         ex res = factor_multivariate(poly, findsymbols.syms);
2479         return res;
2480 }
2481
2482 /** Map used by factor() when factor_options::all is given to access all
2483  *  subexpressions and to call factor() on them.
2484  */
2485 struct apply_factor_map : public map_function {
2486         unsigned options;
2487         apply_factor_map(unsigned options_) : options(options_) { }
2488         ex operator()(const ex& e) override
2489         {
2490                 if ( e.info(info_flags::polynomial) ) {
2491                         return factor(e, options);
2492                 }
2493                 if ( is_a<add>(e) ) {
2494                         ex s1, s2;
2495                         for ( size_t i=0; i<e.nops(); ++i ) {
2496                                 if ( e.op(i).info(info_flags::polynomial) ) {
2497                                         s1 += e.op(i);
2498                                 } else {
2499                                         s2 += e.op(i);
2500                                 }
2501                         }
2502                         return factor(s1, options) + s2.map(*this);
2503                 }
2504                 return e.map(*this);
2505         }
2506 };
2507
2508 } // anonymous namespace
2509
2510 /** Interface function to the outside world. It checks the arguments, tries a
2511  *  square free factorization, and then calls factor_sqrfree to do the hard
2512  *  work.
2513  */
2514 ex factor(const ex& poly, unsigned options)
2515 {
2516         // check arguments
2517         if ( !poly.info(info_flags::polynomial) ) {
2518                 if ( options & factor_options::all ) {
2519                         options &= ~factor_options::all;
2520                         apply_factor_map factor_map(options);
2521                         return factor_map(poly);
2522                 }
2523                 return poly;
2524         }
2525
2526         // determine all symbols in poly
2527         find_symbols_map findsymbols;
2528         findsymbols(poly);
2529         if ( findsymbols.syms.size() == 0 ) {
2530                 return poly;
2531         }
2532         lst syms;
2533         for (auto & i : findsymbols.syms ) {
2534                 syms.append(i);
2535         }
2536
2537         // make poly square free
2538         ex sfpoly = sqrfree(poly.expand(), syms);
2539
2540         // factorize the square free components
2541         if ( is_a<power>(sfpoly) ) {
2542                 // case: (polynomial)^exponent
2543                 const ex& base = sfpoly.op(0);
2544                 if ( !is_a<add>(base) ) {
2545                         // simple case: (monomial)^exponent
2546                         return sfpoly;
2547                 }
2548                 ex f = factor_sqrfree(base);
2549                 return pow(f, sfpoly.op(1));
2550         }
2551         if ( is_a<mul>(sfpoly) ) {
2552                 // case: multiple factors
2553                 ex res = 1;
2554                 for ( size_t i=0; i<sfpoly.nops(); ++i ) {
2555                         const ex& t = sfpoly.op(i);
2556                         if ( is_a<power>(t) ) {
2557                                 const ex& base = t.op(0);
2558                                 if ( !is_a<add>(base) ) {
2559                                         res *= t;
2560                                 } else {
2561                                         ex f = factor_sqrfree(base);
2562                                         res *= pow(f, t.op(1));
2563                                 }
2564                         } else if ( is_a<add>(t) ) {
2565                                 ex f = factor_sqrfree(t);
2566                                 res *= f;
2567                         } else {
2568                                 res *= t;
2569                         }
2570                 }
2571                 return res;
2572         }
2573         if ( is_a<symbol>(sfpoly) ) {
2574                 return poly;
2575         }
2576         // case: (polynomial)
2577         ex f = factor_sqrfree(sfpoly);
2578         return f;
2579 }
2580
2581 } // namespace GiNaC
2582
2583 #ifdef DEBUGFACTOR
2584 #include "test.h"
2585 #endif