ginac/factor.cpp

   1 /** @file factor.cpp
   2  *
   3  *  Polynomial factorization (implementation).
   4  *
   5  *  The interface function factor() at the end of this file is defined in the
   6  *  GiNaC namespace. All other utility functions and classes are defined in an
   7  *  additional anonymous namespace.
   8  *
   9  *  Factorization starts by doing a square free factorization and making the
  10  *  coefficients integer. Then, depending on the number of free variables it
  11  *  proceeds either in dedicated univariate or multivariate factorization code.
  12  *
  13  *  Univariate factorization does a modular factorization via Berlekamp's
  14  *  algorithm and distinct degree factorization. Hensel lifting is used at the
  15  *  end.
  16  *
  17  *  Multivariate factorization uses the univariate factorization (applying a
  18  *  evaluation homomorphism first) and Hensel lifting raises the answer to the
  19  *  multivariate domain. The Hensel lifting code is completely distinct from the
  20  *  code used by the univariate factorization.
  21  *
  22  *  Algorithms used can be found in
  23  *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
  24  *          P.S.Wang,
  25  *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  26  *    [GCL] Algorithms for Computer Algebra,
  27  *          K.O.Geddes, S.R.Czapor, G.Labahn,
  28  *          Springer Verlag, 1992.
  29  *    [Mig] Some Useful Bounds,
  30  *          M.Mignotte,
  31  *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
  32  *          pp. 259-263, Springer-Verlag, New York, 1982.
  33  */
  34
  35 /*
  36  *  GiNaC Copyright (C) 1999-2023 Johannes Gutenberg University Mainz, Germany
  37  *
  38  *  This program is free software; you can redistribute it and/or modify
  39  *  it under the terms of the GNU General Public License as published by
  40  *  the Free Software Foundation; either version 2 of the License, or
  41  *  (at your option) any later version.
  42  *
  43  *  This program is distributed in the hope that it will be useful,
  44  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  45  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  46  *  GNU General Public License for more details.
  47  *
  48  *  You should have received a copy of the GNU General Public License
  49  *  along with this program; if not, write to the Free Software
  50  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  51  */
  52
  53 //#define DEBUGFACTOR
  54
  55 #include "factor.h"
  56
  57 #include "ex.h"
  58 #include "numeric.h"
  59 #include "operators.h"
  60 #include "inifcns.h"
  61 #include "symbol.h"
  62 #include "relational.h"
  63 #include "power.h"
  64 #include "mul.h"
  65 #include "normal.h"
  66 #include "add.h"
  67
  68 #include <type_traits>
  69 #include <algorithm>
  70 #include <limits>
  71 #include <list>
  72 #include <vector>
  73 #include <stack>
  74 #ifdef DEBUGFACTOR
  75 #include <ostream>
  76 #endif
  77 using namespace std;
  78
  79 #include <cln/cln.h>
  80 using namespace cln;
  81
  82 namespace GiNaC {
  83
  84 // anonymous namespace to hide all utility functions
  85 namespace {
  86
  87 #ifdef DEBUGFACTOR
  88 #define DCOUT(str) cout << #str << endl
  89 #define DCOUTVAR(var) cout << #var << ": " << var << endl
  90 #define DCOUT2(str,var) cout << #str << ": " << var << endl
  91 ostream& operator<<(ostream& o, const vector<int>& v)
  92 {
  93         auto i = v.begin(), end = v.end();
  94         while ( i != end ) {
  95                 o << *i << " ";
  96                 ++i;
  97         }
  98         return o;
  99 }
 100 static ostream& operator<<(ostream& o, const vector<cl_I>& v)
 101 {
 102         auto i = v.begin(), end = v.end();
 103         while ( i != end ) {
 104                 o << *i << "[" << i-v.begin() << "]" << " ";
 105                 ++i;
 106         }
 107         return o;
 108 }
 109 static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 110 {
 111         auto i = v.begin(), end = v.end();
 112         while ( i != end ) {
 113                 o << *i << "[" << i-v.begin() << "]" << " ";
 114                 ++i;
 115         }
 116         return o;
 117 }
 118 ostream& operator<<(ostream& o, const vector<numeric>& v)
 119 {
 120         for ( size_t i=0; i<v.size(); ++i ) {
 121                 o << v[i] << " ";
 122         }
 123         return o;
 124 }
 125 ostream& operator<<(ostream& o, const vector<vector<cl_MI>>& v)
 126 {
 127         auto i = v.begin(), end = v.end();
 128         while ( i != end ) {
 129                 o << i-v.begin() << ": " << *i << endl;
 130                 ++i;
 131         }
 132         return o;
 133 }
 134 #else
 135 #define DCOUT(str)
 136 #define DCOUTVAR(var)
 137 #define DCOUT2(str,var)
 138 #endif // def DEBUGFACTOR
 139
 140 ////////////////////////////////////////////////////////////////////////////////
 141 // modular univariate polynomial code
 142
 143 typedef std::vector<cln::cl_MI> umodpoly;
 144 typedef std::vector<cln::cl_I> upoly;
 145 typedef vector<umodpoly> upvec;
 146
 147
 148 // COPY FROM UPOLY.H
 149
 150 // CHANGED size_t -> int !!!
 151 template<typename T> static int degree(const T& p)
 152 {
 153         return p.size() - 1;
 154 }
 155
 156 template<typename T> static typename T::value_type lcoeff(const T& p)
 157 {
 158         return p[p.size() - 1];
 159 }
 160
 161 /** Make the polynomial unit normal (having unit normal leading coefficient).
 162  *
 163  *  @param[in, out] a  polynomial to make unit normal
 164  *  @return            true if polynomial a was already unit normal, false otherwise
 165  */
 166 static bool normalize_in_field(umodpoly& a)
 167 {
 168         if (a.size() == 0)
 169                 return true;
 170         if ( lcoeff(a) == a[0].ring()->one() ) {
 171                 return true;
 172         }
 173
 174         const cln::cl_MI lc_1 = recip(lcoeff(a));
 175         for (std::size_t k = a.size(); k-- != 0; )
 176                 a[k] = a[k]*lc_1;
 177         return false;
 178 }
 179
 180 /** Remove leading zero coefficients from polynomial.
 181  *
 182  *  @param[in, out] p     polynomial from which the zero leading coefficients will be removed
 183  *  @param[in]      hint  assume all coefficients of order ≥ hint are zero
 184  */
 185 template<typename T> static void
 186 canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
 187 {
 188         std::size_t i = min(p.size(), hint);
 189
 190         while ( i-- && zerop(p[i]) ) { }
 191
 192         p.erase(p.begin() + i + 1, p.end());
 193 }
 194
 195 // END COPY FROM UPOLY.H
 196
 197 template<typename T> struct uvar_poly_p
 198 {
 199         static const bool value = false;
 200 };
 201
 202 template<> struct uvar_poly_p<upoly>
 203 {
 204         static const bool value = true;
 205 };
 206
 207 template<> struct uvar_poly_p<umodpoly>
 208 {
 209         static const bool value = true;
 210 };
 211
 212 template<typename T>
 213 // Don't define this for anything but univariate polynomials.
 214 static typename enable_if<uvar_poly_p<T>::value, T>::type
 215 operator+(const T& a, const T& b)
 216 {
 217         int sa = a.size();
 218         int sb = b.size();
 219         if ( sa >= sb ) {
 220                 T r(sa);
 221                 int i = 0;
 222                 for ( ; i<sb; ++i ) {
 223                         r[i] = a[i] + b[i];
 224                 }
 225                 for ( ; i<sa; ++i ) {
 226                         r[i] = a[i];
 227                 }
 228                 canonicalize(r);
 229                 return r;
 230         }
 231         else {
 232                 T r(sb);
 233                 int i = 0;
 234                 for ( ; i<sa; ++i ) {
 235                         r[i] = a[i] + b[i];
 236                 }
 237                 for ( ; i<sb; ++i ) {
 238                         r[i] = b[i];
 239                 }
 240                 canonicalize(r);
 241                 return r;
 242         }
 243 }
 244
 245 template<typename T>
 246 // Don't define this for anything but univariate polynomials. Otherwise
 247 // overload resolution might fail (this actually happens when compiling
 248 // GiNaC with g++ 3.4).
 249 static typename enable_if<uvar_poly_p<T>::value, T>::type
 250 operator-(const T& a, const T& b)
 251 {
 252         int sa = a.size();
 253         int sb = b.size();
 254         if ( sa >= sb ) {
 255                 T r(sa);
 256                 int i = 0;
 257                 for ( ; i<sb; ++i ) {
 258                         r[i] = a[i] - b[i];
 259                 }
 260                 for ( ; i<sa; ++i ) {
 261                         r[i] = a[i];
 262                 }
 263                 canonicalize(r);
 264                 return r;
 265         }
 266         else {
 267                 T r(sb);
 268                 int i = 0;
 269                 for ( ; i<sa; ++i ) {
 270                         r[i] = a[i] - b[i];
 271                 }
 272                 for ( ; i<sb; ++i ) {
 273                         r[i] = -b[i];
 274                 }
 275                 canonicalize(r);
 276                 return r;
 277         }
 278 }
 279
 280 static upoly operator*(const upoly& a, const upoly& b)
 281 {
 282         upoly c;
 283         if ( a.empty() || b.empty() ) return c;
 284
 285         int n = degree(a) + degree(b);
 286         c.resize(n+1, 0);
 287         for ( int i=0 ; i<=n; ++i ) {
 288                 for ( int j=0 ; j<=i; ++j ) {
 289                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 290                         c[i] = c[i] + a[j] * b[i-j];
 291                 }
 292         }
 293         canonicalize(c);
 294         return c;
 295 }
 296
 297 static umodpoly operator*(const umodpoly& a, const umodpoly& b)
 298 {
 299         umodpoly c;
 300         if ( a.empty() || b.empty() ) return c;
 301
 302         int n = degree(a) + degree(b);
 303         c.resize(n+1, a[0].ring()->zero());
 304         for ( int i=0 ; i<=n; ++i ) {
 305                 for ( int j=0 ; j<=i; ++j ) {
 306                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 307                         c[i] = c[i] + a[j] * b[i-j];
 308                 }
 309         }
 310         canonicalize(c);
 311         return c;
 312 }
 313
 314 static upoly operator*(const upoly& a, const cl_I& x)
 315 {
 316         if ( zerop(x) ) {
 317                 upoly r;
 318                 return r;
 319         }
 320         upoly r(a.size());
 321         for ( size_t i=0; i<a.size(); ++i ) {
 322                 r[i] = a[i] * x;
 323         }
 324         return r;
 325 }
 326
 327 static upoly operator/(const upoly& a, const cl_I& x)
 328 {
 329         if ( zerop(x) ) {
 330                 upoly r;
 331                 return r;
 332         }
 333         upoly r(a.size());
 334         for ( size_t i=0; i<a.size(); ++i ) {
 335                 r[i] = exquo(a[i],x);
 336         }
 337         return r;
 338 }
 339
 340 static umodpoly operator*(const umodpoly& a, const cl_MI& x)
 341 {
 342         umodpoly r(a.size());
 343         for ( size_t i=0; i<a.size(); ++i ) {
 344                 r[i] = a[i] * x;
 345         }
 346         canonicalize(r);
 347         return r;
 348 }
 349
 350 static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 351 {
 352         // assert: e is in Z[x]
 353         int deg = e.degree(x);
 354         up.resize(deg+1);
 355         int ldeg = e.ldegree(x);
 356         for ( ; deg>=ldeg; --deg ) {
 357                 up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 358         }
 359         for ( ; deg>=0; --deg ) {
 360                 up[deg] = 0;
 361         }
 362         canonicalize(up);
 363 }
 364
 365 static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 366 {
 367         int deg = degree(e);
 368         ump.resize(deg+1);
 369         for ( ; deg>=0; --deg ) {
 370                 ump[deg] = R->canonhom(e[deg]);
 371         }
 372         canonicalize(ump);
 373 }
 374
 375 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 376 {
 377         // assert: e is in Z[x]
 378         int deg = e.degree(x);
 379         ump.resize(deg+1);
 380         int ldeg = e.ldegree(x);
 381         for ( ; deg>=ldeg; --deg ) {
 382                 cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 383                 ump[deg] = R->canonhom(coeff);
 384         }
 385         for ( ; deg>=0; --deg ) {
 386                 ump[deg] = R->zero();
 387         }
 388         canonicalize(ump);
 389 }
 390
 391 #ifdef DEBUGFACTOR
 392 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 393 {
 394         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 395 }
 396 #endif
 397
 398 static ex upoly_to_ex(const upoly& a, const ex& x)
 399 {
 400         if ( a.empty() ) return 0;
 401         ex e;
 402         for ( int i=degree(a); i>=0; --i ) {
 403                 e += numeric(a[i]) * pow(x, i);
 404         }
 405         return e;
 406 }
 407
 408 static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 409 {
 410         if ( a.empty() ) return 0;
 411         cl_modint_ring R = a[0].ring();
 412         cl_I mod = R->modulus;
 413         cl_I halfmod = (mod-1) >> 1;
 414         ex e;
 415         for ( int i=degree(a); i>=0; --i ) {
 416                 cl_I n = R->retract(a[i]);
 417                 if ( n > halfmod ) {
 418                         e += numeric(n-mod) * pow(x, i);
 419                 } else {
 420                         e += numeric(n) * pow(x, i);
 421                 }
 422         }
 423         return e;
 424 }
 425
 426 static upoly umodpoly_to_upoly(const umodpoly& a)
 427 {
 428         upoly e(a.size());
 429         if ( a.empty() ) return e;
 430         cl_modint_ring R = a[0].ring();
 431         cl_I mod = R->modulus;
 432         cl_I halfmod = (mod-1) >> 1;
 433         for ( int i=degree(a); i>=0; --i ) {
 434                 cl_I n = R->retract(a[i]);
 435                 if ( n > halfmod ) {
 436                         e[i] = n-mod;
 437                 } else {
 438                         e[i] = n;
 439                 }
 440         }
 441         return e;
 442 }
 443
 444 static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
 445 {
 446         umodpoly e;
 447         if ( a.empty() ) return e;
 448         cl_modint_ring oldR = a[0].ring();
 449         size_t sa = a.size();
 450         e.resize(sa+m, R->zero());
 451         for ( size_t i=0; i<sa; ++i ) {
 452                 e[i+m] = R->canonhom(oldR->retract(a[i]));
 453         }
 454         canonicalize(e);
 455         return e;
 456 }
 457
 458 /** Divides all coefficients of the polynomial a by the positive integer x.
 459  *  All coefficients are supposed to be divisible by x. If they are not, the
 460  *  division will raise an exception.
 461  *
 462  *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
 463  *  @param[in]     x  positive integer that divides the coefficients
 464  */
 465 static void reduce_coeff(umodpoly& a, const cl_I& x)
 466 {
 467         if ( a.empty() ) return;
 468
 469         cl_modint_ring R = a[0].ring();
 470         for (auto & i : a) {
 471                 // cln cannot perform this division in the modular field
 472                 cl_I c = R->retract(i);
 473                 i = cl_MI(R, exquopos(c, x));
 474         }
 475 }
 476
 477 /** Calculates remainder of a/b.
 478  *  Assertion: a and b not empty.
 479  *
 480  *  @param[in]  a  polynomial dividend
 481  *  @param[in]  b  polynomial divisor
 482  *  @param[out] r  polynomial remainder
 483  */
 484 static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 485 {
 486         int k, n;
 487         n = degree(b);
 488         k = degree(a) - n;
 489         r = a;
 490         if ( k < 0 ) return;
 491
 492         do {
 493                 cl_MI qk = div(r[n+k], b[n]);
 494                 if ( !zerop(qk) ) {
 495                         for ( int i=0; i<n; ++i ) {
 496                                 unsigned int j = n + k - 1 - i;
 497                                 r[j] = r[j] - qk * b[j-k];
 498                         }
 499                 }
 500         } while ( k-- );
 501
 502         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 503         canonicalize(r, n);
 504 }
 505
 506 /** Calculates quotient of a/b.
 507  *  Assertion: a and b not empty.
 508  *
 509  *  @param[in]  a  polynomial dividend
 510  *  @param[in]  b  polynomial divisor
 511  *  @param[out] q  polynomial quotient
 512  */
 513 static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 514 {
 515         int k, n;
 516         n = degree(b);
 517         k = degree(a) - n;
 518         q.clear();
 519         if ( k < 0 ) return;
 520
 521         umodpoly r = a;
 522         q.resize(k+1, a[0].ring()->zero());
 523         do {
 524                 cl_MI qk = div(r[n+k], b[n]);
 525                 if ( !zerop(qk) ) {
 526                         q[k] = qk;
 527                         for ( int i=0; i<n; ++i ) {
 528                                 unsigned int j = n + k - 1 - i;
 529                                 r[j] = r[j] - qk * b[j-k];
 530                         }
 531                 }
 532         } while ( k-- );
 533
 534         canonicalize(q);
 535 }
 536
 537 /** Calculates quotient and remainder of a/b.
 538  *  Assertion: a and b not empty.
 539  *
 540  *  @param[in]  a  polynomial dividend
 541  *  @param[in]  b  polynomial divisor
 542  *  @param[out] r  polynomial remainder
 543  *  @param[out] q  polynomial quotient
 544  */
 545 static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 546 {
 547         int k, n;
 548         n = degree(b);
 549         k = degree(a) - n;
 550         q.clear();
 551         r = a;
 552         if ( k < 0 ) return;
 553
 554         q.resize(k+1, a[0].ring()->zero());
 555         do {
 556                 cl_MI qk = div(r[n+k], b[n]);
 557                 if ( !zerop(qk) ) {
 558                         q[k] = qk;
 559                         for ( int i=0; i<n; ++i ) {
 560                                 unsigned int j = n + k - 1 - i;
 561                                 r[j] = r[j] - qk * b[j-k];
 562                         }
 563                 }
 564         } while ( k-- );
 565
 566         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 567         canonicalize(r);
 568         canonicalize(q);
 569 }
 570
 571 /** Calculates the GCD of polynomial a and b.
 572  *
 573  *  @param[in]  a  polynomial
 574  *  @param[in]  b  polynomial
 575  *  @param[out] c  GCD
 576  */
 577 static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
 578 {
 579         if ( degree(a) < degree(b) ) return gcd(b, a, c);
 580
 581         c = a;
 582         normalize_in_field(c);
 583         umodpoly d = b;
 584         normalize_in_field(d);
 585         umodpoly r;
 586         while ( !d.empty() ) {
 587                 rem(c, d, r);
 588                 c = d;
 589                 d = r;
 590         }
 591         normalize_in_field(c);
 592 }
 593
 594 /** Calculates the derivative of the polynomial a.
 595  *
 596  *  @param[in]  a  polynomial of which to take the derivative
 597  *  @param[out] d  result/derivative
 598  */
 599 static void deriv(const umodpoly& a, umodpoly& d)
 600 {
 601         d.clear();
 602         if ( a.size() <= 1 ) return;
 603
 604         d.insert(d.begin(), a.begin()+1, a.end());
 605         int max = d.size();
 606         for ( int i=1; i<max; ++i ) {
 607                 d[i] = d[i] * (i+1);
 608         }
 609         canonicalize(d);
 610 }
 611
 612 static bool unequal_one(const umodpoly& a)
 613 {
 614         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 615 }
 616
 617 static bool equal_one(const umodpoly& a)
 618 {
 619         return ( a.size() == 1 && a[0] == a[0].ring()->one() );
 620 }
 621
 622 /** Returns true if polynomial a is square free.
 623  *
 624  *  @param[in] a  polynomial to check
 625  *  @return       true if polynomial is square free, false otherwise
 626  */
 627 static bool squarefree(const umodpoly& a)
 628 {
 629         umodpoly b;
 630         deriv(a, b);
 631         if ( b.empty() ) {
 632                 return false;
 633         }
 634         umodpoly c;
 635         gcd(a, b, c);
 636         return equal_one(c);
 637 }
 638
 639 /** Computes w^q mod a.
 640  *  Uses theorem 2.1 from A.K.Lenstra's PhD thesis; see exercise 8.13 in [GCL].
 641  *
 642  *  @param[in]  w  polynomial
 643  *  @param[in]  a  modulus polynomial
 644  *  @param[in]  q  common modulus of w and a
 645  *  @param[out] r  result
 646  */
 647 static void expt_pos_Q(const umodpoly& w, const umodpoly& a, unsigned int q, umodpoly& r)
 648 {
 649         if ( w.empty() ) return;
 650         cl_MI zero = w[0].ring()->zero();
 651         int deg = degree(w);
 652         umodpoly buf(deg*q+1, zero);
 653         for ( size_t i=0; i<=deg; ++i ) {
 654                 buf[i*q] = w[i];
 655         }
 656         rem(buf, a, r);
 657 }
 658
 659 // END modular univariate polynomial code
 660 ////////////////////////////////////////////////////////////////////////////////
 661
 662 ////////////////////////////////////////////////////////////////////////////////
 663 // modular matrix
 664
 665 typedef vector<cl_MI> mvec;
 666
 667 class modular_matrix
 668 {
 669 #ifdef DEBUGFACTOR
 670         friend ostream& operator<<(ostream& o, const modular_matrix& m);
 671 #endif
 672 public:
 673         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
 674         {
 675                 m.resize(c*r, init);
 676         }
 677         size_t rowsize() const { return r; }
 678         size_t colsize() const { return c; }
 679         cl_MI& operator()(size_t row, size_t col) { return m[row*c + col]; }
 680         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 681         void mul_col(size_t col, const cl_MI x)
 682         {
 683                 for ( size_t rc=0; rc<r; ++rc ) {
 684                         std::size_t i = c*rc + col;
 685                         m[i] = m[i] * x;
 686                 }
 687         }
 688         void sub_col(size_t col1, size_t col2, const cl_MI fac)
 689         {
 690                 for ( size_t rc=0; rc<r; ++rc ) {
 691                         std::size_t i1 = col1 + c*rc;
 692                         std::size_t i2 = col2 + c*rc;
 693                         m[i1] = m[i1] - m[i2]*fac;
 694                 }
 695         }
 696         void switch_col(size_t col1, size_t col2)
 697         {
 698                 for ( size_t rc=0; rc<r; ++rc ) {
 699                         std::size_t i1 = col1 + rc*c;
 700                         std::size_t i2 = col2 + rc*c;
 701                         std::swap(m[i1], m[i2]);
 702                 }
 703         }
 704         void mul_row(size_t row, const cl_MI x)
 705         {
 706                 for ( size_t cc=0; cc<c; ++cc ) {
 707                         std::size_t i = row*c + cc;
 708                         m[i] = m[i] * x;
 709                 }
 710         }
 711         void sub_row(size_t row1, size_t row2, const cl_MI fac)
 712         {
 713                 for ( size_t cc=0; cc<c; ++cc ) {
 714                         std::size_t i1 = row1*c + cc;
 715                         std::size_t i2 = row2*c + cc;
 716                         m[i1] = m[i1] - m[i2]*fac;
 717                 }
 718         }
 719         void switch_row(size_t row1, size_t row2)
 720         {
 721                 for ( size_t cc=0; cc<c; ++cc ) {
 722                         std::size_t i1 = row1*c + cc;
 723                         std::size_t i2 = row2*c + cc;
 724                         std::swap(m[i1], m[i2]);
 725                 }
 726         }
 727         bool is_col_zero(size_t col) const
 728         {
 729                 for ( size_t rr=0; rr<r; ++rr ) {
 730                         std::size_t i = col + rr*c;
 731                         if ( !zerop(m[i]) ) {
 732                                 return false;
 733                         }
 734                 }
 735                 return true;
 736         }
 737         bool is_row_zero(size_t row) const
 738         {
 739                 for ( size_t cc=0; cc<c; ++cc ) {
 740                         std::size_t i = row*c + cc;
 741                         if ( !zerop(m[i]) ) {
 742                                 return false;
 743                         }
 744                 }
 745                 return true;
 746         }
 747         void set_row(size_t row, const vector<cl_MI>& newrow)
 748         {
 749                 for (std::size_t i2 = 0; i2 < newrow.size(); ++i2) {
 750                         std::size_t i1 = row*c + i2;
 751                         m[i1] = newrow[i2];
 752                 }
 753         }
 754         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
 755         mvec::const_iterator row_end(size_t row) const { return m.begin()+row*c+r; }
 756 private:
 757         size_t r, c;
 758         mvec m;
 759 };
 760
 761 #ifdef DEBUGFACTOR
 762 modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 763 {
 764         const unsigned int r = m1.rowsize();
 765         const unsigned int c = m2.colsize();
 766         modular_matrix o(r,c,m1(0,0));
 767
 768         for ( size_t i=0; i<r; ++i ) {
 769                 for ( size_t j=0; j<c; ++j ) {
 770                         cl_MI buf;
 771                         buf = m1(i,0) * m2(0,j);
 772                         for ( size_t k=1; k<c; ++k ) {
 773                                 buf = buf + m1(i,k)*m2(k,j);
 774                         }
 775                         o(i,j) = buf;
 776                 }
 777         }
 778         return o;
 779 }
 780
 781 ostream& operator<<(ostream& o, const modular_matrix& m)
 782 {
 783         cl_modint_ring R = m(0,0).ring();
 784         o << "{";
 785         for ( size_t i=0; i<m.rowsize(); ++i ) {
 786                 o << "{";
 787                 for ( size_t j=0; j<m.colsize()-1; ++j ) {
 788                         o << R->retract(m(i,j)) << ",";
 789                 }
 790                 o << R->retract(m(i,m.colsize()-1)) << "}";
 791                 if ( i != m.rowsize()-1 ) {
 792                         o << ",";
 793                 }
 794         }
 795         o << "}";
 796         return o;
 797 }
 798 #endif // def DEBUGFACTOR
 799
 800 // END modular matrix
 801 ////////////////////////////////////////////////////////////////////////////////
 802
 803 /** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
 804  *
 805  *  The implementation follows algorithm 8.5 of [GCL].
 806  *
 807  *  @param[in]  a_  modular polynomial
 808  *  @param[out] Q   Q matrix
 809  */
 810 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 811 {
 812         umodpoly a = a_;
 813         normalize_in_field(a);
 814
 815         int n = degree(a);
 816         unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
 817         umodpoly r(n, a[0].ring()->zero());
 818         r[0] = a[0].ring()->one();
 819         Q.set_row(0, r);
 820         unsigned int max = (n-1) * q;
 821         for ( size_t m=1; m<=max; ++m ) {
 822                 cl_MI rn_1 = r.back();
 823                 for ( size_t i=n-1; i>0; --i ) {
 824                         r[i] = r[i-1] - (rn_1 * a[i]);
 825                 }
 826                 r[0] = -rn_1 * a[0];
 827                 if ( (m % q) == 0 ) {
 828                         Q.set_row(m/q, r);
 829                 }
 830         }
 831 }
 832
 833 /** Determine the nullspace of a matrix M-1.
 834  *
 835  *  @param[in,out] M      matrix, will be modified
 836  *  @param[out]    basis  calculated nullspace of M-1
 837  */
 838 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 839 {
 840         const size_t n = M.rowsize();
 841         const cl_MI one = M(0,0).ring()->one();
 842         for ( size_t i=0; i<n; ++i ) {
 843                 M(i,i) = M(i,i) - one;
 844         }
 845         for ( size_t r=0; r<n; ++r ) {
 846                 size_t cc = 0;
 847                 for ( ; cc<n; ++cc ) {
 848                         if ( !zerop(M(r,cc)) ) {
 849                                 if ( cc < r ) {
 850                                         if ( !zerop(M(cc,cc)) ) {
 851                                                 continue;
 852                                         }
 853                                         M.switch_col(cc, r);
 854                                 }
 855                                 else if ( cc > r ) {
 856                                         M.switch_col(cc, r);
 857                                 }
 858                                 break;
 859                         }
 860                 }
 861                 if ( cc < n ) {
 862                         M.mul_col(r, recip(M(r,r)));
 863                         for ( cc=0; cc<n; ++cc ) {
 864                                 if ( cc != r ) {
 865                                         M.sub_col(cc, r, M(r,cc));
 866                                 }
 867                         }
 868                 }
 869         }
 870
 871         for ( size_t i=0; i<n; ++i ) {
 872                 M(i,i) = M(i,i) - one;
 873         }
 874         for ( size_t i=0; i<n; ++i ) {
 875                 if ( !M.is_row_zero(i) ) {
 876                         mvec nu(M.row_begin(i), M.row_end(i));
 877                         basis.push_back(nu);
 878                 }
 879         }
 880 }
 881
 882 /** Berlekamp's modular factorization.
 883  *
 884  *  The implementation follows algorithm 8.4 of [GCL].
 885  *
 886  *  @param[in]  a    modular polynomial
 887  *  @param[out] upv  vector containing modular factors. if upv was not empty the
 888  *                   new elements are added at the end
 889  */
 890 static void berlekamp(const umodpoly& a, upvec& upv)
 891 {
 892         cl_modint_ring R = a[0].ring();
 893         umodpoly one(1, R->one());
 894
 895         // find nullspace of Q matrix
 896         modular_matrix Q(degree(a), degree(a), R->zero());
 897         q_matrix(a, Q);
 898         vector<mvec> nu;
 899         nullspace(Q, nu);
 900
 901         const unsigned int k = nu.size();
 902         if ( k == 1 ) {
 903                 // irreducible
 904                 return;
 905         }
 906
 907         list<umodpoly> factors = {a};
 908         unsigned int size = 1;
 909         unsigned int r = 1;
 910         unsigned int q = cl_I_to_uint(R->modulus);
 911
 912         list<umodpoly>::iterator u = factors.begin();
 913
 914         // calculate all gcd's
 915         while ( true ) {
 916                 for ( unsigned int s=0; s<q; ++s ) {
 917                         umodpoly nur = nu[r];
 918                         nur[0] = nur[0] - cl_MI(R, s);
 919                         canonicalize(nur);
 920                         umodpoly g;
 921                         gcd(nur, *u, g);
 922                         if ( unequal_one(g) && g != *u ) {
 923                                 umodpoly uo;
 924                                 div(*u, g, uo);
 925                                 if ( equal_one(uo) ) {
 926                                         throw logic_error("berlekamp: unexpected divisor.");
 927                                 } else {
 928                                         *u = uo;
 929                                 }
 930                                 factors.push_back(g);
 931                                 size = 0;
 932                                 for (auto & i : factors) {
 933                                         if (degree(i))
 934                                                 ++size;
 935                                 }
 936                                 if ( size == k ) {
 937                                         for (auto & i : factors) {
 938                                                 upv.push_back(i);
 939                                         }
 940                                         return;
 941                                 }
 942                         }
 943                 }
 944                 if ( ++r == k ) {
 945                         r = 1;
 946                         ++u;
 947                 }
 948         }
 949 }
 950
 951 // modular square free factorization is not used at the moment so we deactivate
 952 // the code
 953 #if 0
 954
 955 /** Calculates a^(1/prime).
 956  *
 957  *  @param[in]  a      polynomial
 958  *  @param[in]  prime  prime number -> exponent 1/prime
 959  *  @param[out] ap     resulting polynomial
 960  */
 961 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 962 {
 963         size_t newdeg = degree(a)/prime;
 964         ap.resize(newdeg+1);
 965         ap[0] = a[0];
 966         for ( size_t i=1; i<=newdeg; ++i ) {
 967                 ap[i] = a[i*prime];
 968         }
 969 }
 970
 971 /** Modular square free factorization.
 972  *
 973  *  @param[in]  a        polynomial
 974  *  @param[out] factors  modular factors
 975  *  @param[out] mult     corresponding multiplicities (exponents)
 976  */
 977 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 978 {
 979         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
 980         int i = 1;
 981         umodpoly b;
 982         deriv(a, b);
 983         if ( b.size() ) {
 984                 umodpoly c;
 985                 gcd(a, b, c);
 986                 umodpoly w;
 987                 div(a, c, w);
 988                 while ( unequal_one(w) ) {
 989                         umodpoly y;
 990                         gcd(w, c, y);
 991                         umodpoly z;
 992                         div(w, y, z);
 993                         factors.push_back(z);
 994                         mult.push_back(i);
 995                         ++i;
 996                         w = y;
 997                         umodpoly buf;
 998                         div(c, y, buf);
 999                         c = buf;
1000                 }
1001                 if ( unequal_one(c) ) {
1002                         umodpoly cp;
1003                         expt_1_over_p(c, prime, cp);
1004                         size_t previ = mult.size();
1005                         modsqrfree(cp, factors, mult);
1006                         for ( size_t i=previ; i<mult.size(); ++i ) {
1007                                 mult[i] *= prime;
1008                         }
1009                 }
1010         } else {
1011                 umodpoly ap;
1012                 expt_1_over_p(a, prime, ap);
1013                 size_t previ = mult.size();
1014                 modsqrfree(ap, factors, mult);
1015                 for ( size_t i=previ; i<mult.size(); ++i ) {
1016                         mult[i] *= prime;
1017                 }
1018         }
1019 }
1020
1021 #endif // deactivation of square free factorization
1022
1023 /** Distinct degree factorization (DDF).
1024  *
1025  *  The implementation follows algorithm 8.8 of [GCL].
1026  *
1027  *  @param[in]  a_         modular polynomial
1028  *  @param[out] degrees    vector containing the degrees of the factors of the
1029  *                         corresponding polynomials in ddfactors.
1030  *  @param[out] ddfactors  vector containing polynomials which factors have the
1031  *                         degree given in degrees.
1032  */
1033 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
1034 {
1035         umodpoly a = a_;
1036
1037         cl_modint_ring R = a[0].ring();
1038         int q = cl_I_to_int(R->modulus);
1039         int nhalf = degree(a)/2;
1040
1041         int i = 1;
1042         umodpoly w = {R->zero(), R->one()};
1043         umodpoly x = w;
1044
1045         while ( i <= nhalf ) {
1046                 umodpoly buf;
1047                 expt_pos_Q(w, a, q, buf);
1048                 w = buf;
1049                 gcd(a, w - x, buf);
1050                 if ( unequal_one(buf) ) {
1051                         degrees.push_back(i);
1052                         ddfactors.push_back(buf);
1053                         umodpoly buf2;
1054                         div(a, buf, buf2);
1055                         a = buf2;
1056                         nhalf = degree(a)/2;
1057                         rem(w, a, buf);
1058                         w = buf;
1059                 }
1060                 ++i;
1061         }
1062         if ( unequal_one(a) ) {
1063                 degrees.push_back(degree(a));
1064                 ddfactors.push_back(a);
1065         }
1066 }
1067
1068 /** Modular same degree factorization.
1069  *  Same degree factorization is a kind of misnomer. It performs distinct degree
1070  *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
1071  *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
1072  *  degree.
1073  *
1074  *  @param[in]  a    modular polynomial
1075  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1076  *                   new elements are added at the end
1077  */
1078 static void same_degree_factor(const umodpoly& a, upvec& upv)
1079 {
1080         cl_modint_ring R = a[0].ring();
1081
1082         vector<int> degrees;
1083         upvec ddfactors;
1084         distinct_degree_factor(a, degrees, ddfactors);
1085
1086         for ( size_t i=0; i<degrees.size(); ++i ) {
1087                 if ( degrees[i] == degree(ddfactors[i]) ) {
1088                         upv.push_back(ddfactors[i]);
1089                 } else {
1090                         berlekamp(ddfactors[i], upv);
1091                 }
1092         }
1093 }
1094
1095 // Yes, we can (choose).
1096 #define USE_SAME_DEGREE_FACTOR
1097
1098 /** Modular univariate factorization.
1099  *
1100  *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
1101  *  and same degree factorization (SDF). SDF seems to be slightly faster in
1102  *  almost all cases so it is activated as default.
1103  *
1104  *  @param[in]  p    modular polynomial
1105  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1106  *                   new elements are added at the end
1107  */
1108 static void factor_modular(const umodpoly& p, upvec& upv)
1109 {
1110 #ifdef USE_SAME_DEGREE_FACTOR
1111         same_degree_factor(p, upv);
1112 #else
1113         berlekamp(p, upv);
1114 #endif
1115 }
1116
1117 /** Calculates modular polynomials s and t such that a*s+b*t==1.
1118  *  Assertion: a and b are relatively prime and not zero.
1119  *
1120  *  @param[in]  a  polynomial
1121  *  @param[in]  b  polynomial
1122  *  @param[out] s  polynomial
1123  *  @param[out] t  polynomial
1124  */
1125 static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
1126 {
1127         if ( degree(a) < degree(b) ) {
1128                 exteuclid(b, a, t, s);
1129                 return;
1130         }
1131
1132         umodpoly one(1, a[0].ring()->one());
1133         umodpoly c = a; normalize_in_field(c);
1134         umodpoly d = b; normalize_in_field(d);
1135         s = one;
1136         t.clear();
1137         umodpoly d1;
1138         umodpoly d2 = one;
1139         umodpoly q;
1140         while ( true ) {
1141                 div(c, d, q);
1142                 umodpoly r = c - q * d;
1143                 umodpoly r1 = s - q * d1;
1144                 umodpoly r2 = t - q * d2;
1145                 c = d;
1146                 s = d1;
1147                 t = d2;
1148                 if ( r.empty() ) break;
1149                 d = r;
1150                 d1 = r1;
1151                 d2 = r2;
1152         }
1153         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
1154         for (auto & i : s) {
1155                 i = i * fac;
1156         }
1157         canonicalize(s);
1158         fac = recip(lcoeff(b) * lcoeff(c));
1159         for (auto & i : t) {
1160                 i = i * fac;
1161         }
1162         canonicalize(t);
1163 }
1164
1165 /** Replaces the leading coefficient in a polynomial by a given number.
1166  *
1167  *  @param[in] poly  polynomial to change
1168  *  @param[in] lc    new leading coefficient
1169  *  @return          changed polynomial
1170  */
1171 static upoly replace_lc(const upoly& poly, const cl_I& lc)
1172 {
1173         if ( poly.empty() ) return poly;
1174         upoly r = poly;
1175         r.back() = lc;
1176         return r;
1177 }
1178
1179 /** Calculates bound for the product of absolute values (modulus) of the roots.
1180  *  Uses Landau's inequality, see [Mig].
1181  */
1182 static inline cl_I calc_bound(const ex& a, const ex& x)
1183 {
1184         cl_R radicand = 0;
1185         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
1186                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
1187                 radicand = radicand + square(aa);
1188         }
1189         return ceiling1(the<cl_R>(cln::sqrt(radicand)));
1190 }
1191
1192 /** Calculates bound for the product of absolute values (modulus) of the roots.
1193  *  Uses Landau's inequality, see [Mig].
1194  */
1195 static inline cl_I calc_bound(const upoly& a)
1196 {
1197         cl_R radicand = 0;
1198         for ( int i=degree(a); i>=0; --i ) {
1199                 cl_I aa = abs(a[i]);
1200                 radicand = radicand + square(aa);
1201         }
1202         return ceiling1(the<cl_R>(cln::sqrt(radicand)));
1203 }
1204
1205 /** Hensel lifting as used by factor_univariate().
1206  *
1207  *  The implementation follows algorithm 6.1 of [GCL].
1208  *
1209  *  @param[in]  a_   primitive univariate polynomials
1210  *  @param[in]  p    prime number that does not divide lcoeff(a)
1211  *  @param[in]  u1_  modular factor of a (mod p)
1212  *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
1213  *                   fulfilling  u1_*w1_ == a mod p
1214  *  @param[out] u    lifted factor
1215  *  @param[out] w    lifted factor, u*w = a
1216  */
1217 static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
1218 {
1219         upoly a = a_;
1220         const cl_modint_ring& R = u1_[0].ring();
1221
1222         // calc bound B
1223         int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
1224         cl_I maxmodulus = ash(calc_bound(a), maxdeg+1);  // = 2 * calc_bound(a) * 2^maxdeg
1225
1226         // step 1
1227         cl_I alpha = lcoeff(a);
1228         a = a * alpha;
1229         umodpoly nu1 = u1_;
1230         normalize_in_field(nu1);
1231         umodpoly nw1 = w1_;
1232         normalize_in_field(nw1);
1233         upoly phi;
1234         phi = umodpoly_to_upoly(nu1) * alpha;
1235         umodpoly u1;
1236         umodpoly_from_upoly(u1, phi, R);
1237         phi = umodpoly_to_upoly(nw1) * alpha;
1238         umodpoly w1;
1239         umodpoly_from_upoly(w1, phi, R);
1240
1241         // step 2
1242         umodpoly s;
1243         umodpoly t;
1244         exteuclid(u1, w1, s, t);
1245
1246         // step 3
1247         u = replace_lc(umodpoly_to_upoly(u1), alpha);
1248         w = replace_lc(umodpoly_to_upoly(w1), alpha);
1249         upoly e = a - u * w;
1250         cl_I modulus = p;
1251
1252         // step 4
1253         while ( !e.empty() && modulus < maxmodulus ) {
1254                 upoly c = e / modulus;
1255                 phi = umodpoly_to_upoly(s) * c;
1256                 umodpoly sigmatilde;
1257                 umodpoly_from_upoly(sigmatilde, phi, R);
1258                 phi = umodpoly_to_upoly(t) * c;
1259                 umodpoly tautilde;
1260                 umodpoly_from_upoly(tautilde, phi, R);
1261                 umodpoly r, q;
1262                 remdiv(sigmatilde, w1, r, q);
1263                 umodpoly sigma = r;
1264                 phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
1265                 umodpoly tau;
1266                 umodpoly_from_upoly(tau, phi, R);
1267                 u = u + umodpoly_to_upoly(tau) * modulus;
1268                 w = w + umodpoly_to_upoly(sigma) * modulus;
1269                 e = a - u * w;
1270                 modulus = modulus * p;
1271         }
1272
1273         // step 5
1274         if ( e.empty() ) {
1275                 cl_I g = u[0];
1276                 for ( size_t i=1; i<u.size(); ++i ) {
1277                         g = gcd(g, u[i]);
1278                         if ( g == 1 ) break;
1279                 }
1280                 if ( g != 1 ) {
1281                         u = u / g;
1282                         w = w * g;
1283                 }
1284                 if ( alpha != 1 ) {
1285                         w = w / alpha;
1286                 }
1287         } else {
1288                 u.clear();
1289         }
1290 }
1291
1292 /** Returns a new small prime number.
1293  *
1294  *  @param[in] n  an integer
1295  *  @return       smallest prime greater than n
1296  */
1297 static unsigned int next_prime(unsigned int n)
1298 {
1299         static vector<unsigned int> primes = {2, 3, 5, 7};
1300         unsigned int candidate = primes.back();
1301         while (primes.back() <= n) {
1302                 candidate += 2;
1303                 bool is_prime = true;
1304                 for (size_t i=1; primes[i]*primes[i]<=candidate; ++i) {
1305                         if (candidate % primes[i] == 0) {
1306                                 is_prime = false;
1307                                 break;
1308                         }
1309                 }
1310                 if (is_prime)
1311                         primes.push_back(candidate);
1312         }
1313         for (auto & it : primes) {
1314                 if ( it > n ) {
1315                         return it;
1316                 }
1317         }
1318         throw logic_error("next_prime: should not reach this point!");
1319 }
1320
1321 /** Manages the splitting of a vector of modular factors into two partitions.
1322  */
1323 class factor_partition
1324 {
1325 public:
1326         /** Takes the vector of modular factors and initializes the first partition */
1327         factor_partition(const upvec& factors_) : factors(factors_)
1328         {
1329                 n = factors.size();
1330                 k.resize(n, 0);
1331                 k[0] = 1;
1332                 cache.resize(n-1);
1333                 one.resize(1, factors.front()[0].ring()->one());
1334                 len = 1;
1335                 last = 0;
1336                 split();
1337         }
1338         int operator[](size_t i) const { return k[i]; }
1339         size_t size() const { return n; }
1340         size_t size_left() const { return n-len; }
1341         size_t size_right() const { return len; }
1342         /** Initializes the next partition.
1343             Returns true, if there is one, false otherwise. */
1344         bool next()
1345         {
1346                 if ( last == n-1 ) {
1347                         int rem = len - 1;
1348                         int p = last - 1;
1349                         while ( rem ) {
1350                                 if ( k[p] ) {
1351                                         --rem;
1352                                         --p;
1353                                         continue;
1354                                 }
1355                                 last = p - 1;
1356                                 while ( k[last] == 0 ) { --last; }
1357                                 if ( last == 0 && n == 2*len ) return false;
1358                                 k[last++] = 0;
1359                                 for ( size_t i=0; i<=len-rem; ++i ) {
1360                                         k[last] = 1;
1361                                         ++last;
1362                                 }
1363                                 fill(k.begin()+last, k.end(), 0);
1364                                 --last;
1365                                 split();
1366                                 return true;
1367                         }
1368                         last = len;
1369                         ++len;
1370                         if ( len > n/2 ) return false;
1371                         fill(k.begin(), k.begin()+len, 1);
1372                         fill(k.begin()+len+1, k.end(), 0);
1373                 } else {
1374                         k[last++] = 0;
1375                         k[last] = 1;
1376                 }
1377                 split();
1378                 return true;
1379         }
1380         /** Get first partition */
1381         umodpoly& left() { return lr[0]; }
1382         /** Get second partition */
1383         umodpoly& right() { return lr[1]; }
1384 private:
1385         void split_cached()
1386         {
1387                 size_t i = 0;
1388                 do {
1389                         size_t pos = i;
1390                         int group = k[i++];
1391                         size_t d = 0;
1392                         while ( i < n && k[i] == group ) { ++d; ++i; }
1393                         if ( d ) {
1394                                 if ( cache[pos].size() >= d ) {
1395                                         lr[group] = lr[group] * cache[pos][d-1];
1396                                 } else {
1397                                         if ( cache[pos].size() == 0 ) {
1398                                                 cache[pos].push_back(factors[pos] * factors[pos+1]);
1399                                         }
1400                                         size_t j = pos + cache[pos].size() + 1;
1401                                         d -= cache[pos].size();
1402                                         while ( d ) {
1403                                                 umodpoly buf = cache[pos].back() * factors[j];
1404                                                 cache[pos].push_back(buf);
1405                                                 --d;
1406                                                 ++j;
1407                                         }
1408                                         lr[group] = lr[group] * cache[pos].back();
1409                                 }
1410                         } else {
1411                                 lr[group] = lr[group] * factors[pos];
1412                         }
1413                 } while ( i < n );
1414         }
1415         void split()
1416         {
1417                 lr[0] = one;
1418                 lr[1] = one;
1419                 if ( n > 6 ) {
1420                         split_cached();
1421                 } else {
1422                         for ( size_t i=0; i<n; ++i ) {
1423                                 lr[k[i]] = lr[k[i]] * factors[i];
1424                         }
1425                 }
1426         }
1427 private:
1428         umodpoly lr[2];
1429         vector<vector<umodpoly>> cache;
1430         upvec factors;
1431         umodpoly one;
1432         size_t n;
1433         size_t len;
1434         size_t last;
1435         vector<int> k;
1436 };
1437
1438 /** Contains a pair of univariate polynomial and its modular factors.
1439  *  Used by factor_univariate().
1440  */
1441 struct ModFactors
1442 {
1443         upoly poly;
1444         upvec factors;
1445 };
1446
1447 /** Univariate polynomial factorization.
1448  *
1449  *  Modular factorization is tried for several primes to minimize the number of
1450  *  modular factors. Then, Hensel lifting is performed.
1451  *
1452  *  @param[in]     poly   expanded square free univariate polynomial
1453  *  @param[in]     x      symbol
1454  *  @param[in,out] prime  prime number to start trying modular factorization with,
1455  *                        output value is the prime number actually used
1456  */
1457 static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
1458 {
1459         ex unit, cont, prim_ex;
1460         poly.unitcontprim(x, unit, cont, prim_ex);
1461         upoly prim;
1462         upoly_from_ex(prim, prim_ex, x);
1463         if (prim_ex.is_equal(1)) {
1464                 return poly;
1465         }
1466
1467         // determine proper prime and minimize number of modular factors
1468         prime = 3;
1469         unsigned int lastp = prime;
1470         cl_modint_ring R;
1471         unsigned int trials = 0;
1472         unsigned int minfactors = 0;
1473
1474         const numeric& cont_n = ex_to<numeric>(cont);
1475         cl_I i_cont;
1476         if (cont_n.is_integer()) {
1477                 i_cont = the<cl_I>(cont_n.to_cl_N());
1478         } else {
1479                 // poly \in Q[x] => poly = q ipoly, ipoly \in Z[x], q \in Q
1480                 // factor(poly) \equiv q factor(ipoly)
1481                 i_cont = cl_I(1);
1482         }
1483         cl_I lc = lcoeff(prim)*i_cont;
1484         upvec factors;
1485         while ( trials < 2 ) {
1486                 umodpoly modpoly;
1487                 while ( true ) {
1488                         prime = next_prime(prime);
1489                         if ( !zerop(rem(lc, prime)) ) {
1490                                 R = find_modint_ring(prime);
1491                                 umodpoly_from_upoly(modpoly, prim, R);
1492                                 if ( squarefree(modpoly) ) break;
1493                         }
1494                 }
1495
1496                 // do modular factorization
1497                 upvec trialfactors;
1498                 factor_modular(modpoly, trialfactors);
1499                 if ( trialfactors.size() <= 1 ) {
1500                         // irreducible for sure
1501                         return poly;
1502                 }
1503
1504                 if ( minfactors == 0 || trialfactors.size() < minfactors ) {
1505                         factors = trialfactors;
1506                         minfactors = trialfactors.size();
1507                         lastp = prime;
1508                         trials = 1;
1509                 } else {
1510                         ++trials;
1511                 }
1512         }
1513         prime = lastp;
1514         R = find_modint_ring(prime);
1515
1516         // lift all factor combinations
1517         stack<ModFactors> tocheck;
1518         ModFactors mf;
1519         mf.poly = prim;
1520         mf.factors = factors;
1521         tocheck.push(mf);
1522         upoly f1, f2;
1523         ex result = 1;
1524         while ( tocheck.size() ) {
1525                 const size_t n = tocheck.top().factors.size();
1526                 factor_partition part(tocheck.top().factors);
1527                 while ( true ) {
1528                         // call Hensel lifting
1529                         hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
1530                         if ( !f1.empty() ) {
1531                                 // successful, update the stack and the result
1532                                 if ( part.size_left() == 1 ) {
1533                                         if ( part.size_right() == 1 ) {
1534                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1535                                                 tocheck.pop();
1536                                                 break;
1537                                         }
1538                                         result *= upoly_to_ex(f1, x);
1539                                         tocheck.top().poly = f2;
1540                                         for ( size_t i=0; i<n; ++i ) {
1541                                                 if ( part[i] == 0 ) {
1542                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1543                                                         break;
1544                                                 }
1545                                         }
1546                                         break;
1547                                 }
1548                                 else if ( part.size_right() == 1 ) {
1549                                         if ( part.size_left() == 1 ) {
1550                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1551                                                 tocheck.pop();
1552                                                 break;
1553                                         }
1554                                         result *= upoly_to_ex(f2, x);
1555                                         tocheck.top().poly = f1;
1556                                         for ( size_t i=0; i<n; ++i ) {
1557                                                 if ( part[i] == 1 ) {
1558                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1559                                                         break;
1560                                                 }
1561                                         }
1562                                         break;
1563                                 } else {
1564                                         upvec newfactors1(part.size_left()), newfactors2(part.size_right());
1565                                         auto i1 = newfactors1.begin(), i2 = newfactors2.begin();
1566                                         for ( size_t i=0; i<n; ++i ) {
1567                                                 if ( part[i] ) {
1568                                                         *i2++ = tocheck.top().factors[i];
1569                                                 } else {
1570                                                         *i1++ = tocheck.top().factors[i];
1571                                                 }
1572                                         }
1573                                         tocheck.top().factors = newfactors1;
1574                                         tocheck.top().poly = f1;
1575                                         ModFactors mf;
1576                                         mf.factors = newfactors2;
1577                                         mf.poly = f2;
1578                                         tocheck.push(mf);
1579                                         break;
1580                                 }
1581                         } else {
1582                                 // not successful
1583                                 if ( !part.next() ) {
1584                                         // if no more combinations left, return polynomial as
1585                                         // irreducible
1586                                         result *= upoly_to_ex(tocheck.top().poly, x);
1587                                         tocheck.pop();
1588                                         break;
1589                                 }
1590                         }
1591                 }
1592         }
1593
1594         return unit * cont * result;
1595 }
1596
1597 /** Second interface to factor_univariate() to be used if the information about
1598  *  the prime is not needed.
1599  */
1600 static inline ex factor_univariate(const ex& poly, const ex& x)
1601 {
1602         unsigned int prime;
1603         return factor_univariate(poly, x, prime);
1604 }
1605
1606 /** Represents an evaluation point (<symbol>==<integer>).
1607  */
1608 struct EvalPoint
1609 {
1610         ex x;
1611         int evalpoint;
1612 };
1613
1614 #ifdef DEBUGFACTOR
1615 ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
1616 {
1617         for ( size_t i=0; i<v.size(); ++i ) {
1618                 o << "(" << v[i].x << "==" << v[i].evalpoint << ") ";
1619         }
1620         return o;
1621 }
1622 #endif // def DEBUGFACTOR
1623
1624 // forward declaration
1625 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
1626
1627 /** Utility function for multivariate Hensel lifting.
1628  *
1629  *  Solves the equation
1630  *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
1631  *  with deg(s_i) < deg(a_i)
1632  *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1633  *
1634  *  The implementation follows algorithm 6.3 of [GCL].
1635  *
1636  *  @param[in]  a   vector of modular univariate polynomials
1637  *  @param[in]  x   symbol
1638  *  @param[in]  p   prime number
1639  *  @param[in]  k   p^k is modulus
1640  *  @return         vector of polynomials (s_i)
1641  */
1642 static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
1643 {
1644         const size_t r = a.size();
1645         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1646         upvec q(r-1);
1647         q[r-2] = a[r-1];
1648         for ( size_t j=r-2; j>=1; --j ) {
1649                 q[j-1] = a[j] * q[j];
1650         }
1651         umodpoly beta(1, R->one());
1652         upvec s;
1653         for ( size_t j=1; j<r; ++j ) {
1654                 vector<ex> mdarg(2);
1655                 mdarg[0] = umodpoly_to_ex(q[j-1], x);
1656                 mdarg[1] = umodpoly_to_ex(a[j-1], x);
1657                 vector<EvalPoint> empty;
1658                 vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
1659                 umodpoly sigma1;
1660                 umodpoly_from_ex(sigma1, exsigma[0], x, R);
1661                 umodpoly sigma2;
1662                 umodpoly_from_ex(sigma2, exsigma[1], x, R);
1663                 beta = sigma1;
1664                 s.push_back(sigma2);
1665         }
1666         s.push_back(beta);
1667         return s;
1668 }
1669
1670 /** Changes the modulus of a modular polynomial. Used by eea_lift().
1671  *
1672  *  @param[in]     R  new modular ring
1673  *  @param[in,out] a  polynomial to change (in situ)
1674  */
1675 static void change_modulus(const cl_modint_ring& R, umodpoly& a)
1676 {
1677         if ( a.empty() ) return;
1678         cl_modint_ring oldR = a[0].ring();
1679         for (auto & i : a) {
1680                 i = R->canonhom(oldR->retract(i));
1681         }
1682         canonicalize(a);
1683 }
1684
1685 /** Utility function for multivariate Hensel lifting.
1686  *
1687  *  Solves  s*a + t*b == 1 mod p^k  given a,b.
1688  *
1689  *  The implementation follows algorithm 6.3 of [GCL].
1690  *
1691  *  @param[in]  a   polynomial
1692  *  @param[in]  b   polynomial
1693  *  @param[in]  x   symbol
1694  *  @param[in]  p   prime number
1695  *  @param[in]  k   p^k is modulus
1696  *  @param[out] s_  output polynomial
1697  *  @param[out] t_  output polynomial
1698  */
1699 static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
1700 {
1701         cl_modint_ring R = find_modint_ring(p);
1702         umodpoly amod = a;
1703         change_modulus(R, amod);
1704         umodpoly bmod = b;
1705         change_modulus(R, bmod);
1706
1707         umodpoly smod;
1708         umodpoly tmod;
1709         exteuclid(amod, bmod, smod, tmod);
1710
1711         cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
1712         umodpoly s = smod;
1713         change_modulus(Rpk, s);
1714         umodpoly t = tmod;
1715         change_modulus(Rpk, t);
1716
1717         cl_I modulus(p);
1718         umodpoly one(1, Rpk->one());
1719         for ( size_t j=1; j<k; ++j ) {
1720                 umodpoly e = one - a * s - b * t;
1721                 reduce_coeff(e, modulus);
1722                 umodpoly c = e;
1723                 change_modulus(R, c);
1724                 umodpoly sigmabar = smod * c;
1725                 umodpoly taubar = tmod * c;
1726                 umodpoly sigma, q;
1727                 remdiv(sigmabar, bmod, sigma, q);
1728                 umodpoly tau = taubar + q * amod;
1729                 umodpoly sadd = sigma;
1730                 change_modulus(Rpk, sadd);
1731                 cl_MI modmodulus(Rpk, modulus);
1732                 s = s + sadd * modmodulus;
1733                 umodpoly tadd = tau;
1734                 change_modulus(Rpk, tadd);
1735                 t = t + tadd * modmodulus;
1736                 modulus = modulus * p;
1737         }
1738
1739         s_ = s; t_ = t;
1740 }
1741
1742 /** Utility function for multivariate Hensel lifting.
1743  *
1744  *  Solves the equation
1745  *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
1746  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1747  *
1748  *  The implementation follows algorithm 6.3 of [GCL].
1749  *
1750  *  @param a  vector with univariate polynomials mod p^k
1751  *  @param x  symbol
1752  *  @param m  exponent of x^m in the equation to solve
1753  *  @param p  prime number
1754  *  @param k  p^k is modulus
1755  *  @return   vector of polynomials (s_i)
1756  */
1757 static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
1758 {
1759         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1760
1761         const size_t r = a.size();
1762         upvec result;
1763         if ( r > 2 ) {
1764                 upvec s = multiterm_eea_lift(a, x, p, k);
1765                 for ( size_t j=0; j<r; ++j ) {
1766                         umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
1767                         umodpoly buf;
1768                         rem(bmod, a[j], buf);
1769                         result.push_back(buf);
1770                 }
1771         } else {
1772                 umodpoly s, t;
1773                 eea_lift(a[1], a[0], x, p, k, s, t);
1774                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
1775                 umodpoly buf, q;
1776                 remdiv(bmod, a[0], buf, q);
1777                 result.push_back(buf);
1778                 umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
1779                 buf = t1mod + q * a[1];
1780                 result.push_back(buf);
1781         }
1782
1783         return result;
1784 }
1785
1786 /** Map used by function make_modular().
1787  *  Finds every coefficient in a polynomial and replaces it by is value in the
1788  *  given modular ring R (symmetric representation).
1789  */
1790 struct make_modular_map : public map_function {
1791         cl_modint_ring R;
1792         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
1793         ex operator()(const ex& e) override
1794         {
1795                 if ( is_a<add>(e) || is_a<mul>(e) ) {
1796                         return e.map(*this);
1797                 }
1798                 else if ( is_a<numeric>(e) ) {
1799                         numeric mod(R->modulus);
1800                         numeric halfmod = (mod-1)/2;
1801                         cl_MI emod = R->canonhom(the<cl_I>(ex_to<numeric>(e).to_cl_N()));
1802                         numeric n(R->retract(emod));
1803                         if ( n > halfmod ) {
1804                                 return n-mod;
1805                         } else {
1806                                 return n;
1807                         }
1808                 }
1809                 return e;
1810         }
1811 };
1812
1813 /** Helps mimicking modular multivariate polynomial arithmetic.
1814  *
1815  *  @param e  expression of which to make the coefficients equal to their value
1816  *            in the modular ring R (symmetric representation)
1817  *  @param R  modular ring
1818  *  @return   resulting expression
1819  */
1820 static ex make_modular(const ex& e, const cl_modint_ring& R)
1821 {
1822         make_modular_map map(R);
1823         return map(e.expand());
1824 }
1825
1826 /** Utility function for multivariate Hensel lifting.
1827  *
1828  *  Returns the polynomials s_i that fulfill
1829  *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
1830  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1831  *
1832  *  The implementation follows algorithm 6.2 of [GCL].
1833  *
1834  *  @param a_  vector of multivariate factors mod p^k
1835  *  @param x   symbol (equiv. x_1 in [GCL])
1836  *  @param c   polynomial mod p^k
1837  *  @param I   vector of evaluation points
1838  *  @param d   maximum total degree of result
1839  *  @param p   prime number
1840  *  @param k   p^k is modulus
1841  *  @return    vector of polynomials (s_i)
1842  */
1843 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
1844                                     unsigned int d, unsigned int p, unsigned int k)
1845 {
1846         vector<ex> a = a_;
1847
1848         const cl_I modulus = expt_pos(cl_I(p),k);
1849         const cl_modint_ring R = find_modint_ring(modulus);
1850         const size_t r = a.size();
1851         const size_t nu = I.size() + 1;
1852
1853         vector<ex> sigma;
1854         if ( nu > 1 ) {
1855                 ex xnu = I.back().x;
1856                 int alphanu = I.back().evalpoint;
1857
1858                 ex A = 1;
1859                 for ( size_t i=0; i<r; ++i ) {
1860                         A *= a[i];
1861                 }
1862                 vector<ex> b(r);
1863                 for ( size_t i=0; i<r; ++i ) {
1864                         b[i] = normal(A / a[i]);
1865                 }
1866
1867                 vector<ex> anew = a;
1868                 for ( size_t i=0; i<r; ++i ) {
1869                         anew[i] = anew[i].subs(xnu == alphanu);
1870                 }
1871                 ex cnew = c.subs(xnu == alphanu);
1872                 vector<EvalPoint> Inew = I;
1873                 Inew.pop_back();
1874                 sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
1875
1876                 ex buf = c;
1877                 for ( size_t i=0; i<r; ++i ) {
1878                         buf -= sigma[i] * b[i];
1879                 }
1880                 ex e = make_modular(buf, R);
1881
1882                 ex monomial = 1;
1883                 for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
1884                         monomial *= (xnu - alphanu);
1885                         monomial = expand(monomial);
1886                         ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
1887                         cm = make_modular(cm, R);
1888                         if ( !cm.is_zero() ) {
1889                                 vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
1890                                 ex buf = e;
1891                                 for ( size_t j=0; j<delta_s.size(); ++j ) {
1892                                         delta_s[j] *= monomial;
1893                                         sigma[j] += delta_s[j];
1894                                         buf -= delta_s[j] * b[j];
1895                                 }
1896                                 e = make_modular(buf, R);
1897                         }
1898                 }
1899         } else {
1900                 upvec amod;
1901                 for ( size_t i=0; i<a.size(); ++i ) {
1902                         umodpoly up;
1903                         umodpoly_from_ex(up, a[i], x, R);
1904                         amod.push_back(up);
1905                 }
1906
1907                 sigma.insert(sigma.begin(), r, 0);
1908                 size_t nterms;
1909                 ex z;
1910                 if ( is_a<add>(c) ) {
1911                         nterms = c.nops();
1912                         z = c.op(0);
1913                 } else {
1914                         nterms = 1;
1915                         z = c;
1916                 }
1917                 for ( size_t i=0; i<nterms; ++i ) {
1918                         int m = z.degree(x);
1919                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
1920                         upvec delta_s = univar_diophant(amod, x, m, p, k);
1921                         cl_MI modcm;
1922                         cl_I poscm = plusp(cm) ? cm : mod(cm, modulus);
1923                         modcm = cl_MI(R, poscm);
1924                         for ( size_t j=0; j<delta_s.size(); ++j ) {
1925                                 delta_s[j] = delta_s[j] * modcm;
1926                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
1927                         }
1928                         if ( nterms > 1 && i+1 != nterms ) {
1929                                 z = c.op(i+1);
1930                         }
1931                 }
1932         }
1933
1934         for ( size_t i=0; i<sigma.size(); ++i ) {
1935                 sigma[i] = make_modular(sigma[i], R);
1936         }
1937
1938         return sigma;
1939 }
1940
1941 /** Multivariate Hensel lifting.
1942  *  The implementation follows algorithm 6.4 of [GCL].
1943  *  Since we don't have a data type for modular multivariate polynomials, the
1944  *  respective operations are done in a GiNaC::ex and the function
1945  *  make_modular() is then called to make the coefficient modular p^l.
1946  *
1947  *  @param a    multivariate polynomial primitive in x
1948  *  @param x    symbol (equiv. x_1 in [GCL])
1949  *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
1950  *  @param p    prime number (should not divide lcoeff(a mod I))
1951  *  @param l    p^l is the modulus of the lifted univariate field
1952  *  @param u    vector of modular (mod p^l) factors of a mod I
1953  *  @param lcU  correct leading coefficient of the univariate factors of a mod I
1954  *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
1955  *              empty if Hensel lifting did not succeed
1956  */
1957 static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
1958                           unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
1959 {
1960         const size_t nu = I.size() + 1;
1961         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
1962
1963         vector<ex> A(nu);
1964         A[nu-1] = a;
1965
1966         for ( size_t j=nu; j>=2; --j ) {
1967                 ex x = I[j-2].x;
1968                 int alpha = I[j-2].evalpoint;
1969                 A[j-2] = A[j-1].subs(x==alpha);
1970                 A[j-2] = make_modular(A[j-2], R);
1971         }
1972
1973         int maxdeg = a.degree(I.front().x);
1974         for ( size_t i=1; i<I.size(); ++i ) {
1975                 int maxdeg2 = a.degree(I[i].x);
1976                 if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
1977         }
1978
1979         const size_t n = u.size();
1980         vector<ex> U(n);
1981         for ( size_t i=0; i<n; ++i ) {
1982                 U[i] = umodpoly_to_ex(u[i], x);
1983         }
1984
1985         for ( size_t j=2; j<=nu; ++j ) {
1986                 vector<ex> U1 = U;
1987                 ex monomial = 1;
1988                 for ( size_t m=0; m<n; ++m) {
1989                         if ( lcU[m] != 1 ) {
1990                                 ex coef = lcU[m];
1991                                 for ( size_t i=j-1; i<nu-1; ++i ) {
1992                                         coef = coef.subs(I[i].x == I[i].evalpoint);
1993                                 }
1994                                 coef = make_modular(coef, R);
1995                                 int deg = U[m].degree(x);
1996                                 U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
1997                         }
1998                 }
1999                 ex Uprod = 1;
2000                 for ( size_t i=0; i<n; ++i ) {
2001                         Uprod *= U[i];
2002                 }
2003                 ex e = expand(A[j-1] - Uprod);
2004
2005                 vector<EvalPoint> newI;
2006                 for ( size_t i=1; i<=j-2; ++i ) {
2007                         newI.push_back(I[i-1]);
2008                 }
2009
2010                 ex xj = I[j-2].x;
2011                 int alphaj = I[j-2].evalpoint;
2012                 size_t deg = A[j-1].degree(xj);
2013                 for ( size_t k=1; k<=deg; ++k ) {
2014                         if ( !e.is_zero() ) {
2015                                 monomial *= (xj - alphaj);
2016                                 monomial = expand(monomial);
2017                                 ex dif = e.diff(ex_to<symbol>(xj), k);
2018                                 ex c = dif.subs(xj==alphaj) / factorial(k);
2019                                 if ( !c.is_zero() ) {
2020                                         vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
2021                                         for ( size_t i=0; i<n; ++i ) {
2022                                                 deltaU[i] *= monomial;
2023                                                 U[i] += deltaU[i];
2024                                                 U[i] = make_modular(U[i], R);
2025                                         }
2026                                         ex Uprod = 1;
2027                                         for ( size_t i=0; i<n; ++i ) {
2028                                                 Uprod *= U[i];
2029                                         }
2030                                         e = A[j-1] - Uprod;
2031                                         e = make_modular(e, R);
2032                                 }
2033                         }
2034                 }
2035         }
2036
2037         ex acand = 1;
2038         for ( size_t i=0; i<U.size(); ++i ) {
2039                 acand *= U[i];
2040         }
2041         if ( expand(a-acand).is_zero() ) {
2042                 return lst(U.begin(), U.end());
2043         } else {
2044                 return lst{};
2045         }
2046 }
2047
2048 /** Takes a factorized expression and puts the factors in a vector. The exponents
2049  *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
2050  *  element of the result is always the numeric coefficient.
2051  */
2052 static exvector put_factors_into_vec(const ex& e)
2053 {
2054         exvector result;
2055         if ( is_a<numeric>(e) ) {
2056                 result.push_back(e);
2057                 return result;
2058         }
2059         if ( is_a<power>(e) ) {
2060                 result.push_back(1);
2061                 result.push_back(e.op(0));
2062                 return result;
2063         }
2064         if ( is_a<symbol>(e) || is_a<add>(e) ) {
2065                 ex icont(e.integer_content());
2066                 result.push_back(icont);
2067                 result.push_back(e/icont);
2068                 return result;
2069         }
2070         if ( is_a<mul>(e) ) {
2071                 ex nfac = 1;
2072                 result.push_back(nfac);
2073                 for ( size_t i=0; i<e.nops(); ++i ) {
2074                         ex op = e.op(i);
2075                         if ( is_a<numeric>(op) ) {
2076                                 nfac = op;
2077                         }
2078                         if ( is_a<power>(op) ) {
2079                                 result.push_back(op.op(0));
2080                         }
2081                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
2082                                 result.push_back(op);
2083                         }
2084                 }
2085                 result[0] = nfac;
2086                 return result;
2087         }
2088         throw runtime_error("put_factors_into_vec: bad term.");
2089 }
2090
2091 /** Checks a set of numbers for whether each number has a unique prime factor.
2092  *
2093  *  @param[in]  f  numbers to check
2094  *  @return        true: if number set is bad, false: if set is okay (has unique
2095  *                 prime factors)
2096  */
2097 static bool checkdivisors(const exvector& f)
2098 {
2099         const int k = f.size();
2100         numeric q, r;
2101         vector<numeric> d(k);
2102         d[0] = ex_to<numeric>(abs(f[0]));
2103         for ( int i=1; i<k; ++i ) {
2104                 q = ex_to<numeric>(abs(f[i]));
2105                 for ( int j=i-1; j>=0; --j ) {
2106                         r = d[j];
2107                         do {
2108                                 r = gcd(r, q);
2109                                 q = q/r;
2110                         } while ( r != 1 );
2111                         if ( q == 1 ) {
2112                                 return true;
2113                         }
2114                 }
2115                 d[i] = q;
2116         }
2117         return false;
2118 }
2119
2120 /** Generates a set of evaluation points for a multivariate polynomial.
2121  *  The set fulfills the following conditions:
2122  *  1. lcoeff(evaluated_polynomial) does not vanish
2123  *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
2124  *  3. evaluated_polynomial is square free
2125  *  See [Wan] for more details.
2126  *
2127  *  @param[in]     u        multivariate polynomial to be factored
2128  *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
2129  *  @param[in]     x        first symbol that appears in u
2130  *  @param[in]     syms_wox remaining symbols that appear in u
2131  *  @param[in]     f        vector containing the factors of the leading coefficient vn
2132  *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
2133  *  @param[out]    u0       returns the evaluated (univariate) polynomial
2134  *  @param[out]    a        returns the valid evaluation points. must have initial size equal
2135  *                          number of symbols-1 before calling generate_set
2136  */
2137 static void generate_set(const ex& u, const ex& vn, const ex& x, const exset& syms_wox, const exvector& f,
2138                          numeric& modulus, ex& u0, vector<numeric>& a)
2139 {
2140         while ( true ) {
2141                 ++modulus;
2142                 // generate a set of integers ...
2143                 u0 = u;
2144                 ex vna = vn;
2145                 ex vnatry;
2146                 auto s = syms_wox.begin();
2147                 for ( size_t i=0; i<a.size(); ++i ) {
2148                         do {
2149                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
2150                                 vnatry = vna.subs(*s == a[i]);
2151                                 // ... for which the leading coefficient doesn't vanish ...
2152                         } while ( vnatry == 0 );
2153                         vna = vnatry;
2154                         u0 = u0.subs(*s == a[i]);
2155                         ++s;
2156                 }
2157                 // ... for which u0 is square free ...
2158                 ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
2159                 if ( !is_a<numeric>(g) ) {
2160                         continue;
2161                 }
2162                 if ( !is_a<numeric>(vn) ) {
2163                         // ... and for which the evaluated factors have each an unique prime factor
2164                         exvector fnum = f;
2165                         fnum[0] = fnum[0] * u0.content(x);
2166                         for ( size_t i=1; i<fnum.size(); ++i ) {
2167                                 if ( !is_a<numeric>(fnum[i]) ) {
2168                                         s = syms_wox.begin();
2169                                         for ( size_t j=0; j<a.size(); ++j, ++s ) {
2170                                                 fnum[i] = fnum[i].subs(*s == a[j]);
2171                                         }
2172                                 }
2173                         }
2174                         if ( checkdivisors(fnum) ) {
2175                                 continue;
2176                         }
2177                 }
2178                 // ok, we have a valid set now
2179                 return;
2180         }
2181 }
2182
2183 // forward declaration
2184 static ex factor_sqrfree(const ex& poly);
2185
2186 /** Used by factor_multivariate().
2187  */
2188 struct factorization_ctx {
2189         const ex poly, x;         // polynomial, first symbol x...
2190         const exset syms_wox;     // ...remaining symbols w/o x
2191         ex unit, cont, pp;        // unit * cont * pp == poly
2192         ex vn; exvector vnlst;    // leading coeff, factors of leading coeff
2193         numeric modulus;          // incremented each time we try
2194         /** returns factors or empty if it did not succeed */
2195         ex try_next_evaluation_homomorphism()
2196         {
2197                 constexpr unsigned maxtrials = 3;
2198                 vector<numeric> a(syms_wox.size(), 0);
2199
2200                 unsigned int trialcount = 0;
2201                 unsigned int prime;
2202                 int factor_count = 0;
2203                 int min_factor_count = -1;
2204                 ex u, delta;
2205                 ex ufac;
2206                 exvector ufaclst;
2207
2208                 // try several evaluation points to reduce the number of factors
2209                 while ( trialcount < maxtrials ) {
2210
2211                         // generate a set of valid evaluation points
2212                         generate_set(pp, vn, x, syms_wox, vnlst, modulus, u, a);
2213
2214                         ufac = factor_univariate(u, x, prime);
2215                         ufaclst = put_factors_into_vec(ufac);
2216                         factor_count = ufaclst.size()-1;
2217                         delta = ufaclst[0];
2218
2219                         if ( factor_count <= 1 ) {
2220                                 // irreducible
2221                                 return lst{pp};
2222                         }
2223                         if ( min_factor_count < 0 ) {
2224                                 // first time here
2225                                 min_factor_count = factor_count;
2226                         }
2227                         else if ( min_factor_count == factor_count ) {
2228                                 // one less to try
2229                                 ++trialcount;
2230                         }
2231                         else if ( min_factor_count > factor_count ) {
2232                                 // new minimum, reset trial counter
2233                                 min_factor_count = factor_count;
2234                                 trialcount = 0;
2235                         }
2236                 }
2237
2238                 // determine true leading coefficients for the Hensel lifting
2239                 vector<ex> C(factor_count);
2240                 if ( is_a<numeric>(vn) ) {
2241                         // easy case
2242                         for ( size_t i=1; i<ufaclst.size(); ++i ) {
2243                                 C[i-1] = ufaclst[i].lcoeff(x);
2244                         }
2245                 } else {
2246                         // difficult case.
2247                         // we use the property of the ftilde having a unique prime factor.
2248                         // details can be found in [Wan].
2249                         // calculate ftilde
2250                         vector<numeric> ftilde(vnlst.size()-1);
2251                         for ( size_t i=0; i<ftilde.size(); ++i ) {
2252                                 ex ft = vnlst[i+1];
2253                                 auto s = syms_wox.begin();
2254                                 for ( size_t j=0; j<a.size(); ++j ) {
2255                                         ft = ft.subs(*s == a[j]);
2256                                         ++s;
2257                                 }
2258                                 ftilde[i] = ex_to<numeric>(ft);
2259                         }
2260                         // calculate D and C
2261                         vector<bool> used_flag(ftilde.size(), false);
2262                         vector<ex> D(factor_count, 1);
2263                         if ( delta == 1 ) {
2264                                 for ( int i=0; i<factor_count; ++i ) {
2265                                         numeric prefac = ex_to<numeric>(ufaclst[i+1].lcoeff(x));
2266                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2267                                                 int count = 0;
2268                                                 numeric q;
2269                                                 while ( irem(prefac, ftilde[j], q) == 0 ) {
2270                                                         prefac = q;
2271                                                         ++count;
2272                                                 }
2273                                                 if ( count ) {
2274                                                         used_flag[j] = true;
2275                                                         D[i] = D[i] * pow(vnlst[j+1], count);
2276                                                 }
2277                                         }
2278                                         C[i] = D[i] * prefac;
2279                                 }
2280                         } else {
2281                                 for ( int i=0; i<factor_count; ++i ) {
2282                                         numeric prefac = ex_to<numeric>(ufaclst[i+1].lcoeff(x));
2283                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2284                                                 int count = 0;
2285                                                 numeric q;
2286                                                 while ( irem(prefac, ftilde[j], q) == 0 ) {
2287                                                         prefac = q;
2288                                                         ++count;
2289                                                 }
2290                                                 while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
2291                                                         numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
2292                                                         prefac = iquo(prefac, g);
2293                                                         delta = delta / (ftilde[j]/g);
2294                                                         ufaclst[i+1] = ufaclst[i+1] * (ftilde[j]/g);
2295                                                         ++count;
2296                                                 }
2297                                                 if ( count ) {
2298                                                         used_flag[j] = true;
2299                                                         D[i] = D[i] * pow(vnlst[j+1], count);
2300                                                 }
2301                                         }
2302                                         C[i] = D[i] * prefac;
2303                                 }
2304                         }
2305                         // check if something went wrong
2306                         bool some_factor_unused = false;
2307                         for ( size_t i=0; i<used_flag.size(); ++i ) {
2308                                 if ( !used_flag[i] ) {
2309                                         some_factor_unused = true;
2310                                         break;
2311                                 }
2312                         }
2313                         if ( some_factor_unused ) {
2314                                 return lst{};  // next try
2315                         }
2316                 }
2317
2318                 // multiply the remaining content of the univariate polynomial into the
2319                 // first factor
2320                 if ( delta != 1 ) {
2321                         C[0] = C[0] * delta;
2322                         ufaclst[1] = ufaclst[1] * delta;
2323                 }
2324
2325                 // set up evaluation points
2326                 vector<EvalPoint> epv;
2327                 auto s = syms_wox.begin();
2328                 for ( size_t i=0; i<a.size(); ++i ) {
2329                         epv.emplace_back(EvalPoint{*s++, a[i].to_int()});
2330                 }
2331
2332                 // calc bound p^l
2333                 int maxdeg = 0;
2334                 for ( int i=1; i<=factor_count; ++i ) {
2335                         if ( ufaclst[i].degree(x) > maxdeg ) {
2336                                 maxdeg = ufaclst[i].degree(x);
2337                         }
2338                 }
2339                 cl_I B = ash(calc_bound(u, x), maxdeg+1);  // = 2 * calc_bound(u,x) * 2^maxdeg
2340                 cl_I l = 1;
2341                 cl_I pl = prime;
2342                 while ( pl < B ) {
2343                         l = l + 1;
2344                         pl = pl * prime;
2345                 }
2346
2347                 // set up modular factors (mod p^l)
2348                 cl_modint_ring R = find_modint_ring(pl);
2349                 upvec modfactors(ufaclst.size()-1);
2350                 for ( size_t i=1; i<ufaclst.size(); ++i ) {
2351                         umodpoly_from_ex(modfactors[i-1], ufaclst[i], x, R);
2352                 }
2353
2354                 // try Hensel lifting
2355                 return hensel_multivar(pp, x, epv, prime, l, modfactors, C);
2356         }
2357 };
2358
2359 /** Multivariate factorization.
2360  *
2361  *  The implementation is based on the algorithm described in [Wan].
2362  *  An evaluation homomorphism (a set of integers) is determined that fulfills
2363  *  certain criteria. The evaluated polynomial is univariate and is factorized
2364  *  by factor_univariate(). The main work then is to find the correct leading
2365  *  coefficients of the univariate factors. They have to correspond to the
2366  *  factors of the (multivariate) leading coefficient of the input polynomial
2367  *  (as defined for a specific variable x). After that the Hensel lifting can be
2368  *  performed. This is done in round-robin for each x in syms until success.
2369  *
2370  *  @param[in] poly  expanded, square free polynomial
2371  *  @param[in] syms  contains the symbols in the polynomial
2372  *  @return          factorized polynomial
2373  */
2374 static ex factor_multivariate(const ex& poly, const exset& syms)
2375 {
2376         // set up one factorization context for each symbol
2377         vector<factorization_ctx> ctx_in_x;
2378         for (auto x : syms) {
2379                 exset syms_wox;  // remaining syms w/o x
2380                 copy_if(syms.begin(), syms.end(),
2381                         inserter(syms_wox, syms_wox.end()), [x](const ex& y){ return y != x; });
2382
2383                 factorization_ctx ctx = {.poly = poly, .x = x,
2384                                          .syms_wox = syms_wox};
2385
2386                 // make polynomial primitive
2387                 poly.unitcontprim(x, ctx.unit, ctx.cont, ctx.pp);
2388                 if ( !is_a<numeric>(ctx.cont) ) {
2389                         // content is a polynomial in one or more of remaining syms, let's start over
2390                         return ctx.unit * factor_sqrfree(ctx.cont) * factor_sqrfree(ctx.pp);
2391                 }
2392
2393                 // find factors of leading coefficient
2394                 ctx.vn = ctx.pp.collect(x).lcoeff(x);
2395                 ctx.vnlst = put_factors_into_vec(factor(ctx.vn));
2396
2397                 ctx.modulus = (ctx.vnlst.size() > 3) ? ctx.vnlst.size() : 3;
2398
2399                 ctx_in_x.push_back(ctx);
2400         }
2401
2402         // try an evaluation homomorphism for each context in round-robin
2403         auto ctx = ctx_in_x.begin();
2404         while ( true ) {
2405
2406                 ex res = ctx->try_next_evaluation_homomorphism();
2407
2408                 if ( res != lst{} ) {
2409                         // found the factors
2410                         ex result = ctx->cont * ctx->unit;
2411                         for ( size_t i=0; i<res.nops(); ++i ) {
2412                                 ex unit, cont, pp;
2413                                 res.op(i).unitcontprim(ctx->x, unit, cont, pp);
2414                                 result *= unit * cont * pp;
2415                         }
2416                         return result;
2417                 }
2418
2419                 // switch context for next symbol
2420                 if (++ctx == ctx_in_x.end()) {
2421                         ctx = ctx_in_x.begin();
2422                 }
2423         }
2424 }
2425
2426 /** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
2427  */
2428 struct find_symbols_map : public map_function {
2429         exset syms;
2430         ex operator()(const ex& e) override
2431         {
2432                 if ( is_a<symbol>(e) ) {
2433                         syms.insert(e);
2434                         return e;
2435                 }
2436                 return e.map(*this);
2437         }
2438 };
2439
2440 /** Factorizes a polynomial that is square free. It calls either the univariate
2441  *  or the multivariate factorization functions.
2442  */
2443 static ex factor_sqrfree(const ex& poly)
2444 {
2445         // determine all symbols in poly
2446         find_symbols_map findsymbols;
2447         findsymbols(poly);
2448         if ( findsymbols.syms.size() == 0 ) {
2449                 return poly;
2450         }
2451
2452         if ( findsymbols.syms.size() == 1 ) {
2453                 // univariate case
2454                 const ex& x = *(findsymbols.syms.begin());
2455                 int ld = poly.ldegree(x);
2456                 if ( ld > 0 ) {
2457                         // pull out direct factors
2458                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
2459                         return res * pow(x,ld);
2460                 } else {
2461                         ex res = factor_univariate(poly, x);
2462                         return res;
2463                 }
2464         }
2465
2466         // multivariate case
2467         ex res = factor_multivariate(poly, findsymbols.syms);
2468         return res;
2469 }
2470
2471 /** Map used by factor() when factor_options::all is given to access all
2472  *  subexpressions and to call factor() on them.
2473  */
2474 struct apply_factor_map : public map_function {
2475         unsigned options;
2476         apply_factor_map(unsigned options_) : options(options_) { }
2477         ex operator()(const ex& e) override
2478         {
2479                 if ( e.info(info_flags::polynomial) ) {
2480                         return factor(e, options);
2481                 }
2482                 if ( is_a<add>(e) ) {
2483                         ex s1, s2;
2484                         for ( size_t i=0; i<e.nops(); ++i ) {
2485                                 if ( e.op(i).info(info_flags::polynomial) ) {
2486                                         s1 += e.op(i);
2487                                 } else {
2488                                         s2 += e.op(i);
2489                                 }
2490                         }
2491                         return factor(s1, options) + s2.map(*this);
2492                 }
2493                 return e.map(*this);
2494         }
2495 };
2496
2497 /** Iterate through explicit factors of e, call yield(f, k) for
2498  *  each factor of the form f^k.
2499  *
2500  *  Note that this function doesn't factor e itself, it only
2501  *  iterates through the factors already explicitly present.
2502  */
2503 template <typename F> void
2504 factor_iter(const ex &e, F yield)
2505 {
2506         if (is_a<mul>(e)) {
2507                 for (const auto &f : e) {
2508                         if (is_a<power>(f)) {
2509                                 yield(f.op(0), f.op(1));
2510                         } else {
2511                                 yield(f, ex(1));
2512                         }
2513                 }
2514         } else {
2515                 if (is_a<power>(e)) {
2516                         yield(e.op(0), e.op(1));
2517                 } else {
2518                         yield(e, ex(1));
2519                 }
2520         }
2521 }
2522
2523 /** This function factorizes a polynomial. It checks the arguments,
2524  *  tries a square free factorization, and then calls factor_sqrfree
2525  *  to do the hard work.
2526  *
2527  *  This function expands its argument, so for polynomials with
2528  *  explicit factors it's better to call it on each one separately
2529  *  (or use factor() which does just that).
2530  */
2531 static ex factor1(const ex& poly, unsigned options)
2532 {
2533         // check arguments
2534         if ( !poly.info(info_flags::polynomial) ) {
2535                 if ( options & factor_options::all ) {
2536                         options &= ~factor_options::all;
2537                         apply_factor_map factor_map(options);
2538                         return factor_map(poly);
2539                 }
2540                 return poly;
2541         }
2542
2543         // determine all symbols in poly
2544         find_symbols_map findsymbols;
2545         findsymbols(poly);
2546         if ( findsymbols.syms.size() == 0 ) {
2547                 return poly;
2548         }
2549         lst syms;
2550         for (auto & i : findsymbols.syms ) {
2551                 syms.append(i);
2552         }
2553
2554         // make poly square free
2555         ex sfpoly = sqrfree(poly.expand(), syms);
2556
2557         // factorize the square free components
2558         ex res = 1;
2559         factor_iter(sfpoly,
2560                 [&](const ex &f, const ex &k) {
2561                         if ( is_a<add>(f) ) {
2562                                 res *= pow(factor_sqrfree(f), k);
2563                         } else {
2564                                 // simple case: (monomial)^exponent
2565                                 res *= pow(f, k);
2566                         }
2567                 });
2568         return res;
2569 }
2570
2571 } // anonymous namespace
2572
2573 /** Interface function to the outside world. It uses factor1()
2574  *  on each of the explicitly present factors of poly.
2575  */
2576 ex factor(const ex& poly, unsigned options)
2577 {
2578         ex result = 1;
2579         factor_iter(poly,
2580                 [&](const ex &f1, const ex &k1) {
2581                         factor_iter(factor1(f1, options),
2582                                 [&](const ex &f2, const ex &k2) {
2583                                         result *= pow(f2, k1*k2);
2584                                 });
2585                 });
2586         return result;
2587 }
2588
2589 } // namespace GiNaC