From: Richard Kreckel Date: Sat, 18 Jul 2015 22:04:49 +0000 (+0200) Subject: Speed up pow(+(...),n).expand() where n>2. X-Git-Tag: ginac_1-6-5~2 X-Git-Url: https://www.ginac.de/ginac.git//ginac.git?p=ginac.git;a=commitdiff_plain;h=d56a0f74afa5380a1730599c3b1b21f34be2f061 Speed up pow(+(...),n).expand() where n>2. This new algorithm is faster than the old one because it re-uses multinomial coefficients as much as possible. --- diff --git a/check/timer.cpp b/check/timer.cpp index e9e379b0..98a2fe45 100644 --- a/check/timer.cpp +++ b/check/timer.cpp @@ -82,7 +82,6 @@ void timer::reset() double timer::read() { - double elapsed; #ifdef HAVE_RUSAGE if (running()) getrusage(RUSAGE_SELF, &used2); diff --git a/ginac/add.cpp b/ginac/add.cpp index b62ef917..1b487d80 100644 --- a/ginac/add.cpp +++ b/ginac/add.cpp @@ -575,7 +575,7 @@ expair add::split_ex_to_pair(const ex & e) const } expair add::combine_ex_with_coeff_to_pair(const ex & e, - const ex & c) const + const ex & c) const { GINAC_ASSERT(is_exactly_a(c)); if (is_exactly_a(e)) { @@ -601,7 +601,7 @@ expair add::combine_ex_with_coeff_to_pair(const ex & e, } expair add::combine_pair_with_coeff_to_pair(const expair & p, - const ex & c) const + const ex & c) const { GINAC_ASSERT(is_exactly_a(p.coeff)); GINAC_ASSERT(is_exactly_a(c)); diff --git a/ginac/power.cpp b/ginac/power.cpp index 6fa619ca..c3fe14a1 100644 --- a/ginac/power.cpp +++ b/ginac/power.cpp @@ -53,8 +53,6 @@ GINAC_IMPLEMENT_REGISTERED_CLASS_OPT(power, basic, print_func(&power::do_print_python_repr). print_func(&power::do_print_csrc_cl_N)) -typedef std::vector intvector; - ////////// // default constructor ////////// @@ -749,8 +747,6 @@ ex power::imag_part() const // protected -// protected - /** Implementation of ex::diff() for a power. 
* @see ex::diff */ ex power::derivative(const symbol & s) const @@ -914,96 +910,326 @@ ex power::expand(unsigned options) const // non-virtual functions in this class ////////// +namespace { // anonymous namespace for power::expand_add() helpers + +/** Helper class to generate all bounded combinatorial partitions of an integer + * n with exactly m parts (including zero parts) in non-decreasing order. + */ +class partition_generator { +private: + // Partitions n into m parts, not including zero parts. + // (Cf. OEIS sequence A008284; implementation adapted from Jörg Arndt's + // FXT library) + struct mpartition2 + { + // partition: x[1] + x[2] + ... + x[m] = n and sentinel x[0] == 0 + std::vector x; + int n; // n>0 + int m; // 0 partition; // current partition +public: + partition_generator(unsigned n_, unsigned m_) + : mpgen(n_, 1), m(m_), partition(m_) + { } + // returns current partition in non-decreasing order, padded with zeros + const std::vector& current() const + { + for (int i = 0; i < m - mpgen.m; ++i) + partition[i] = 0; // pad with zeros + + for (int i = m - mpgen.m; i < m; ++i) + partition[i] = mpgen.x[i - m + mpgen.m + 1]; + + return partition; + } + bool next() + { + if (!mpgen.next_partition()) { + if (mpgen.m == m || mpgen.m == mpgen.n) + return false; // current is last + // increment number of parts + mpgen = mpartition2(mpgen.n, mpgen.m + 1); + } + return true; + } +}; + +/** Helper class to generate all compositions of a partition of an integer n, + * starting with the composition which has non-decreasing order. + */ +class composition_generator { +private: + // Generates all distinct permutations of a multiset. + // (Based on Aaron Williams' algorithm 1 from "Loopless Generation of + // Multiset Permutations using a Constant Number of Variables by Prefix + // Shifts."
) + struct coolmulti { + // element of singly linked list + struct element { + int value; + element* next; + element(int val, element* n) + : value(val), next(n) {} + ~element() + { // recurses down to the end of the singly linked list + delete next; + } + }; + element *head, *i, *after_i; + // NB: Partition must be sorted in non-decreasing order. + explicit coolmulti(const std::vector& partition) + { + head = NULL; + for (unsigned n = 0; n < partition.size(); ++n) { + head = new element(partition[n], head); + if (n <= 1) + i = head; + } + after_i = i->next; + } + ~coolmulti() + { // deletes singly linked list + delete head; + } + void next_permutation() + { + element *before_k; + if (after_i->next != NULL && i->value >= after_i->next->value) + before_k = after_i; + else + before_k = i; + element *k = before_k->next; + before_k->next = k->next; + k->next = head; + if (k->value < head->value) + i = k; + after_i = i->next; + head = k; + } + bool finished() const + { + return after_i->next == NULL && after_i->value >= head->value; + } + } cmgen; + bool atend; // needed for simplifying iteration over permutations + bool trivial; // likewise, true if all elements are equal + mutable std::vector composition; // current compositions +public: + explicit composition_generator(const std::vector& partition) + : cmgen(partition), atend(false), trivial(true), composition(partition.size()) + { + for (unsigned i=1; i& current() const + { + coolmulti::element* it = cmgen.head; + size_t i = 0; + while (it != NULL) { + composition[i] = it->value; + it = it->next; + ++i; + } + return composition; + } + bool next() + { + // This ugly contortion is needed because the original coolmulti + // algorithm requires code duplication of the payload procedure, + // one before the loop and one inside it. 
+ if (trivial || atend) + return false; + cmgen.next_permutation(); + atend = cmgen.finished(); + return true; + } +}; + +/** Helper function to compute the multinomial coefficient n!/(p1!*p2!*...*pk!) + * where n = p1+p2+...+pk, i.e. p is a partition of n. + */ +const numeric +multinomial_coefficient(const std::vector p) +{ + numeric n = 0, d = 1; + std::vector::const_iterator it = p.begin(), itend = p.end(); + while (it != itend) { + n += numeric(*it); + d *= factorial(numeric(*it)); + ++it; + } + return factorial(numeric(n)) / d; +} + +} // anonymous namespace + /** expand a^n where a is an add and n is a positive integer. * @see power::expand */ ex power::expand_add(const add & a, int n, unsigned options) const { + // The special case power(+(x,...y;x),2) can be optimized better. if (n==2) return expand_add_2(a, options); - const size_t m = a.nops(); - exvector result; + // method: + // + // Consider base as the sum of all symbolic terms and the overall numeric + // coefficient and apply the binomial theorem: + // S = power(+(x,...,z;c),n) + // = power(+(+(x,...,z;0);c),n) + // = sum(binomial(n,k)*power(+(x,...,z;0),k)*c^(n-k), k=1..n) + c^n + // Then, apply the multinomial theorem to expand all power(+(x,...,z;0),k): + // The multinomial theorem is computed by an outer loop over all + // partitions of the exponent and an inner loop over all compositions of + // that partition. This method makes the expansion a combinatorial + // problem and allows us to directly construct the expanded sum and also + // to re-use the multinomial coefficients (since they depend only on the + // partition, not on the composition). + // + // multinomial power(+(x,y,z;0),3) example: + // partition : compositions : multinomial coefficient + // [0,0,3] : [3,0,0],[0,3,0],[0,0,3] : 3!/(3!*0!*0!) = 1 + // [0,1,2] : [2,1,0],[1,2,0],[2,0,1],... : 3!/(2!*1!*0!) = 3 + // [1,1,1] : [1,1,1] : 3!/(1!*1!*1!) 
= 6 + // => (x + y + z)^3 = + // x^3 + y^3 + z^3 + // + 3*x^2*y + 3*x*y^2 + 3*y^2*z + 3*y*z^2 + 3*x*z^2 + 3*x^2*z + // + 6*x*y*z + // + // multinomial power(+(x,y,z;0),4) example: + // partition : compositions : multinomial coefficient + // [0,0,4] : [4,0,0],[0,4,0],[0,0,4] : 4!/(4!*0!*0!) = 1 + // [0,1,3] : [3,1,0],[1,3,0],[3,0,1],... : 4!/(3!*1!*0!) = 4 + // [0,2,2] : [2,2,0],[2,0,2],[0,2,2] : 4!/(2!*2!*0!) = 6 + // [1,1,2] : [2,1,1],[1,2,1],[1,1,2] : 4!/(2!*1!*1!) = 12 + // (no [1,1,1,1] partition since it has too many parts) + // => (x + y + z)^4 = + // x^4 + y^4 + z^4 + // + 4*x^3*y + 4*x*y^3 + 4*y^3*z + 4*y*z^3 + 4*x*z^3 + 4*x^3*z + // + 6*x^2*y^2 + 6*y^2*z^2 + 6*x^2*z^2 + // + 12*x^2*y*z + 12*x*y^2*z + 12*x*y*z^2 + // + // Summary: + // r = 0 + // for k from 0 to n: + // f = c^(n-k)*binomial(n,k) + // for p in all partitions of n with m parts (including zero parts): + // h = f * multinomial coefficient of p + // for c in all compositions of p: + // t = 1 + // for e in all elements of c: + // t = t * a[e]^e + // r = r + h*t + // return r + + epvector result; // The number of terms will be the number of combinatorial compositions, // i.e. the number of unordered arrangements of m nonnegative integers // which sum up to n. 
It is frequently written as C_n(m) and directly - // related with binomial coefficients: - result.reserve(binomial(numeric(n+m-1), numeric(m-1)).to_int()); - intvector k(m-1); - intvector k_cum(m-1); // k_cum[l]:=sum(i=0,l,k[l]); - intvector upper_limit(m-1); - - for (size_t l=0; l(b)); - GINAC_ASSERT(!is_exactly_a(b) || - !is_exactly_a(ex_to(b).exponent) || - !ex_to(ex_to(b).exponent).is_pos_integer() || - !is_exactly_a(ex_to(b).basis) || - !is_exactly_a(ex_to(b).basis) || - !is_exactly_a(ex_to(b).basis)); - if (is_exactly_a(b)) - term.push_back(expand_mul(ex_to(b), numeric(k[l]), options, true)); - else - term.push_back(power(b,k[l])); - } - - const ex & b = a.op(m - 1); - GINAC_ASSERT(!is_exactly_a(b)); - GINAC_ASSERT(!is_exactly_a(b) || - !is_exactly_a(ex_to(b).exponent) || - !ex_to(ex_to(b).exponent).is_pos_integer() || - !is_exactly_a(ex_to(b).basis) || - !is_exactly_a(ex_to(b).basis) || - !is_exactly_a(ex_to(b).basis)); - if (is_exactly_a(b)) - term.push_back(expand_mul(ex_to(b), numeric(n-k_cum[m-2]), options, true)); - else - term.push_back(power(b,n-k_cum[m-2])); - - numeric f = binomial(numeric(n),numeric(k[0])); - for (std::size_t l = 1; l < m - 1; ++l) - f *= binomial(numeric(n-k_cum[l-1]),numeric(k[l])); - - term.push_back(f); - - result.push_back(ex((new mul(term))->setflag(status_flags::dynallocated)).expand(options)); - - // increment k[] - bool done = false; - std::size_t l = m - 2; - while ((++k[l]) > upper_limit[l]) { - k[l] = 0; - if (l != 0) - --l; - else { - done = true; - break; + result.reserve(result_size); + + // Iterate over all terms in binomial expansion of + // S = power(+(x,...,z;c),n) + // = sum(binomial(n,k)*power(+(x,...,z;0),k)*c^(n-k), k=1..n) + c^n + for (int k = 1; k <= n; ++k) { + numeric binomial_coefficient; // binomial(n,k)*c^(n-k) + if (a.overall_coeff.is_zero()) { + // degenerate case with zero overall_coeff: + // apply multinomial theorem directly to power(+(x,...z;0),n) + binomial_coefficient = 1; + if (k < n) { + 
continue; } + } else { + binomial_coefficient = binomial(numeric(n), numeric(k)) * pow(ex_to(a.overall_coeff), numeric(n-k)); } - if (done) - break; - // recalc k_cum[] and upper_limit[] - k_cum[l] = (l==0 ? k[0] : k_cum[l-1]+k[l]); + // Multinomial expansion of power(+(x,...,z;0),k)*c^(n-k): + // Iterate over all partitions of k with exactly as many parts as + // there are symbolic terms in the basis (including zero parts). + partition_generator partitions(k, a.seq.size()); + do { + const std::vector& partition = partitions.current(); + const numeric coeff = multinomial_coefficient(partition) * binomial_coefficient; + + // Iterate over all compositions of the current partition. + composition_generator compositions(partition); + do { + const std::vector& exponent = compositions.current(); + exvector term; + term.reserve(n); + numeric factor = coeff; + for (unsigned i = 0; i < exponent.size(); ++i) { + const ex & r = a.seq[i].rest; + const ex & c = a.seq[i].coeff; + GINAC_ASSERT(!is_exactly_a(r)); + GINAC_ASSERT(!is_exactly_a(r) || + !is_exactly_a(ex_to(r).exponent) || + !ex_to(ex_to(r).exponent).is_pos_integer() || + !is_exactly_a(ex_to(r).basis) || + !is_exactly_a(ex_to(r).basis) || + !is_exactly_a(ex_to(r).basis)); + if (exponent[i] == 0) { + // optimize away + } else if (exponent[i] == 1) { + // optimized + term.push_back(r); + factor = factor.mul(ex_to(c)); + } else { // general case exponent[i] > 1 + term.push_back((new power(r, exponent[i]))->setflag(status_flags::dynallocated)); + factor = factor.mul(ex_to(c).power(exponent[i])); + } + } + result.push_back(a.combine_ex_with_coeff_to_pair(mul(term).expand(options), factor)); + } while (compositions.next()); + } while (partitions.next()); + } - for (size_t i=l+1; isetflag(status_flags::dynallocated | + status_flags::expanded); + } else { + return (new add(result, ex_to(a.overall_coeff).power(n)))->setflag(status_flags::dynallocated | + status_flags::expanded); } - - return (new 
add(result))->setflag(status_flags::dynallocated | - status_flags::expanded); }