Speed up pow(+(...),n).expand() where n>2.
author Richard Kreckel <kreckel@ginac.de>
Sat, 18 Jul 2015 22:04:49 +0000 (00:04 +0200)
committer Richard Kreckel <kreckel@ginac.de>
Sat, 18 Jul 2015 22:12:01 +0000 (00:12 +0200)
This new algorithm is faster than the old one because it re-uses
multinomial coefficients as much as possible.

check/timer.cpp
ginac/add.cpp
ginac/power.cpp

index e9e379b..98a2fe4 100644 (file)
@@ -82,7 +82,6 @@ void timer::reset()
 
 double timer::read()
 {
-       double elapsed;
 #ifdef HAVE_RUSAGE
        if (running())
                getrusage(RUSAGE_SELF, &used2);
index b62ef91..1b487d8 100644 (file)
@@ -575,7 +575,7 @@ expair add::split_ex_to_pair(const ex & e) const
 }
 
 expair add::combine_ex_with_coeff_to_pair(const ex & e,
-                                                                                 const ex & c) const
+                                          const ex & c) const
 {
        GINAC_ASSERT(is_exactly_a<numeric>(c));
        if (is_exactly_a<mul>(e)) {
@@ -601,7 +601,7 @@ expair add::combine_ex_with_coeff_to_pair(const ex & e,
 }
 
 expair add::combine_pair_with_coeff_to_pair(const expair & p,
-                                                                                       const ex & c) const
+                                            const ex & c) const
 {
        GINAC_ASSERT(is_exactly_a<numeric>(p.coeff));
        GINAC_ASSERT(is_exactly_a<numeric>(c));
index 6fa619c..c3fe14a 100644 (file)
@@ -53,8 +53,6 @@ GINAC_IMPLEMENT_REGISTERED_CLASS_OPT(power, basic,
   print_func<print_python_repr>(&power::do_print_python_repr).
   print_func<print_csrc_cl_N>(&power::do_print_csrc_cl_N))
 
-typedef std::vector<int> intvector;
-
 //////////
 // default constructor
 //////////
@@ -749,8 +747,6 @@ ex power::imag_part() const
 
 // protected
 
-// protected
-
 /** Implementation of ex::diff() for a power.
  *  @see ex::diff */
 ex power::derivative(const symbol & s) const
@@ -914,96 +910,326 @@ ex power::expand(unsigned options) const
 // non-virtual functions in this class
 //////////
 
+namespace {  // anonymous namespace for power::expand_add() helpers
+
+/** Helper class to generate all bounded combinatorial partitions of an integer
+ *  n with exactly m parts (including zero parts) in non-decreasing order.
+ */
+class partition_generator {
+private:
+       // Partitions n into m parts, not including zero parts.
+       // (Cf. OEIS sequence A008284; implementation adapted from Jörg Arndt's
+       // FXT library)
+       struct mpartition2
+       {
+               // partition: x[1] + x[2] + ... + x[m] = n and sentinel x[0] == 0
+               std::vector<int> x;
+               int n;   // n>0
+               int m;   // 0<m<=n
+               mpartition2(unsigned n_, unsigned m_)
+                 : x(m_+1), n(n_), m(m_)
+               {
+                       for (int k=1; k<m; ++k)
+                               x[k] = 1;
+                       x[m] = n - m + 1;
+               }
+               bool next_partition()
+               {
+                       int u = x[m];  // last element
+                       int k = m;
+                       int s = u;
+                       while (--k) {
+                               s += x[k];
+                               if (x[k] + 2 <= u)
+                                       break;
+                       }
+                       if (k==0)
+                               return false;  // current is last
+                       int f = x[k] + 1;
+                       while (k < m) {
+                               x[k] = f;
+                               s -= f;
+                               ++k;
+                       }
+                       x[m] = s;
+                       return true;
+               }
+       } mpgen;
+       int m;  // number of parts 0<m<=n
+       mutable std::vector<int> partition;  // current partition
+public:
+       partition_generator(unsigned n_, unsigned m_)
+         : mpgen(n_, 1), m(m_), partition(m_)
+       { }
+       // returns current partition in non-decreasing order, padded with zeros
+       const std::vector<int>& current() const
+       {
+               for (int i = 0; i < m - mpgen.m; ++i)
+                       partition[i] = 0;  // pad with zeros
+
+               for (int i = m - mpgen.m; i < m; ++i)
+                       partition[i] = mpgen.x[i - m + mpgen.m + 1];
+
+               return partition;
+       }
+       bool next()
+       {
+               if (!mpgen.next_partition()) {
+                       if (mpgen.m == m || mpgen.m == mpgen.n)
+                               return false;  // current is last
+                       // increment number of parts
+                       mpgen = mpartition2(mpgen.n, mpgen.m + 1);
+               }
+               return true;
+       }
+};
+
+/** Helper class to generate all compositions of a partition of an integer n,
+ *  starting with the composition that is in non-decreasing order.
+ */
+class composition_generator {
+private:
+       // Generates all distinct permutations of a multiset.
+       // (Based on Aaron Williams' algorithm 1 from "Loopless Generation of
+       // Multiset Permutations using a Constant Number of Variables by Prefix
+       // Shifts." <http://webhome.csc.uvic.ca/~haron/CoolMulti.pdf>)
+       struct coolmulti {
+               // element of singly linked list
+               struct element {
+                       int value;
+                       element* next;
+                       element(int val, element* n)
+                         : value(val), next(n) {}
+                       ~element()
+                       {   // recurses down to the end of the singly linked list
+                               delete next;
+                       }
+               };
+               element *head, *i, *after_i;
+               // NB: Partition must be sorted in non-decreasing order.
+               explicit coolmulti(const std::vector<int>& partition)
+               {
+                       head = NULL;
+                       for (unsigned n = 0; n < partition.size(); ++n) {
+                               head = new element(partition[n], head);
+                               if (n <= 1)
+                                       i = head;
+                       }
+                       after_i = i->next;
+               }
+               ~coolmulti()
+               {   // deletes singly linked list
+                       delete head;
+               }
+               void next_permutation()
+               {
+                       element *before_k;
+                       if (after_i->next != NULL && i->value >= after_i->next->value)
+                               before_k = after_i;
+                       else
+                               before_k = i;
+                       element *k = before_k->next;
+                       before_k->next = k->next;
+                       k->next = head;
+                       if (k->value < head->value)
+                               i = k;
+                       after_i = i->next;
+                       head = k;
+               }
+               bool finished() const
+               {
+                       return after_i->next == NULL && after_i->value >= head->value;
+               }
+       } cmgen;
+       bool atend;  // needed for simplifying iteration over permutations
+       bool trivial;  // likewise, true if all elements are equal
+       mutable std::vector<int> composition;  // current compositions
+public:
+       explicit composition_generator(const std::vector<int>& partition)
+         : cmgen(partition), atend(false), trivial(true), composition(partition.size())
+       {
+               for (unsigned i=1; i<partition.size(); ++i)
+                       trivial = trivial && (partition[0] == partition[i]);
+       }
+       const std::vector<int>& current() const
+       {
+               coolmulti::element* it = cmgen.head;
+               size_t i = 0;
+               while (it != NULL) {
+                       composition[i] = it->value;
+                       it = it->next;
+                       ++i;
+               }
+               return composition;
+       }
+       bool next()
+       {
+               // This ugly contortion is needed because the original coolmulti
+               // algorithm requires code duplication of the payload procedure,
+               // one before the loop and one inside it.
+               if (trivial || atend)
+                       return false;
+               cmgen.next_permutation();
+               atend = cmgen.finished();
+               return true;
+       }
+};
+
+/** Helper function to compute the multinomial coefficient n!/(p1!*p2!*...*pk!)
+ *  where n = p1+p2+...+pk, i.e. p is a partition of n.
+ */
+const numeric
+multinomial_coefficient(const std::vector<int> p)
+{
+       numeric n = 0, d = 1;
+       std::vector<int>::const_iterator it = p.begin(), itend = p.end();
+       while (it != itend) {
+               n += numeric(*it);
+               d *= factorial(numeric(*it));
+               ++it;
+       }
+       return factorial(numeric(n)) / d;
+}
+
+}  // anonymous namespace
+
 /** expand a^n where a is an add and n is a positive integer.
  *  @see power::expand */
 ex power::expand_add(const add & a, int n, unsigned options) const
 {
+       // The special case power(+(x,...,z;c),2) can be optimized better.
        if (n==2)
                return expand_add_2(a, options);
 
-       const size_t m = a.nops();
-       exvector result;
+       // method:
+       //
+       // Consider base as the sum of all symbolic terms and the overall numeric
+       // coefficient and apply the binomial theorem:
+       // S = power(+(x,...,z;c),n)
+       //   = power(+(+(x,...,z;0);c),n)
+       //   = sum(binomial(n,k)*power(+(x,...,z;0),k)*c^(n-k), k=1..n) + c^n
+       // Then, apply the multinomial theorem to expand all power(+(x,...,z;0),k):
+       // The multinomial theorem is computed by an outer loop over all
+       // partitions of the exponent and an inner loop over all compositions of
+       // that partition. This method makes the expansion a combinatorial
+       // problem and allows us to directly construct the expanded sum and also
+       // to re-use the multinomial coefficients (since they depend only on the
+       // partition, not on the composition).
+       // 
+       // multinomial power(+(x,y,z;0),3) example:
+       // partition : compositions                : multinomial coefficient
+       // [0,0,3]   : [3,0,0],[0,3,0],[0,0,3]     : 3!/(3!*0!*0!) = 1
+       // [0,1,2]   : [2,1,0],[1,2,0],[2,0,1],... : 3!/(2!*1!*0!) = 3
+       // [1,1,1]   : [1,1,1]                     : 3!/(1!*1!*1!) = 6
+       //  =>  (x + y + z)^3 =
+       //        x^3 + y^3 + z^3
+       //      + 3*x^2*y + 3*x*y^2 + 3*y^2*z + 3*y*z^2 + 3*x*z^2 + 3*x^2*z
+       //      + 6*x*y*z
+       //
+       // multinomial power(+(x,y,z;0),4) example:
+       // partition : compositions                : multinomial coefficient
+       // [0,0,4]   : [4,0,0],[0,4,0],[0,0,4]     : 4!/(4!*0!*0!) = 1
+       // [0,1,3]   : [3,1,0],[1,3,0],[3,0,1],... : 4!/(3!*1!*0!) = 4
+       // [0,2,2]   : [2,2,0],[2,0,2],[0,2,2]     : 4!/(2!*2!*0!) = 6
+       // [1,1,2]   : [2,1,1],[1,2,1],[1,1,2]     : 4!/(2!*1!*1!) = 12
+       // (no [1,1,1,1] partition since it has too many parts)
+       //  =>  (x + y + z)^4 =
+       //        x^4 + y^4 + z^4
+       //      + 4*x^3*y + 4*x*y^3 + 4*y^3*z + 4*y*z^3 + 4*x*z^3 + 4*x^3*z
+       //      + 6*x^2*y^2 + 6*y^2*z^2 + 6*x^2*z^2
+       //      + 12*x^2*y*z + 12*x*y^2*z + 12*x*y*z^2
+       //
+       // Summary:
+       // r = 0
+       // for k from 0 to n:
+       //     f = c^(n-k)*binomial(n,k)
+       //     for p in all partitions of k with m parts (including zero parts):
+       //         h = f * multinomial coefficient of p
+       //         for q in all compositions of p:
+       //             t = 1
+       //             for i from 1 to m:
+       //                 t = t * a[i]^q[i]
+       //             r = r + h*t
+       // return r
+
+       epvector result;
        // The number of terms will be the number of combinatorial compositions,
        // i.e. the number of unordered arrangements of m nonnegative integers
        // which sum up to n.  It is frequently written as C_n(m) and directly
-       // related with binomial coefficients:
-       result.reserve(binomial(numeric(n+m-1), numeric(m-1)).to_int());
-       intvector k(m-1);
-       intvector k_cum(m-1); // k_cum[l]:=sum(i=0,l,k[l]);
-       intvector upper_limit(m-1);
-
-       for (size_t l=0; l<m-1; ++l) {
-               k[l] = 0;
-               k_cum[l] = 0;
-               upper_limit[l] = n;
+       // related with binomial coefficients: binomial(n+m-1,m-1).
+       size_t result_size = binomial(numeric(n+a.nops()-1), numeric(a.nops()-1)).to_int();
+       if (!a.overall_coeff.is_zero()) {
+               // the result's overall_coeff is one of the terms
+               --result_size;
        }
-
-       while (true) {
-               exvector term;
-               term.reserve(m+1);
-               for (std::size_t l = 0; l < m - 1; ++l) {
-                       const ex & b = a.op(l);
-                       GINAC_ASSERT(!is_exactly_a<add>(b));
-                       GINAC_ASSERT(!is_exactly_a<power>(b) ||
-                                    !is_exactly_a<numeric>(ex_to<power>(b).exponent) ||
-                                    !ex_to<numeric>(ex_to<power>(b).exponent).is_pos_integer() ||
-                                    !is_exactly_a<add>(ex_to<power>(b).basis) ||
-                                    !is_exactly_a<mul>(ex_to<power>(b).basis) ||
-                                    !is_exactly_a<power>(ex_to<power>(b).basis));
-                       if (is_exactly_a<mul>(b))
-                               term.push_back(expand_mul(ex_to<mul>(b), numeric(k[l]), options, true));
-                       else
-                               term.push_back(power(b,k[l]));
-               }
-
-               const ex & b = a.op(m - 1);
-               GINAC_ASSERT(!is_exactly_a<add>(b));
-               GINAC_ASSERT(!is_exactly_a<power>(b) ||
-                            !is_exactly_a<numeric>(ex_to<power>(b).exponent) ||
-                            !ex_to<numeric>(ex_to<power>(b).exponent).is_pos_integer() ||
-                            !is_exactly_a<add>(ex_to<power>(b).basis) ||
-                            !is_exactly_a<mul>(ex_to<power>(b).basis) ||
-                            !is_exactly_a<power>(ex_to<power>(b).basis));
-               if (is_exactly_a<mul>(b))
-                       term.push_back(expand_mul(ex_to<mul>(b), numeric(n-k_cum[m-2]), options, true));
-               else
-                       term.push_back(power(b,n-k_cum[m-2]));
-
-               numeric f = binomial(numeric(n),numeric(k[0]));
-               for (std::size_t l = 1; l < m - 1; ++l)
-                       f *= binomial(numeric(n-k_cum[l-1]),numeric(k[l]));
-
-               term.push_back(f);
-
-               result.push_back(ex((new mul(term))->setflag(status_flags::dynallocated)).expand(options));
-
-               // increment k[]
-               bool done = false;
-               std::size_t l = m - 2;
-               while ((++k[l]) > upper_limit[l]) {
-                       k[l] = 0;
-                       if (l != 0)
-                               --l;
-                       else {
-                               done = true;
-                               break;
+       result.reserve(result_size);
+
+       // Iterate over all terms in binomial expansion of
+       // S = power(+(x,...,z;c),n)
+       //   = sum(binomial(n,k)*power(+(x,...,z;0),k)*c^(n-k), k=1..n) + c^n
+       for (int k = 1; k <= n; ++k) {
+               numeric binomial_coefficient;  // binomial(n,k)*c^(n-k)
+               if (a.overall_coeff.is_zero()) {
+                       // degenerate case with zero overall_coeff:
+                       // apply multinomial theorem directly to power(+(x,...,z;0),n)
+                       binomial_coefficient = 1;
+                       if (k < n) {
+                               continue;
                        }
+               } else {
+                       binomial_coefficient = binomial(numeric(n), numeric(k)) * pow(ex_to<numeric>(a.overall_coeff), numeric(n-k));
                }
-               if (done)
-                       break;
 
-               // recalc k_cum[] and upper_limit[]
-               k_cum[l] = (l==0 ? k[0] : k_cum[l-1]+k[l]);
+               // Multinomial expansion of power(+(x,...,z;0),k)*c^(n-k):
+               // Iterate over all partitions of k with exactly as many parts as
+               // there are symbolic terms in the basis (including zero parts).
+               partition_generator partitions(k, a.seq.size());
+               do {
+                       const std::vector<int>& partition = partitions.current();
+                       const numeric coeff = multinomial_coefficient(partition) * binomial_coefficient;
+
+                       // Iterate over all compositions of the current partition.
+                       composition_generator compositions(partition);
+                       do {
+                               const std::vector<int>& exponent = compositions.current();
+                               exvector term;
+                               term.reserve(n);
+                               numeric factor = coeff;
+                               for (unsigned i = 0; i < exponent.size(); ++i) {
+                                       const ex & r = a.seq[i].rest;
+                                       const ex & c = a.seq[i].coeff;
+                                       GINAC_ASSERT(!is_exactly_a<add>(r));
+                                       GINAC_ASSERT(!is_exactly_a<power>(r) ||
+                                                    !is_exactly_a<numeric>(ex_to<power>(r).exponent) ||
+                                                    !ex_to<numeric>(ex_to<power>(r).exponent).is_pos_integer() ||
+                                                    !is_exactly_a<add>(ex_to<power>(r).basis) ||
+                                                    !is_exactly_a<mul>(ex_to<power>(r).basis) ||
+                                                    !is_exactly_a<power>(ex_to<power>(r).basis));
+                                       if (exponent[i] == 0) {
+                                               // optimize away
+                                       } else if (exponent[i] == 1) {
+                                               // optimized
+                                               term.push_back(r);
+                                               factor = factor.mul(ex_to<numeric>(c));
+                                       } else { // general case exponent[i] > 1
+                                               term.push_back((new power(r, exponent[i]))->setflag(status_flags::dynallocated));
+                                               factor = factor.mul(ex_to<numeric>(c).power(exponent[i]));
+                                       }
+                               }
+                               result.push_back(a.combine_ex_with_coeff_to_pair(mul(term).expand(options), factor));
+                       } while (compositions.next());
+               } while (partitions.next());
+       }
 
-               for (size_t i=l+1; i<m-1; ++i)
-                       k_cum[i] = k_cum[i-1]+k[i];
+       GINAC_ASSERT(result.size() == result_size);
 
-               for (size_t i=l+1; i<m-1; ++i)
-                       upper_limit[i] = n-k_cum[i-1];
+       if (a.overall_coeff.is_zero()) {
+               return (new add(result))->setflag(status_flags::dynallocated |
+                                                 status_flags::expanded);
+       } else {
+               return (new add(result, ex_to<numeric>(a.overall_coeff).power(n)))->setflag(status_flags::dynallocated |
+                                                                                           status_flags::expanded);
        }
-
-       return (new add(result))->setflag(status_flags::dynallocated |
-                                         status_flags::expanded);
 }