From: Richard Kreckel <kreckel@ginac.de>
Date: Sat, 18 Jul 2015 22:04:49 +0000 (+0200)
Subject: Speed up pow(+(...),n).expand() where n>2.
X-Git-Tag: ginac_1-6-5~2
X-Git-Url: https://www.ginac.de/ginac.git//ginac.git?p=ginac.git;a=commitdiff_plain;h=d56a0f74afa5380a1730599c3b1b21f34be2f061

Speed up pow(+(...),n).expand() where n>2.

This new algorithm is faster than the old one because it re-uses
multinomial coefficients as much as possible.
---

diff --git a/check/timer.cpp b/check/timer.cpp
index e9e379b0..98a2fe45 100644
--- a/check/timer.cpp
+++ b/check/timer.cpp
@@ -82,7 +82,6 @@ void timer::reset()
 
 double timer::read()
 {
-	double elapsed;
 #ifdef HAVE_RUSAGE
 	if (running())
 		getrusage(RUSAGE_SELF, &used2);
diff --git a/ginac/add.cpp b/ginac/add.cpp
index b62ef917..1b487d80 100644
--- a/ginac/add.cpp
+++ b/ginac/add.cpp
@@ -575,7 +575,7 @@ expair add::split_ex_to_pair(const ex & e) const
 }
 
 expair add::combine_ex_with_coeff_to_pair(const ex & e,
-										  const ex & c) const
+                                          const ex & c) const
 {
 	GINAC_ASSERT(is_exactly_a<numeric>(c));
 	if (is_exactly_a<mul>(e)) {
@@ -601,7 +601,7 @@ expair add::combine_ex_with_coeff_to_pair(const ex & e,
 }
 
 expair add::combine_pair_with_coeff_to_pair(const expair & p,
-											const ex & c) const
+                                            const ex & c) const
 {
 	GINAC_ASSERT(is_exactly_a<numeric>(p.coeff));
 	GINAC_ASSERT(is_exactly_a<numeric>(c));
diff --git a/ginac/power.cpp b/ginac/power.cpp
index 6fa619ca..c3fe14a1 100644
--- a/ginac/power.cpp
+++ b/ginac/power.cpp
@@ -53,8 +53,6 @@ GINAC_IMPLEMENT_REGISTERED_CLASS_OPT(power, basic,
   print_func<print_python_repr>(&power::do_print_python_repr).
   print_func<print_csrc_cl_N>(&power::do_print_csrc_cl_N))
 
-typedef std::vector<int> intvector;
-
 //////////
 // default constructor
 //////////
@@ -749,8 +747,6 @@ ex power::imag_part() const
 
 // protected
 
-// protected
-
 /** Implementation of ex::diff() for a power.
  *  @see ex::diff */
 ex power::derivative(const symbol & s) const
@@ -914,96 +910,326 @@ ex power::expand(unsigned options) const
 // non-virtual functions in this class
 //////////
 
+namespace {  // anonymous namespace for power::expand_add() helpers
+
+/** Helper class to generate all bounded combinatorial partitions of an integer
+ *  n with exactly m parts (including zero parts) in non-decreasing order.
+ */
+class partition_generator {
+private:
+	// Partitions n into m parts, not including zero parts.
+	// (Cf. OEIS sequence A008284; implementation adapted from Jörg Arndt's
+	// FXT library)
+	struct mpartition2
+	{
+		// partition: x[1] + x[2] + ... + x[m] = n and sentinel x[0] == 0
+		std::vector<int> x;
+		int n;   // n>0
+		int m;   // 0<m<=n
+		mpartition2(unsigned n_, unsigned m_)
+		  : x(m_+1), n(n_), m(m_)
+		{
+			for (int k=1; k<m; ++k)
+				x[k] = 1;
+			x[m] = n - m + 1;  // first partition is [1,...,1,n-m+1]
+		}
+		bool next_partition()  // steps x to the next partition, false if none is left
+		{
+			int u = x[m];  // last element
+			int k = m;
+			int s = u;  // running sum of x[k]..x[m]
+			while (--k) {  // scan leftwards for the first x[k] with x[k]+2 <= x[m]
+				s += x[k];
+				if (x[k] + 2 <= u)
+					break;
+			}
+			if (k==0)
+				return false;  // current is last
+			int f = x[k] + 1;  // new common value for x[k]..x[m-1]
+			while (k < m) {
+				x[k] = f;
+				s -= f;
+				++k;
+			}
+			x[m] = s;  // last part takes whatever remains of the sum
+			return true;
+		}
+	} mpgen;
+	int m;  // number of parts including zero parts (may exceed n, see next())
+	mutable std::vector<int> partition;  // current partition
+public:
+	partition_generator(unsigned n_, unsigned m_)
+	  : mpgen(n_, 1), m(m_), partition(m_)  // start with a single non-zero part
+	{ }
+	// returns current partition in non-decreasing order, padded with zeros
+	const std::vector<int>& current() const
+	{
+		for (int i = 0; i < m - mpgen.m; ++i)
+			partition[i] = 0;  // pad with zeros
+
+		for (int i = m - mpgen.m; i < m; ++i)
+			partition[i] = mpgen.x[i - m + mpgen.m + 1];
+
+		return partition;
+	}
+	bool next()
+	{
+		if (!mpgen.next_partition()) {
+			if (mpgen.m == m || mpgen.m == mpgen.n)
+				return false;  // current is last
+			// increment number of parts
+			mpgen = mpartition2(mpgen.n, mpgen.m + 1);
+		}
+		return true;
+	}
+};
+
+/** Helper class to generate all compositions of a partition of an integer n,
+ *  starting with the composition which has non-increasing order.
+ */
+class composition_generator {
+private:
+	// Generates all distinct permutations of a multiset.
+	// (Based on Aaron Williams' algorithm 1 from "Loopless Generation of
+	// Multiset Permutations using a Constant Number of Variables by Prefix
+	// Shifts." <http://webhome.csc.uvic.ca/~haron/CoolMulti.pdf>)
+	struct coolmulti {
+		// element of singly linked list
+		struct element {
+			int value;
+			element* next;
+			element(int val, element* n)
+			  : value(val), next(n) {}
+			~element()
+			{   // recurses down to the end of the singly linked list
+				delete next;
+			}
+		};
+		element *head, *i, *after_i;
+		// NB: Partition must be sorted in non-decreasing order.
+		explicit coolmulti(const std::vector<int>& partition)
+		{
+			head = NULL;
+			for (unsigned n = 0; n < partition.size(); ++n) {
+				head = new element(partition[n], head);  // prepend, so the list ends up reversed
+				if (n <= 1)
+					i = head;
+			}
+			after_i = i->next;  // NB: i is only set inside the loop, so partition must not be empty
+		}
+		~coolmulti()
+		{   // deletes singly linked list
+			delete head;
+		}
+		void next_permutation()
+		{
+			element *before_k;
+			if (after_i->next != NULL && i->value >= after_i->next->value)
+				before_k = after_i;
+			else
+				before_k = i;
+			element *k = before_k->next;
+			before_k->next = k->next;  // unlink k ...
+			k->next = head;  // ... and re-insert it in front of the current head
+			if (k->value < head->value)
+				i = k;
+			after_i = i->next;
+			head = k;
+		}
+		bool finished() const  // NB: dereferences after_i, which is NULL for a one-element list
+		{
+			return after_i->next == NULL && after_i->value >= head->value;
+		}
+	} cmgen;
+	bool atend;  // needed for simplifying iteration over permutations
+	bool trivial;  // likewise, true if all elements are equal
+	mutable std::vector<int> composition;  // current composition
+public:
+	explicit composition_generator(const std::vector<int>& partition)
+	  : cmgen(partition), atend(false), trivial(true), composition(partition.size())
+	{
+		for (unsigned i=1; i<partition.size(); ++i)
+			trivial = trivial && (partition[0] == partition[i]);
+	}
+	const std::vector<int>& current() const  // copies the linked list, head first, into composition
+	{
+		coolmulti::element* it = cmgen.head;
+		size_t i = 0;
+		while (it != NULL) {
+			composition[i] = it->value;
+			it = it->next;
+			++i;
+		}
+		return composition;
+	}
+	bool next()
+	{
+		// This ugly contortion is needed because the original coolmulti
+		// algorithm requires code duplication of the payload procedure,
+		// one before the loop and one inside it.
+		if (trivial || atend)
+			return false;
+		cmgen.next_permutation();
+		atend = cmgen.finished();
+		return true;
+	}
+};
+
+/** Helper function to compute the multinomial coefficient n!/(p1!*p2!*...*pk!)
+ *  where n = p1+p2+...+pk, i.e. p is a partition of n.  The order of the
+ *  entries of p does not matter; p is only read, hence taken by reference. */
+const numeric
+multinomial_coefficient(const std::vector<int> & p)
+{
+	numeric n = 0, d = 1;  // n: sum of all parts, d: product of their factorials
+	std::vector<int>::const_iterator it = p.begin(), itend = p.end();
+	while (it != itend) {
+		n += numeric(*it);
+		d *= factorial(numeric(*it));
+		++it;
+	}
+	return factorial(n) / d;  // n is already a numeric, no extra copy needed
+}
+
+}  // anonymous namespace
+
 /** expand a^n where a is an add and n is a positive integer.
  *  @see power::expand */
 ex power::expand_add(const add & a, int n, unsigned options) const
 {
+	// The special case power(+(x,...,z;c),2) can be optimized better.
 	if (n==2)
 		return expand_add_2(a, options);
 
-	const size_t m = a.nops();
-	exvector result;
+	// method:
+	//
+	// Consider base as the sum of all symbolic terms and the overall numeric
+	// coefficient and apply the binomial theorem:
+	// S = power(+(x,...,z;c),n)
+	//   = power(+(+(x,...,z;0);c),n)
+	//   = sum(binomial(n,k)*power(+(x,...,z;0),k)*c^(n-k), k=1..n) + c^n
+	// Then, apply the multinomial theorem to expand all power(+(x,...,z;0),k):
+	// The multinomial theorem is computed by an outer loop over all
+	// partitions of the exponent and an inner loop over all compositions of
+	// that partition. This method makes the expansion a combinatorial
+	// problem and allows us to directly construct the expanded sum and also
+	// to re-use the multinomial coefficients (since they depend only on the
+	// partition, not on the composition).
+	//
+	// multinomial power(+(x,y,z;0),3) example:
+	// partition : compositions                : multinomial coefficient
+	// [0,0,3]   : [3,0,0],[0,3,0],[0,0,3]     : 3!/(3!*0!*0!) = 1
+	// [0,1,2]   : [2,1,0],[1,2,0],[2,0,1],... : 3!/(2!*1!*0!) = 3
+	// [1,1,1]   : [1,1,1]                     : 3!/(1!*1!*1!) = 6
+	//  =>  (x + y + z)^3 =
+	//        x^3 + y^3 + z^3
+	//      + 3*x^2*y + 3*x*y^2 + 3*y^2*z + 3*y*z^2 + 3*x*z^2 + 3*x^2*z
+	//      + 6*x*y*z
+	//
+	// multinomial power(+(x,y,z;0),4) example:
+	// partition : compositions                : multinomial coefficient
+	// [0,0,4]   : [4,0,0],[0,4,0],[0,0,4]     : 4!/(4!*0!*0!) = 1
+	// [0,1,3]   : [3,1,0],[1,3,0],[3,0,1],... : 4!/(3!*1!*0!) = 4
+	// [0,2,2]   : [2,2,0],[2,0,2],[0,2,2]     : 4!/(2!*2!*0!) = 6
+	// [1,1,2]   : [2,1,1],[1,2,1],[1,1,2]     : 4!/(2!*1!*1!) = 12
+	// (no [1,1,1,1] partition since it has too many parts)
+	//  =>  (x + y + z)^4 =
+	//        x^4 + y^4 + z^4
+	//      + 4*x^3*y + 4*x*y^3 + 4*y^3*z + 4*y*z^3 + 4*x*z^3 + 4*x^3*z
+	//      + 6*x^2*y^2 + 6*y^2*z^2 + 6*x^2*z^2
+	//      + 12*x^2*y*z + 12*x*y^2*z + 12*x*y*z^2
+	//
+	// Summary (a[1],...,a[m] are the symbolic terms, c is the overall coeff):
+	// r = c^n
+	// for k from 1 to n:
+	//     f = c^(n-k)*binomial(n,k)
+	//     for p in all partitions of k with m parts (including zero parts):
+	//         h = f * multinomial coefficient of p
+	//         for q in all compositions of p:
+	//             t = 1
+	//             for i from 1 to m:
+	//                 t = t * a[i]^q[i]
+	//             r = r + h*t
+	// return r
+
+	epvector result;
 	// The number of terms will be the number of combinatorial compositions,
 	// i.e. the number of unordered arrangements of m nonnegative integers
 	// which sum up to n.  It is frequently written as C_n(m) and directly
-	// related with binomial coefficients:
-	result.reserve(binomial(numeric(n+m-1), numeric(m-1)).to_int());
-	intvector k(m-1);
-	intvector k_cum(m-1); // k_cum[l]:=sum(i=0,l,k[l]);
-	intvector upper_limit(m-1);
-
-	for (size_t l=0; l<m-1; ++l) {
-		k[l] = 0;
-		k_cum[l] = 0;
-		upper_limit[l] = n;
+	// related with binomial coefficients: binomial(n+m-1,m-1), m = a.nops().
+	size_t result_size = binomial(numeric(n+a.nops()-1), numeric(a.nops()-1)).to_int();
+	if (!a.overall_coeff.is_zero()) {
+		// the result's overall_coeff is one of the terms
+		--result_size;
 	}
-
-	while (true) {
-		exvector term;
-		term.reserve(m+1);
-		for (std::size_t l = 0; l < m - 1; ++l) {
-			const ex & b = a.op(l);
-			GINAC_ASSERT(!is_exactly_a<add>(b));
-			GINAC_ASSERT(!is_exactly_a<power>(b) ||
-			             !is_exactly_a<numeric>(ex_to<power>(b).exponent) ||
-			             !ex_to<numeric>(ex_to<power>(b).exponent).is_pos_integer() ||
-			             !is_exactly_a<add>(ex_to<power>(b).basis) ||
-			             !is_exactly_a<mul>(ex_to<power>(b).basis) ||
-			             !is_exactly_a<power>(ex_to<power>(b).basis));
-			if (is_exactly_a<mul>(b))
-				term.push_back(expand_mul(ex_to<mul>(b), numeric(k[l]), options, true));
-			else
-				term.push_back(power(b,k[l]));
-		}
-
-		const ex & b = a.op(m - 1);
-		GINAC_ASSERT(!is_exactly_a<add>(b));
-		GINAC_ASSERT(!is_exactly_a<power>(b) ||
-		             !is_exactly_a<numeric>(ex_to<power>(b).exponent) ||
-		             !ex_to<numeric>(ex_to<power>(b).exponent).is_pos_integer() ||
-		             !is_exactly_a<add>(ex_to<power>(b).basis) ||
-		             !is_exactly_a<mul>(ex_to<power>(b).basis) ||
-		             !is_exactly_a<power>(ex_to<power>(b).basis));
-		if (is_exactly_a<mul>(b))
-			term.push_back(expand_mul(ex_to<mul>(b), numeric(n-k_cum[m-2]), options, true));
-		else
-			term.push_back(power(b,n-k_cum[m-2]));
-
-		numeric f = binomial(numeric(n),numeric(k[0]));
-		for (std::size_t l = 1; l < m - 1; ++l)
-			f *= binomial(numeric(n-k_cum[l-1]),numeric(k[l]));
-
-		term.push_back(f);
-
-		result.push_back(ex((new mul(term))->setflag(status_flags::dynallocated)).expand(options));
-
-		// increment k[]
-		bool done = false;
-		std::size_t l = m - 2;
-		while ((++k[l]) > upper_limit[l]) {
-			k[l] = 0;
-			if (l != 0)
-				--l;
-			else {
-				done = true;
-				break;
+	result.reserve(result_size);
+
+	// Iterate over all terms in binomial expansion of
+	// S = power(+(x,...,z;c),n)
+	//   = sum(binomial(n,k)*power(+(x,...,z;0),k)*c^(n-k), k=1..n) + c^n
+	for (int k = 1; k <= n; ++k) {
+		numeric binomial_coefficient;  // binomial(n,k)*c^(n-k)
+		if (a.overall_coeff.is_zero()) {
+			// degenerate case with zero overall_coeff:
+			// apply multinomial theorem directly to power(+(x,...,z;0),n)
+			binomial_coefficient = 1;
+			if (k < n) {
+				continue;
 			}
+		} else {
+			binomial_coefficient = binomial(numeric(n), numeric(k)) * pow(ex_to<numeric>(a.overall_coeff), numeric(n-k));
 		}
-		if (done)
-			break;
 
-		// recalc k_cum[] and upper_limit[]
-		k_cum[l] = (l==0 ? k[0] : k_cum[l-1]+k[l]);
+		// Multinomial expansion of power(+(x,...,z;0),k)*c^(n-k):
+		// Iterate over all partitions of k with exactly as many parts as
+		// there are symbolic terms in the basis (including zero parts).
+		partition_generator partitions(k, a.seq.size());
+		do {
+			const std::vector<int>& partition = partitions.current();
+			const numeric coeff = multinomial_coefficient(partition) * binomial_coefficient;
+
+			// Iterate over all compositions of the current partition.
+			composition_generator compositions(partition);
+			do {
+				const std::vector<int>& exponent = compositions.current();
+				exvector term;
+				term.reserve(n);
+				numeric factor = coeff;
+				for (unsigned i = 0; i < exponent.size(); ++i) {
+					const ex & r = a.seq[i].rest;
+					const ex & c = a.seq[i].coeff;
+					GINAC_ASSERT(!is_exactly_a<add>(r));
+					GINAC_ASSERT(!is_exactly_a<power>(r) ||
+						     !is_exactly_a<numeric>(ex_to<power>(r).exponent) ||
+						     !ex_to<numeric>(ex_to<power>(r).exponent).is_pos_integer() ||
+						     !is_exactly_a<add>(ex_to<power>(r).basis) ||
+						     !is_exactly_a<mul>(ex_to<power>(r).basis) ||
+						     !is_exactly_a<power>(ex_to<power>(r).basis));
+					if (exponent[i] == 0) {
+						// zeroth power: this term contributes neither to term nor to factor
+					} else if (exponent[i] == 1) {
+						// first power: use r itself, no power object is constructed
+						term.push_back(r);
+						factor = factor.mul(ex_to<numeric>(c));
+					} else { // general case exponent[i] > 1
+						term.push_back((new power(r, exponent[i]))->setflag(status_flags::dynallocated));
+						factor = factor.mul(ex_to<numeric>(c).power(exponent[i]));
+					}
+				}
+				result.push_back(a.combine_ex_with_coeff_to_pair(mul(term).expand(options), factor));
+			} while (compositions.next());
+		} while (partitions.next());
+	}
 
-		for (size_t i=l+1; i<m-1; ++i)
-			k_cum[i] = k_cum[i-1]+k[i];
+	GINAC_ASSERT(result.size() == result_size);
 
-		for (size_t i=l+1; i<m-1; ++i)
-			upper_limit[i] = n-k_cum[i-1];
+	if (a.overall_coeff.is_zero()) {
+		return (new add(result))->setflag(status_flags::dynallocated |
+		                                  status_flags::expanded);
+	} else {
+		return (new add(result, ex_to<numeric>(a.overall_coeff).power(n)))->setflag(status_flags::dynallocated |
+		                                                                            status_flags::expanded);
 	}
-
-	return (new add(result))->setflag(status_flags::dynallocated |
-	                                  status_flags::expanded);
 }