From e432b3743d3d32f6060600af580e49d7033dcae9 Mon Sep 17 00:00:00 2001
From: Vitaly Magerya <vmagerya@gmail.com>
Date: Thu, 31 May 2018 17:43:56 +0200
Subject: [PATCH] Add Markowitz-ordered Gaussian elimination algorithm.

This algorithm avoids the 'fill-in' problem of Gaussian elimination
and significantly improves the times for solving large sparse systems.
See: <https://www.ginac.de/pipermail/ginac-list/2018-May/002202.html>.
---
 AUTHORS                |   2 +-
 check/check_lsolve.cpp |  55 ++++++++---------
 ginac/flags.h          |   8 ++-
 ginac/inifcns.cpp      |  26 +++++++-
 ginac/matrix.cpp       | 134 +++++++++++++++++++++++++++++++++++++++--
 ginac/matrix.h         |   1 +
 6 files changed, 189 insertions(+), 37 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index d6eaa874..7fb3466b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -12,7 +12,7 @@ Contributors of patches
 -----------------------
 
 Roberto Bagnara, Do Hoang Son, Markus Nullmeier, Pearu Peterson, Benedikt
-Pluemper, Ben Sapp, Stefan Weinzierl.
+Pluemper, Ben Sapp, Stefan Weinzierl, Vitaly Magerya.
 (Please send email if you think you were forgotten.)
 
 
diff --git a/check/check_lsolve.cpp b/check/check_lsolve.cpp
index 8c4f438b..247776ba 100644
--- a/check/check_lsolve.cpp
+++ b/check/check_lsolve.cpp
@@ -118,7 +118,6 @@ static unsigned check_inifcns_lsolve(unsigned n)
 		}
 		lst eqns;  // equation list
 		lst vars;  // variable list
-		ex sol;	// solution
 		// Create a random linear system...
 		for (unsigned i=0; i<n; ++i) {
 			ex lhs = rand()%201-100;
@@ -131,32 +130,34 @@ static unsigned check_inifcns_lsolve(unsigned n)
 			eqns.append(lhs==rhs);
 			vars.append(x[i]);
 		}
-		// ...solve it...
-		sol = lsolve(eqns, vars);
-		
-		// ...and check the solution:
-		if (sol.nops() == 0) {
-			// no solution was found
-			// is the coefficient matrix really, really, really degenerate?
-			matrix coeffmat(n,n);
-			for (unsigned ro=0; ro<n; ++ro)
-				for (unsigned co=0; co<n; ++co)
-					coeffmat.set(ro,co,eqns.op(co).rhs().coeff(a[co],1));
-			if (!coeffmat.determinant().is_zero()) {
-				++result;
-				clog << "solution of the system " << eqns << " for " << vars
-					 << " was not found" << endl;
-			}
-		} else {
-			// insert the solution into rhs of out equations
-			bool errorflag = false;
-			for (unsigned i=0; i<n; ++i)
-				if (eqns.op(i).rhs().subs(sol) != eqns.op(i).lhs())
-					errorflag = true;
-			if (errorflag) {
-				++result;
-				clog << "solution of the system " << eqns << " for " << vars
-				     << " erroneously returned " << sol << endl;
+		// ...solve it with each algorithm...
+		for (int algo = solve_algo::automatic; algo <= solve_algo::markowitz; algo++) {
+			ex sol = lsolve(eqns, vars, algo);
+			// ...and check the solution:
+			if (sol.nops() == 0) {
+				// no solution was found
+				// is the coefficient matrix really, really, really degenerate?
+				matrix coeffmat(n,n);
+				for (unsigned ro=0; ro<n; ++ro)
+					for (unsigned co=0; co<n; ++co)
+						coeffmat.set(ro,co,eqns.op(co).rhs().coeff(a[co],1));
+				if (!coeffmat.determinant().is_zero()) {
+					++result;
+					clog << "solution of the system " << eqns << " for " << vars
+					     << " was not found using algorithm " << algo << endl;
+				}
+			} else {
+				// insert the solution into rhs of out equations
+				bool errorflag = false;
+				for (unsigned i=0; i<n; ++i)
+					if (eqns.op(i).rhs().subs(sol) != eqns.op(i).lhs())
+						errorflag = true;
+				if (errorflag) {
+					++result;
+					clog << "solution of the system " << eqns << " for " << vars
+					     << " erroneously returned " << sol << " using algorithm "
+					     << algo << endl;
+				}
 			}
 		}
 	}
diff --git a/ginac/flags.h b/ginac/flags.h
index c97229af..cebc3fd4 100644
--- a/ginac/flags.h
+++ b/ginac/flags.h
@@ -182,7 +182,13 @@ public:
 		 *  linear systems.  In contrast to division-free elimination it only
 		 *  has a linear expression swell.  For two-dimensional systems, the
 		 *  two algorithms are equivalent, however. */
-		bareiss
+		bareiss,
+		/** Markowitz-ordered Gaussian elimination. Same as the usual
+		 *  Gaussian elimination, but with additional effort spent on
+		 *  selection pivots that minimize fill-in. Much faster than the
+		 *  methods above for large sparse matrices, marginally slower
+		 *  than Gaussian elimination otherwise. */
+		markowitz
 	};
 };
 
diff --git a/ginac/inifcns.cpp b/ginac/inifcns.cpp
index 4e426f4c..d81b03c7 100644
--- a/ginac/inifcns.cpp
+++ b/ginac/inifcns.cpp
@@ -1038,6 +1038,24 @@ REGISTER_FUNCTION(Order, eval_func(Order_eval).
 // Solve linear system
 //////////
 
+static void insert_symbols(exset &es, const ex &e)
+{
+	if (is_a<symbol>(e)) {
+		es.insert(e);
+	} else {
+		for (const ex &sube : e) {
+			insert_symbols(es, sube);
+		}
+	}
+}
+
+static exset symbolset(const ex &e)
+{
+	exset s;
+	insert_symbols(s, e);
+	return s;
+}
+
 ex lsolve(const ex &eqns, const ex &symbols, unsigned options)
 {
 	// solve a system of linear equations
@@ -1077,8 +1095,10 @@ ex lsolve(const ex &eqns, const ex &symbols, unsigned options)
 	
 	for (size_t r=0; r<eqns.nops(); r++) {
 		const ex eq = eqns.op(r).op(0)-eqns.op(r).op(1); // lhs-rhs==0
+		const exset syms = symbolset(eq);
 		ex linpart = eq;
 		for (size_t c=0; c<symbols.nops(); c++) {
+			if (syms.count(symbols.op(c)) == 0) continue;
 			const ex co = eq.coeff(ex_to<symbol>(symbols.op(c)),1);
 			linpart -= co*symbols.op(c);
 			sys(r,c) = co;
@@ -1088,11 +1108,13 @@ ex lsolve(const ex &eqns, const ex &symbols, unsigned options)
 	}
 	
 	// test if system is linear and fill vars matrix
+	const exset sys_syms = symbolset(sys);
+	const exset rhs_syms = symbolset(rhs);
 	for (size_t i=0; i<symbols.nops(); i++) {
 		vars(i,0) = symbols.op(i);
-		if (sys.has(symbols.op(i)))
+		if (sys_syms.count(symbols.op(i)) != 0)
 			throw(std::logic_error("lsolve: system is not linear"));
-		if (rhs.has(symbols.op(i)))
+		if (rhs_syms.count(symbols.op(i)) != 0)
 			throw(std::logic_error("lsolve: system is not linear"));
 	}
 	
diff --git a/ginac/matrix.cpp b/ginac/matrix.cpp
index 5b982251..7b0b3488 100644
--- a/ginac/matrix.cpp
+++ b/ginac/matrix.cpp
@@ -1003,7 +1003,7 @@ matrix matrix::solve(const matrix & vars,
 	const unsigned p = rhs.cols();
 	
 	// syntax checks    
-	if ((rhs.rows() != m) || (vars.rows() != n) || (vars.col != p))
+	if ((rhs.rows() != m) || (vars.rows() != n) || (vars.cols() != p))
 		throw (std::logic_error("matrix::solve(): incompatible matrices"));
 	for (unsigned ro=0; ro<n; ++ro)
 		for (unsigned co=0; co<p; ++co)
@@ -1042,6 +1042,10 @@ matrix matrix::solve(const matrix & vars,
 	}
 	
 	// Eliminate the augmented matrix:
+	std::vector<unsigned> colid(aug.cols());
+	for (unsigned c = 0; c < aug.cols(); c++) {
+		colid[c] = c;
+	}
 	switch(algo) {
 		case solve_algo::gauss:
 			aug.gauss_elimination();
@@ -1050,8 +1054,13 @@ matrix matrix::solve(const matrix & vars,
 			aug.division_free_elimination();
 			break;
 		case solve_algo::bareiss:
-		default:
 			aug.fraction_free_elimination();
+			break;
+		case solve_algo::markowitz:
+			colid = aug.markowitz_elimination(n);
+			break;
+		default:
+			throw std::invalid_argument("matrix::solve(): 'algo' is not one of the solve_algo enum");
 	}
 	
 	// assemble the solution matrix:
@@ -1071,18 +1080,18 @@ matrix matrix::solve(const matrix & vars,
 				// assign solutions for vars between fnz+1 and
 				// last_assigned_sol-1: free parameters
 				for (unsigned c=fnz; c<last_assigned_sol-1; ++c)
-					sol(c,co) = vars.m[c*p+co];
+					sol(colid[c],co) = vars.m[colid[c]*p+co];
 				ex e = aug.m[r*(n+p)+n+co];
 				for (unsigned c=fnz; c<n; ++c)
-					e -= aug.m[r*(n+p)+c]*sol.m[c*p+co];
-				sol(fnz-1,co) = (e/(aug.m[r*(n+p)+(fnz-1)])).normal();
+					e -= aug.m[r*(n+p)+c]*sol.m[colid[c]*p+co];
+				sol(colid[fnz-1],co) = (e/(aug.m[r*(n+p)+fnz-1])).normal();
 				last_assigned_sol = fnz;
 			}
 		}
 		// assign solutions for vars between 1 and
 		// last_assigned_sol-1: free parameters
 		for (unsigned ro=0; ro<last_assigned_sol-1; ++ro)
-			sol(ro,co) = vars(ro,co);
+			sol(colid[ro],co) = vars(colid[ro],co);
 	}
 	
 	return sol;
@@ -1293,6 +1302,119 @@ int matrix::gauss_elimination(const bool det)
 	return sign;
 }
 
+/* Perform Markowitz-ordered Gaussian elimination (with full
+ * pivoting) on a matrix, constraining the choice of pivots to
+ * the first n columns (this simplifies handling of augmented
+ * matrices). Return the column id vector v, such that v[column]
+ * is the original number of the column before shuffling (v[i]==i
+ * for i >= n). */
+std::vector<unsigned>
+matrix::markowitz_elimination(unsigned n)
+{
+	GINAC_ASSERT(n <= col);
+	std::vector<int> rowcnt(row, 0);
+	std::vector<int> colcnt(col, 0);
+	// Normalize everything before start. We'll keep all the
+	// cells normalized throughout the algorithm to properly
+	// handle unnormal zeros.
+	for (unsigned r = 0; r < row; r++) {
+		for (unsigned c = 0; c < col; c++) {
+			if (!m[r*col + c].is_zero()) {
+				m[r*col + c] = m[r*col + c].normal();
+				rowcnt[r]++;
+				colcnt[c]++;
+			}
+		}
+	}
+	std::vector<unsigned> colid(col);
+	for (unsigned c = 0; c < col; c++) {
+		colid[c] = c;
+	}
+	exvector ab(row);
+	for (unsigned k = 0; (k < col) && (k < row - 1); k++) {
+		// Find the pivot that minimizes (rowcnt[r]-1)*(colcnt[c]-1).
+		unsigned pivot_r = row + 1;
+		unsigned pivot_c = col + 1;
+		int pivot_m = row*col;
+		for (unsigned r = k; r < row; r++) {
+			for (unsigned c = k; c < n; c++) {
+				const ex &mrc = m[r*col + c];
+				if (mrc.is_zero())
+					continue;
+				GINAC_ASSERT(rowcnt[r] > 0);
+				GINAC_ASSERT(colcnt[c] > 0);
+				int measure = (rowcnt[r] - 1)*(colcnt[c] - 1);
+				if (measure < pivot_m) {
+					pivot_m = measure;
+					pivot_r = r;
+					pivot_c = c;
+				}
+			}
+		}
+		if (pivot_m == row*col) {
+			// The rest of the matrix is zero.
+			break;
+		}
+		GINAC_ASSERT(k <= pivot_r && pivot_r < row);
+		GINAC_ASSERT(k <= pivot_c && pivot_c < col);
+		// Swap the pivot into (k, k).
+		if (pivot_c != k) {
+			for (unsigned r = 0; r < row; r++) {
+				m[r*col + pivot_c].swap(m[r*col + k]);
+			}
+			std::swap(colid[pivot_c], colid[k]);
+			std::swap(colcnt[pivot_c], colcnt[k]);
+		}
+		if (pivot_r != k) {
+			for (unsigned c = k; c < col; c++) {
+				m[pivot_r*col + c].swap(m[k*col + c]);
+			}
+			std::swap(rowcnt[pivot_r], rowcnt[k]);
+		}
+		// No normalization before is_zero() here, because
+		// we maintain the matrix normalized throughout the
+		// algorithm.
+		ex a = m[k*col + k];
+		GINAC_ASSERT(!a.is_zero());
+		// Subtract the pivot row KJI-style (so: loop by pivot, then
+		// column, then row) to maximally exploit pivot row zeros (at
+		// the expense of the pivot column zeros). The speedup compared
+		// to the usual KIJ order is not really significant though...
+		for (unsigned r = k + 1; r < row; r++) {
+			const ex &b = m[r*col + k];
+			if (!b.is_zero()) {
+				ab[r] = b/a;
+				rowcnt[r]--;
+			}
+		}
+		colcnt[k] = rowcnt[k] = 0;
+		for (unsigned c = k + 1; c < col; c++) {
+			const ex &mr0c = m[k*col + c];
+			if (mr0c.is_zero())
+				continue;
+			colcnt[c]--;
+			for (unsigned r = k + 1; r < row; r++) {
+				if (ab[r].is_zero())
+					continue;
+				bool waszero = m[r*col + c].is_zero();
+				m[r*col + c] = (m[r*col + c] - ab[r]*mr0c).normal();
+				bool iszero = m[r*col + c].is_zero();
+				if (waszero && !iszero) {
+					rowcnt[r]++;
+					colcnt[c]++;
+				}
+				if (!waszero && iszero) {
+					rowcnt[r]--;
+					colcnt[c]--;
+				}
+			}
+		}
+		for (unsigned r = k + 1; r < row; r++) {
+			ab[r] = m[r*col + k] = _ex0;
+		}
+	}
+	return colid;
+}
 
 /** Perform the steps of division free elimination to bring the m x n matrix
  *  into an upper echelon form.
diff --git a/ginac/matrix.h b/ginac/matrix.h
index 1a95d0aa..cf7dd039 100644
--- a/ginac/matrix.h
+++ b/ginac/matrix.h
@@ -159,6 +159,7 @@ protected:
 	int gauss_elimination(const bool det = false);
 	int division_free_elimination(const bool det = false);
 	int fraction_free_elimination(const bool det = false);
+	std::vector<unsigned> markowitz_elimination(unsigned n);
 	int pivot(unsigned ro, unsigned co, bool symbolic = true);
 
 	void print_elements(const print_context & c, const char *row_start, const char *row_end, const char *row_sep, const char *col_sep) const;
-- 
2.45.0