[GiNaC-devel] [PATCH] Faster, better (recursive descent) expression parser.

Alexei Sheplyakov varg at theor.jinr.ru
Tue Aug 19 20:09:40 CEST 2008


bison generated parser has a number of problems:

1. Bad performance. Parsing a sum seems to be O(N^{2 + a}) (a > 0) [1].
   For example, parsing a sum (actually, a univariate polynomial) of 32768
   terms takes about 90 sec. on my box, parsing a sum of 10^6 terms takes
   "eternity".
2. The user is expected to provide list of all symbols in the expression.
   Often this is very annoying (and useless).
3. Parser is not reentrant (bison *can* produce reentrant parsers, but that
   won't solve other problems).
4. Parser is difficult to extend.

Hence the new parser.

Features:

1. Parsing large sums and products is O(N).
2. Parser is reentrant (well, almost).
3. It's possible to insert (shell style) comments inside the expressions.
4. matrices, lists, FAIL are NOT handled. Yes, this *is* a feature :-)

Limitations:

1. Error handling is a bit terse: on error exception is thrown, and that's it.
2. Binary, octal, and hexadecimal numbers can not be parsed (yet).
3. Tensors, noncommutative products, etc. can not be parsed.

Other notes:

1. ex ctor still uses the old parser.
2. ginsh still uses the old parser.

[1] Mesured by this script (requires gnuplot):

make_expression_string ()
{
	printf "1 + x"
	local n=2
	while test $n -le $1; do
		printf " + %s*x^%s" $n $n
		n=$(expr $n + 1)
	done
}

single_test ()
{
	printf "$1 \t"
	( printf "time("; make_expression_string $1; printf ");" ) | \
	ginsh | sed -e 's/s$//'
}

benchmark ()
{
	local N=$1
	while test $N -le $2; do
		single_test $N
		N=$(expr $N \* 2)
	done

}

gnuplot_header ()
{
 	echo "set logscale xy"
	echo "set xlabel 'degree (# of terms)'"
	echo "set ylabel 'time, sec.'"
	echo "set xrange [${1}:${2}]"
	echo "plot '-' using 1:2 with lines title '1+x+2*x^2+...+n*x^n'"
}

gnuplot_footer ()
{
	echo "e"
}

benchmark_and_plot ()
{
	( gnuplot_header $1 $2
	  benchmark $1 $2 | tee $3.txt
	  gnuplot_footer ) | \
	gnuplot -persist '-'
}

N_ini=${1:-1024}
N_fin=${2:-32768}
out_base=${3:-parser_benchmark}

benchmark_and_plot $N_ini $N_fin $out_base
---
 INSTALL                          |    3 +-
 check/Makefile.am                |    8 ++-
 check/time_parser.cpp            |   79 +++++++++++++++++
 configure.ac                     |    4 +
 ginac/Makefile.am                |   31 ++++++-
 ginac/parser/builtin_fcns.def    |   90 +++++++++++++++++++
 ginac/parser/debug.hpp           |   35 ++++++++
 ginac/parser/default_reader.tpl  |   47 ++++++++++
 ginac/parser/lexer.cpp           |  132 ++++++++++++++++++++++++++++
 ginac/parser/lexer.hpp           |   45 ++++++++++
 ginac/parser/parse_binop_rhs.cpp |  179 ++++++++++++++++++++++++++++++++++++++
 ginac/parser/parse_context.cpp   |   27 ++++++
 ginac/parser/parse_context.hpp   |   82 +++++++++++++++++
 ginac/parser/parser.cpp          |  172 ++++++++++++++++++++++++++++++++++++
 ginac/parser/parser.hpp          |   99 +++++++++++++++++++++
 15 files changed, 1028 insertions(+), 5 deletions(-)
 create mode 100644 check/time_parser.cpp
 create mode 100644 ginac/parser/builtin_fcns.def
 create mode 100644 ginac/parser/debug.hpp
 create mode 100644 ginac/parser/default_reader.tpl
 create mode 100644 ginac/parser/lexer.cpp
 create mode 100644 ginac/parser/lexer.hpp
 create mode 100644 ginac/parser/parse_binop_rhs.cpp
 create mode 100644 ginac/parser/parse_context.cpp
 create mode 100644 ginac/parser/parse_context.hpp
 create mode 100644 ginac/parser/parser.cpp
 create mode 100644 ginac/parser/parser.hpp

diff --git a/INSTALL b/INSTALL
index b542d9e..1cad277 100644
--- a/INSTALL
+++ b/INSTALL
@@ -30,7 +30,8 @@ Known not to work with:
    is missing there.
 
 If you install from CVS, you also need GNU autoconf (>=2.59), automake (>=1.7),
-libtool (>= 1.5), bison (>= 2.3), flex (>= 2.5.33) to be installed.
+libtool (>= 1.5), bison (>= 2.3), flex (>= 2.5.33), autogen (>= 5.6.0) to be
+installed.
 
 
 INSTALLATION
diff --git a/check/Makefile.am b/check/Makefile.am
index a34bb9f..b656c63 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -50,7 +50,8 @@ TIMES = time_dennyfliegner \
 	time_lw_Q \
 	time_lw_Qprime \
 	time_antipode \
-	time_fateman_expand
+	time_fateman_expand \
+	time_parser
 
 TESTS = $(CHECKS) $(EXAMS) $(TIMES)
 check_PROGRAMS = $(CHECKS) $(EXAMS) $(TIMES)
@@ -227,6 +228,11 @@ time_fateman_expand_SOURCES = time_fateman_expand.cpp \
 			      randomize_serials.cpp timer.cpp timer.h
 time_fateman_expand_LDADD = ../ginac/libginac.la
 
+time_parser_SOURCES = time_parser.cpp \
+		      randomize_serials.cpp timer.cpp timer.h
+time_parser_LDADD = ../ginac/libginac.la
+
+
 AM_CPPFLAGS = -I$(srcdir)/../ginac -I../ginac
 
 CLEANFILES = exam.gar
diff --git a/check/time_parser.cpp b/check/time_parser.cpp
new file mode 100644
index 0000000..ca35fb6
--- /dev/null
+++ b/check/time_parser.cpp
@@ -0,0 +1,79 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <cstddef>
+#include <cstdlib>
+#include <vector>
+#include <stdexcept>
+#include "ginac.h"
+#include "parser/parser.hpp"
+#include "timer.h"
+extern void randomify_symbol_serials();
+using namespace std;
+using namespace GiNaC;
+
+/// make a string "1+x+2*x^2+...+n*x^n"
+static string prepare_str(const unsigned n, const char x = 'x')
+{
+	ostringstream s;
+	s << x;
+	for (unsigned i = 2; i < n; i++)
+		s << '+' << i << '*' << x << '^' << i; 
+	return s.str();
+}
+
+void benchmark_and_cmp(const string& srep, double& t_new, double& t_old)
+{
+	parser the_parser;
+	timer RSD10;
+	RSD10.start();
+	ex e = the_parser(srep);
+	t_new = RSD10.read();
+	RSD10.stop();
+	if (t_new > 2.0)
+		cout << '.' << flush;
+
+	symtab syms = the_parser.get_syms();
+	const symbol x = find_or_insert_symbol("x", syms, true);
+	lst sl;
+	sl = x;
+	RSD10.start();
+	ex e2(srep, sl);
+	t_old = RSD10.read();
+	
+	if (t_old > 2.0)
+		cout << '.' << flush;
+
+	ex dif = (e - e2).expand();
+	if (!dif.is_zero()) {
+		cerr << "Got a difference: " << dif << endl;
+		throw std::logic_error("New and old parser give different results");
+	}
+}
+
+int main(int argc, char** argv)
+{
+	cout << "timing GiNaC parser..." << flush;
+	randomify_symbol_serials();
+	unsigned n_min = 1024;
+	unsigned n_max = 32768;
+	if (argc > 1)
+		n_max = atoi(argv[1]);
+
+	vector<double> times_new, times_old;
+	vector<unsigned> ns;
+	for (unsigned n = n_min; n <= n_max; n = n << 1) {
+		double t_new, t_old;
+		string srep = prepare_str(n);
+		benchmark_and_cmp(srep, t_new, t_old);
+		times_new.push_back(t_new);
+		times_old.push_back(t_old);
+		ns.push_back(n);
+	}
+
+	cout << "OK" << endl;
+	cout << "# terms  new parser, s  old parser, s" << endl;
+	for (size_t i = 0; i < times_new.size(); i++)
+		cout << " " << ns[i] << '\t' << times_new[i] << '\t' << times_old[i] << endl;
+	return 0;
+}
diff --git a/configure.ac b/configure.ac
index e4a1e60..22f9c96 100644
--- a/configure.ac
+++ b/configure.ac
@@ -149,6 +149,10 @@ AM_CONDITIONAL(CONFIG_TEX, [test ! \( -z "$LATEX" -o -z $"PDFLATEX" -o -z "$MAKE
 AC_PATH_PROG(FIG2DEV, fig2dev, "")
 AM_CONDITIONAL(CONFIG_FIG2DEV, [test ! -z "$FIG2DEV"])
 
+dnl generate boilerplate code for the (new) parser.
+dnl Only developers need this tool.
+AC_PATH_PROG(AUTOGEN, autogen, "")
+
 dnl Output makefiles etc.
 AC_CONFIG_FILES([
 Makefile
diff --git a/ginac/Makefile.am b/ginac/Makefile.am
index d2ef614..0570fde 100644
--- a/ginac/Makefile.am
+++ b/ginac/Makefile.am
@@ -9,7 +9,15 @@ libginac_la_SOURCES = add.cpp archive.cpp basic.cpp clifford.cpp color.cpp \
   operators.cpp power.cpp registrar.cpp relational.cpp remember.cpp \
   pseries.cpp print.cpp symbol.cpp symmetry.cpp tensor.cpp \
   utils.cpp wildcard.cpp input_parser.yy input_lexer.ll \
-  input_lexer.h remember.h tostring.h utils.h compiler.h
+  input_lexer.h remember.h tostring.h utils.h compiler.h \
+  parser/parse_binop_rhs.cpp \
+  parser/parser.cpp \
+  parser/parse_context.cpp \
+  parser/builtin_fcns.cpp \
+  parser/lexer.cpp \
+  parser/lexer.hpp \
+  parser/debug.hpp
+  
 libginac_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -release $(LT_RELEASE)
 libginac_la_LIBADD = $(DL_LIBS)
 ginacincludedir = $(includedir)/ginac
@@ -18,10 +26,27 @@ ginacinclude_HEADERS = ginac.h add.h archive.h assertion.h basic.h class_info.h
   exprseq.h fail.h factor.h fderivative.h flags.h function.h hash_map.h idx.h indexed.h \
   inifcns.h integral.h lst.h matrix.h mul.h ncmul.h normal.h numeric.h operators.h \
   power.h print.h pseries.h ptr.h registrar.h relational.h structure.h \
-  symbol.h symmetry.h tensor.h version.h wildcard.h
+  symbol.h symmetry.h tensor.h version.h wildcard.h \
+  parser/parser.hpp \
+  parser/parse_context.hpp
+
 AM_LFLAGS = -Pginac_yy -olex.yy.c
 AM_YFLAGS = -p ginac_yy -d
-EXTRA_DIST = function.pl input_parser.h version.h.in
+EXTRA_DIST = function.pl input_parser.h version.h.in \
+parser/default_reader.tpl parser/builtin_fcns.def
+
+# Files produced by autogen(1) from templates
+$(srcdir)/parser/builtin_fcns.cpp: $(srcdir)/parser/builtin_fcns.def $(srcdir)/parser/default_reader.tpl
+	set -e; if [ -n "$(AUTOGEN)" ]; then \
+		cd $(srcdir)/parser; \
+		$(AUTOGEN) -T default_reader.tpl builtin_fcns.def; \
+	elif [ -f $@ ]; then \
+		echo "WARNING: AutoGen is not available, the \"$@\" file WON'T be re-generated"; \
+	else \
+		echo "*** ERROR: the \"$@\" file does not exist, and AutoGen is not installed on your system"; \
+		echo "***        Please install AutoGen (http://www.gnu.org/software/autogen)"; \
+	fi
+		
 
 # Files which are generated by perl scripts
 $(srcdir)/function.h $(srcdir)/function.cpp: $(srcdir)/function.pl
diff --git a/ginac/parser/builtin_fcns.def b/ginac/parser/builtin_fcns.def
new file mode 100644
index 0000000..897b1f7
--- /dev/null
+++ b/ginac/parser/builtin_fcns.def
@@ -0,0 +1,90 @@
+Autogen definitions ginacfcns;
+
+function = { name = "log"; };
+function = { name = "exp"; };
+function = { name = "sin"; };
+function = { name = "cos"; };
+function = { name = "tan"; };
+function = { name = "asin"; };
+function = { name = "acos"; };
+function = { name = "atan"; };
+
+function = { name = "sinh"; };
+function = { name = "cosh"; };
+function = { name = "tanh"; };
+function = { name = "asinh"; };
+function = { name = "acosh"; };
+function = { name = "atanh"; };
+
+function = { 
+	name = "atan2";
+	args = 2;
+};
+
+function = { 
+	name = "Li2";
+	comment = "Dilogarithm";
+};
+
+function = {
+	name = "Li3";
+	comment = "Trilogarithm";
+};
+
+function = {
+	name = "zetaderiv";
+	comment = "Derivatives of Riemann's Zeta-function";
+	args = 2;
+};
+
+function = {
+	name = "Li";
+	args = 2;
+	comment = "Polylogarithm and multiple polylogarithm";
+};
+
+function = {
+	name =  "S";
+	args = 3;
+	comment = "Nielsen's generalized polylogarithm";
+};
+
+function = {
+	name =  "H";
+	args = 2;
+	comment = "Harmonic polylogarithm";
+};
+
+function = { name = "lgamma"; };
+function = { name = "tgamma"; };
+
+function = {
+	name = "beta";
+	args = 2;
+	comment = "Beta-function";
+};
+
+function = { name = "factorial"; };
+
+function = {
+	name = "binomial";
+	args = 2;
+};
+
+function = {
+	name = "Order";
+	comment = "Order term function (for truncated power series)";
+};
+
+/* Thease are not functions, but anyway ... */
+function = { name = "sqrt"; };
+
+function = { 
+	name = "pow";
+	args = 2;
+};
+
+function = {
+	name = "power";
+	args = 2;
+};
diff --git a/ginac/parser/debug.hpp b/ginac/parser/debug.hpp
new file mode 100644
index 0000000..a43cc7a
--- /dev/null
+++ b/ginac/parser/debug.hpp
@@ -0,0 +1,35 @@
+#ifndef GINAC_PARSER_DEBUG_HPP
+#define GINAC_PARSER_DEBUG_HPP
+#include <iosfwd>
+#include <sstream>
+#include <stdexcept>
+#include "compiler.h"
+#ifndef __GNUC__
+#if __STDC_VERSION__ < 199901L
+#define __PRETTY_FUNCTION__ "<unknown>"
+#else
+#define __PRETTY_FUNCTION__ __func__
+#endif
+#endif
+
+#define bail_out(exception, message) \
+do { \
+	std::ostringstream err; \
+	err << __PRETTY_FUNCTION__ << "(" << __FILE__ << ':' << __LINE__ << ": "; \
+	err << message; \
+	throw exception(err.str()); \
+} while (0)
+
+#define bug(message) bail_out(std::logic_error, message)
+
+#define dout(condition, message) \
+do { \
+	if (unlikely(condition)) { \
+		std::cerr << __PRETTY_FUNCTION__ \
+			<< " (" << __FILE__ << ':' << __LINE__ << "): " \
+			<< message << std::endl; \
+	} \
+} while (0)
+
+#endif // GINAC_PARSER_DEBUG_HPP
+
diff --git a/ginac/parser/default_reader.tpl b/ginac/parser/default_reader.tpl
new file mode 100644
index 0000000..f3b150d
--- /dev/null
+++ b/ginac/parser/default_reader.tpl
@@ -0,0 +1,47 @@
+[+ AutoGen5 template .cpp +][+ 
+COMMENT a part of GiNaC parser -- construct functions from a byte stream.
++][+
+(use-modules (ice-9 format))
+
+(define (sequence start end . step)
+  (let ((step (if (null? step) 1 (car step))))
+    (let loop ((n start))
+      (if (> n end) '() (cons n (loop (+ step n)))))))
++]/*
+[+ (dne " * " " * " ) +]
+ *
+ * If you want to change this file, edit either `[+ (def-file) +]' or
+ * `[+ (tpl-file) +]' file, and run the following command:
+ *
+ * autogen -T [+ (tpl-file) +] [+ (def-file) +]
+ */
+#include "parse_context.hpp"
+
+namespace GiNaC
+{
+[+ FOR function +]
+static ex [+ (get "name") +]_reader(const exvector& ev)
+{
+	return GiNaC::[+ (get "name") +]([+
+		(let ((nargs (if (exist? "args")
+		                 (string->number (get "args")) 1)))
+		  (format '#f "~{ev[~a]~^, ~}" (sequence 0 (- nargs 1)))) +]);
+}[+ ENDFOR +]
+
+const prototype_table& get_default_reader()
+{
+	using std::make_pair;
+	static bool initialized = false;
+	static prototype_table reader;
+	if (!initialized) {
+[+ FOR function +]
+		reader[make_pair("[+ (get "name") +]", [+ 
+				    (if (exist? "args") (get "args") "1")
+				 +])] = [+ (get "name") +]_reader;[+
+   ENDFOR +]
+		initialized = true;
+	}
+	return reader;
+}
+} // namespace GiNaC
+
diff --git a/ginac/parser/lexer.cpp b/ginac/parser/lexer.cpp
new file mode 100644
index 0000000..ed1f894
--- /dev/null
+++ b/ginac/parser/lexer.cpp
@@ -0,0 +1,132 @@
+#include <string>
+#include <iostream>
+#include <sstream>
+#include "lexer.hpp"
+#include "compiler.h"
+
+namespace GiNaC
+{
+/// Skip to the end of line
+static int skipline(std::istream* s);
+/// Skip to the next non-whitespace character
+static int skipspace(std::istream* s, int c, std::size_t& line);
+/// Check if the identifier is predefined literal
+static bool literal_p(const std::string& name);
+
+/// gettok - Return the next token from standard input.
+int lexer::gettok()
+{
+	// Skip any whitespace.
+	c = skipspace(input, c, line_num);
+
+	// identifier: [a-zA-Z][a-zA-Z0-9]*
+	if (isalpha(c)) { 
+		str = c;
+		do {
+			c = input->get();
+			if (isalnum(c))
+				str += c;
+			else
+				break;
+		} while (true);
+		if (unlikely(literal_p(str)))
+			return token_type::literal;
+		else
+			return token_type::identifier;
+	}
+
+	// Number: [0-9.]+
+	if (isdigit(c) || c == '.') {
+		str = "";
+		do {
+			str += c;
+			c = input->get();
+		} while (isdigit(c) || c == '.');
+		return token_type::number;
+	}
+
+	// Comment until end of line.
+	if (c == '#') {
+		c = skipline(input);
+		++line_num;
+		if (c != EOF)
+			return gettok();
+	}
+
+	// Check for end of file.  Don't eat the EOF.
+	if (c == EOF)
+		return token_type::eof;
+
+	// Otherwise, just return the character as its ascii value.
+	int current = c;
+	c = input->get();
+	return current;
+}
+
+static int skipline(std::istream* s)
+{
+	int c;
+	do {
+		c = s->get();
+	} while (c != EOF && c != '\n' && c != '\r');
+	return c;
+}
+
+static int skipspace(std::istream* s, int c, std::size_t& line)
+{
+	while (isspace(c)) {
+		if (c == '\n')
+			++line;
+		c = s->get();
+	}
+	return c;
+}
+
+static bool literal_p(const std::string& name)
+{
+	if (name == "I")
+		return true;
+	else if (name == "Pi")
+		return true;
+	else if (name == "Euler")
+		return true;
+	else if (name == "Catalan")
+		return true;
+	else
+		return false; 
+}
+
+lexer::lexer(std::istream* in, std::ostream* out, std::ostream* err)
+{
+	if (in)
+		input = in;
+	else
+		in = &std::cin;
+
+	if (out)
+		output = out;
+	else
+		output = &std::cout;
+
+	if (err)
+		error = err;
+	else
+		error = &std::cerr;
+
+	c = ' ';
+	str = "";
+	line_num = 0;
+	column = 0;
+}
+
+lexer::~lexer() { }
+
+void lexer::switch_input(std::istream* in)
+{
+	input = in;
+	line_num = 0;
+	column = 0;
+}
+
+} // namespace GiNaC
+
diff --git a/ginac/parser/lexer.hpp b/ginac/parser/lexer.hpp
new file mode 100644
index 0000000..b53d08f
--- /dev/null
+++ b/ginac/parser/lexer.hpp
@@ -0,0 +1,45 @@
+#ifndef GINAC_LEXER_HPP_
+#define GINAC_LEXER_HPP_
+#include <iosfwd>
+#include <string>
+#include <cstddef>
+namespace GiNaC
+{
+
+class lexer
+{
+	std::istream* input;
+	std::ostream* output;
+	std::ostream* error;
+	/// last character read from stream
+	int c;
+	/// identifier and number tokens are stored here
+	std::string str;
+	std::size_t line_num;
+	std::size_t column;
+	friend class parser;
+public:
+
+	lexer(std::istream* in = 0, std::ostream* out = 0, std::ostream* err = 0);
+	~lexer();
+
+	int gettok();
+	void switch_input(std::istream* in);
+
+	struct token_type
+	{
+		enum
+		{
+			eof		= -1,
+			identifier	= -4,
+			number		= -5,
+			literal		= -6
+
+		};
+	};
+};
+
+} // namespace GiNaC
+
+#endif // GINAC_LEXER_HPP_
+
diff --git a/ginac/parser/parse_binop_rhs.cpp b/ginac/parser/parse_binop_rhs.cpp
new file mode 100644
index 0000000..b118197
--- /dev/null
+++ b/ginac/parser/parse_binop_rhs.cpp
@@ -0,0 +1,179 @@
+#ifndef IN_GINAC
+#include <ginac/ginac.h>
+#else
+#include "ex.h"
+#include "symbol.h"
+#include "mul.h"
+#include "add.h"
+#include "power.h"
+#endif
+#include <stdexcept>
+#include <sstream>
+#include "parser.hpp"
+#include "lexer.hpp"
+#include "debug.hpp"
+
+namespace GiNaC
+{
+
+/// Make a sum or a product.
+static ex make_binop_expr(const int binop, const exvector& args);
+/// Check if the token is a binary operator. 
+static inline bool is_binop(const int c);
+/// Get the precedence of the pending binary operator.
+static int get_tok_prec(const int c);
+
+/// binoprhs: ([+*/^-] primary)*
+ex parser::parse_binop_rhs(int expr_prec, ex& lhs)
+{
+	exvector args;
+	args.push_back(lhs);
+	int binop = -1, orig_binop = -1;
+	bool need_sign_flip = false;
+	while (1) {
+		// check if this is a binop
+		if (!is_binop(token)) {
+			if (args.size() > 1)
+				return make_binop_expr(orig_binop, args);
+			else
+				return lhs;
+		}
+		
+		// Okay, we know this is a binop.
+		if (args.size() == 1)
+			orig_binop = token;
+
+		binop = token;
+
+		// If this is a binop that binds at least as tightly as
+		// the current binop, consume it, otherwise we are done.
+		int tok_prec = get_tok_prec(token);
+		if (tok_prec < expr_prec) {
+			if (args.size() > 1)
+				return make_binop_expr(orig_binop, args);
+			else 
+				return lhs;
+		}
+
+		get_next_tok();  // eat binop
+
+		// Parse the primary expression after the binary operator.
+		ex rhs = parse_primary();
+
+		// If binop binds less tightly with rhs than the operator after
+		// rhs, let the pending operator take rhs as its lhs.
+		int next_prec = get_tok_prec(token);
+		if (tok_prec < next_prec)
+			rhs = parse_binop_rhs(tok_prec + 1, rhs);
+
+		// previous operator was '+', and current one is '-'
+		// (or vice a versa).
+		if (need_sign_flip)
+			rhs = - rhs;
+
+		args.push_back(rhs);
+
+		// Minimize the number of eval() and ctor calls. This is
+		// crucial for a reasonable performance. If the next operator
+		// is compatible with the pending one (or the same) don't create
+		// the expression and continue collecting operands instead.
+		if (binop == token)
+			continue;
+		else if (binop == '+' && token == '-') {
+			need_sign_flip = token != orig_binop;
+			continue;
+		} else if (binop == '-' && token == '+') {
+			need_sign_flip = token != orig_binop;
+			continue;
+		} else { 
+			if (args.size() <= 1)
+				bug("binop has " << args.size() << " arguments, expected >= 2");
+			lhs = make_binop_expr(orig_binop, args);
+			args.clear();
+			args.push_back(lhs);
+		}
+	}
+}
+
+extern numeric* _num_1_p;
+
+static ex make_minus_expr(const exvector& args)
+{
+	exvector rest_args;
+	rest_args.reserve(args.size() - 1);
+	std::copy(args.begin() + 1, args.end(), std::back_inserter(rest_args));
+	ex rest_base = (new add(rest_args))->setflag(status_flags::dynallocated);
+	ex rest = (new mul(rest_base, *_num_1_p))->setflag(status_flags::dynallocated);
+	ex ret = (new add(args[0], rest))->setflag(status_flags::dynallocated);
+	return ret;
+}
+
+static ex make_divide_expr(const exvector& args)
+{
+	exvector rest_args;
+	rest_args.reserve(args.size() - 1);
+	std::copy(args.begin() + 1, args.end(), std::back_inserter(rest_args));
+	ex rest_base = (new mul(rest_args))->setflag(status_flags::dynallocated);
+	ex rest = pow(rest_base, *_num_1_p);
+	return (new mul(args[0], rest))->setflag(status_flags::dynallocated);
+}
+
+static ex make_binop_expr(const int binop, const exvector& args)
+{
+	switch (binop) {
+		case '+':
+			return (new add(args))->setflag(status_flags::dynallocated);
+		case '-':
+			return make_minus_expr(args);
+		case '*':
+			return (new mul(args))->setflag(status_flags::dynallocated);
+		case '/':
+			return make_divide_expr(args);
+		case '^':
+			if (args.size() != 2)
+				throw std::invalid_argument(
+						std::string(__func__) 
+						+ ": power should have exactly 2 operands");
+			return pow(args[0], args[1]);
+		default:
+			throw std::invalid_argument(
+					std::string(__func__) 
+					+ ": invalid binary operation: " 
+					+ char(binop));
+	}
+}
+
+static inline bool is_binop(const int c)
+{
+	switch (c) {
+		case '+':
+		case '-':
+		case '*':
+		case '/':
+		case '^':
+			return true;
+		default:
+			return false;
+	}
+}
+
+/// Get the precedence of the pending binary operator.
+static int get_tok_prec(const int c)
+{
+	switch (c) {
+		case '+':
+		case '-':
+			return 20;
+		case '*':
+		case '/':
+			return 40;
+		case '^':
+			return 60;
+		default:
+			return -1;
+			// means 'this is not a binary operator'
+	}
+}
+
+} // namespace GiNaC
+
diff --git a/ginac/parser/parse_context.cpp b/ginac/parser/parse_context.cpp
new file mode 100644
index 0000000..e4956ba
--- /dev/null
+++ b/ginac/parser/parse_context.cpp
@@ -0,0 +1,27 @@
+#include "parse_context.hpp"
+#include <sstream>
+#include <stdexcept>
+namespace GiNaC
+{
+
+const symbol&
+find_or_insert_symbol(const std::string& name, symtab& syms, const bool strict)
+{
+	symtab::const_iterator p = syms.find(name);
+	if (p != syms.end())
+		return p->second.first;
+
+	if (strict)
+		throw std::invalid_argument(
+				std::string("find_or_insert_symbol: symbol \"") 
+				+ name + "\" not found");
+
+	// false means this symbol was created by parser 
+	const std::pair<symbol, bool> tmp = std::make_pair(symbol(name), false);
+
+	symtab::iterator i = syms.insert(symtab::value_type(name, tmp)).first;
+	return i->second.first;
+}
+
+}
+
diff --git a/ginac/parser/parse_context.hpp b/ginac/parser/parse_context.hpp
new file mode 100644
index 0000000..c7cc19b
--- /dev/null
+++ b/ginac/parser/parse_context.hpp
@@ -0,0 +1,82 @@
+#ifndef _GINAC_PARSE_CONTEXT_HPP
+#define _GINAC_PARSE_CONTEXT_HPP
+#include <string>
+#include <cstddef> // size_t
+#ifndef IN_GINAC
+#include <ginac/ginac.h>
+#else
+#include "ex.h"
+#include "symbol.h"
+#endif
+#include <map>
+#include <utility>
+
+namespace GiNaC
+{
+
+/**
+ * Establishes correspondence between the strings and symbols.
+ * The parser will create missing symbols (if not instructed otherwise,
+ * in which case it fails if the expression contains unknown symbols).
+ * The .second element of pair helps to distinguish between the user
+ * supplied symbols and parser generated ones. The .first is the symbol
+ * itself
+ */
+typedef std::map<std::string, std::pair<symbol, bool> > symtab;
+
+/**
+ * Find the symbol with the @a name in the symbol table @a syms.
+ *
+ * If symbol is missing and @a strict = false, insert it, otherwise
+ * throw an exception.
+ */
+extern const symbol&
+find_or_insert_symbol(const std::string& name, symtab& syms,
+	              const bool strict);
+
+/**
+ * Function (or class ctor) prototype
+ * .first is  the name of function(or ctor),
+ * .second is the number of arguments (each of type ex)
+ */
+typedef std::pair<std::string, std::size_t> prototype;
+
+/**
+ * A (C++) function for reading functions and classes from the stream.
+ *
+ * The parser uses (an associative array of) such functions to construct
+ * (GiNaC) classes and functions from a sequence of characters.
+ */
+typedef ex (*reader_func)(const exvector& args);
+
+/**
+ * Prototype table.
+ *
+ * If parser sees an expression which looks like a function call (e.g.
+ * foo(x+y, z^2, t)), it looks up such a table to find out which
+ * function (or class) corresponds to the given name and has the given
+ * number of the arguments.
+ *
+ * N.B.
+ *
+ * 1. The function don't have to return a (GiNaC) function or class, it
+ *    can return any expression.
+ * 2. Overloaded functions/ctors are paritally supported, i.e. there might
+ *    be several functions with the same name, but they should take different
+ *    number of arguments.
+ * 3. User can extend the parser via custom prototype tables. It's possible
+ *    to read user defined classes, create abbreviations, etc.
+ */
+typedef std::map<prototype, reader_func> prototype_table;
+
+/**
+ * Default prototype table.
+ *
+ * It supports most of builtin GiNaC functions.
+ */
+extern const prototype_table& get_default_reader();
+
+}
+
+#endif // _GINAC_PARSE_CONTEXT_HPP
+
diff --git a/ginac/parser/parser.cpp b/ginac/parser/parser.cpp
new file mode 100644
index 0000000..bd44137
--- /dev/null
+++ b/ginac/parser/parser.cpp
@@ -0,0 +1,172 @@
+#include <ginac/ginac.h>
+#include <stdexcept>
+#include <sstream>
+#include "parser.hpp"
+#include "lexer.hpp"
+#include "debug.hpp"
+
+namespace GiNaC 
+{
+
+/// identifier_expr:  identifier |  identifier '(' expression* ')'
+ex parser::parse_identifier_expr()
+{
+	std::string name = scanner->str;
+	get_next_tok();  // eat identifier.
+
+	if (token != '(') // symbol
+		return find_or_insert_symbol(name, syms, strict);
+
+	// function/ctor call.
+	get_next_tok();  // eat (
+	exvector args;
+	if (token != ')') {
+		while (true) {
+			ex e = parse_expression();
+			args.push_back(e);
+
+			if (token == ')')
+				break;
+
+			if (token != ',')
+				throw std::invalid_argument("Expected ')' or ',' in argument list");
+
+			get_next_tok();
+		}
+	}
+	// Eat the ')'.
+	get_next_tok();
+	prototype the_prototype = make_pair(name, args.size());
+	prototype_table::const_iterator reader = funcs.find(the_prototype);
+	if (reader == funcs.end()) {
+		bail_out(std::invalid_argument,
+			"no function \"" << name << "\" with " << args.size()
+			<< " arguments");
+	}
+	ex ret = reader->second(args);
+	return ret;
+}
+
+/// paren_expr:  '(' expression ')'
+ex parser::parse_paren_expr()
+{
+	get_next_tok();  // eat (.
+	ex e = parse_expression();
+
+	if (token != ')')
+		throw std::invalid_argument("parser::parse_paren_expr: expected ')'");
+	get_next_tok();  // eat ).
+	return e;
+}
+
+extern numeric* _num_1_p;
+
+/// unary_expr: [+-] expression
+ex parser::parse_unary_expr(const int s)
+{
+	// consume '-' (or '+')
+	get_next_tok();
+	ex v = parse_expression();
+	switch (s) {
+		case '-':
+			return (new mul(v, *_num_1_p))->setflag(status_flags::dynallocated);
+		case '+':
+			return v;
+		default:
+			throw std::invalid_argument(
+					std::string(__func__)
+					+ ": invalid unary operator \""
+					+ char(s) + "\"");
+	}
+}
+
+/// primary: identifier_expr | number_expr | paren_expr | unary_expr 
+ex parser::parse_primary() 
+{
+	switch (token) {
+		case lexer::token_type::identifier:
+			 return parse_identifier_expr();
+		case lexer::token_type::number:
+			 return parse_number_expr();
+		case '(': 
+			 return parse_paren_expr();
+		case '-':
+			 return parse_unary_expr('-');
+		case '+':
+			 return parse_unary_expr('+');
+		case lexer::token_type::literal:
+			 return parse_literal_expr();
+		case lexer::token_type::eof:
+			 bail_out(std::invalid_argument, "got EOF while parsing the expression");
+		default:
+			 bail_out(std::invalid_argument, "unknown token " <<
+				token << " (\"" << 
+				(token ? std::string("") + char(token) : "NULL")
+				<< "\")");
+	}
+}
+
+/// expression ::= primary binoprhs
+ex parser::parse_expression() 
+{
+	ex lhs = parse_primary();
+	ex res = parse_binop_rhs(0, lhs);
+	return res;
+}
+
+/// number_expr: number
+ex parser::parse_number_expr()
+{
+	ex n = numeric(scanner->str.c_str());
+	get_next_tok(); // consume the number
+	return n;
+}
+
+/// literal_expr: 'I' | 'Pi' | 'Euler' | 'Catalan'
+ex parser::parse_literal_expr()
+{
+	if (scanner->str == "I")
+		return I;
+	else if (scanner->str == "Pi")
+		return Pi;
+	else if (scanner->str == "Euler")
+		return Euler;
+	else if (scanner->str == "Catalan")
+		return Catalan;
+	bug("unknown literal: \"" << scanner->str << "\"");
+}
+
+ex parser::operator()(std::istream& input)
+{
+	scanner->switch_input(&input);
+	get_next_tok();
+	ex ret = parse_expression();
+	return ret;
+}
+
+ex parser::operator()(const std::string& input)
+{
+	std::istringstream is(input);
+	ex ret = operator()(is);
+	return ret;
+}
+
+int parser::get_next_tok()
+{
+	token = scanner->gettok();
+	return token;
+}
+
+parser::parser(const symtab& syms_, const prototype_table& funcs_,
+	       const bool strict_) : strict(strict_), funcs(funcs_),
+	syms(syms_)
+{
+	scanner = new lexer();
+}
+
+parser::~parser()
+{
+	delete scanner;
+}
+
+} // namespace GiNaC
diff --git a/ginac/parser/parser.hpp b/ginac/parser/parser.hpp
new file mode 100644
index 0000000..40e7e07
--- /dev/null
+++ b/ginac/parser/parser.hpp
@@ -0,0 +1,99 @@
+#ifndef GINAC_PARSER_HPP_
+
+#include "parse_context.hpp"
+#include <stdexcept>
+#ifndef IN_GINAC
+#include <ginac/ginac.h>
+#else
+#include "ex.h"
+#endif
+
+namespace GiNaC
+{
+
+class lexer;
+
+/**
+ * Recursive descent parser for GiNaC expressions.
+ */
+class parser
+{
+	// The actual parser rules (in EBNF-alike notation):
+
+	/// expression: primary binoprhs
+	ex parse_expression();
+
+	/// primary: indentifier_expr | number_expr | paren_expr | unary_expr
+	ex parse_primary();
+
+	/// binoprhs: ([+*/^-] primary)*
+	ex parse_binop_rhs(int, ex&);
+
+	/// identifier_expr: identifier |
+	///                  identifier '(' expression (',' expression)* ')'
+	ex parse_identifier_expr();
+
+	/// paren_expr: '(' expression ')'
+	ex parse_paren_expr();
+
+	/// number_expr: number
+	ex parse_number_expr();
+
+	/// unary_expr: [+-] expression
+	ex parse_unary_expr(const int c);
+
+	/// literal_expr: 'I' | 'Pi' | 'Euler' | 'Catalan'
+	ex parse_literal_expr();
+
+public:
+	/**
+	 * @param syms_ symbol table.
+	 * @param funcs_ function/ctors table.
+	 * @param strict_ if true, throw an exception if unknown
+	 *        symbol is encountered.
+	 */
+	parser(const symtab& syms_ = symtab(),
+		const prototype_table& funcs_ = get_default_reader(),
+		const bool strict_ = false);
+	~parser();
+
+	/// parse the stream @a input
+	ex operator()(std::istream& input);
+	/// parse the string @a input
+	ex operator()(const std::string& input);
+
+	/// report the symbol table used by parser
+	symtab get_syms() const 
+	{ 
+		return syms; 
+	}
+
+private:
+	/// If true, throw an exception if an unknown symbol is encountered.
+	const bool strict;
+	/**
+	 * Function/ctor table, maps a prototype (which is a name and number
+	 * arguments) to a C++ function. Used for parsing identifier_expr's
+	 * (see parse_identifier_expr). If expression contains unknown
+	 * prototype, an exception will be thrown.
+	 */
+	const prototype_table funcs;
+	/**
+	 * Symbol (variable) table. Used for parsing identifier_expr's
+	 * (see parse_identifier_expr). If parser is strict, exception is
+	 * thrown if an unknown symbol is encountered. Non-strict parser
+	 * appends unknown symbols to the symbol table.
+	 */
+	symtab syms;
+	/// token scanner
+	lexer* scanner;
+	/// current token the parser is looking at
+	int token;
+	/// read the next token from the scanner
+	int get_next_tok();
+};
+
+} // namespace GiNaC
+
+#endif // GINAC_PARSER_HPP_
+
-- 
1.5.6

Best regards,
	Alexei

-- 
All science is either physics or stamp collecting.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 827 bytes
Desc: Digital signature
Url : http://www.cebix.net/pipermail/ginac-devel/attachments/20080819/deca974e/attachment-0001.sig 


More information about the GiNaC-devel mailing list