From: Alexei Sheplyakov Date: Tue, 19 Aug 2008 19:48:03 +0000 (+0400) Subject: Faster, better (recursive descent) expression parser. X-Git-Tag: release_1-5-0~82 X-Git-Url: https://www.ginac.de/ginac.git//ginac.git?p=ginac.git;a=commitdiff_plain;h=1222eac51cee964961d2aad889dc4ceccb144a36 Faster, better (recursive descent) expression parser. Ouch, it fails to compile. Here is a correct version: From: Alexei Sheplyakov Date: Tue, 19 Aug 2008 21:50:03 +0400 Subject: [PATCH] Faster, better (recursive descent) expression parser. bison generated parser has a number of problems: 1. Bad performance. Parsing a sum seems to be O(N^{2 + a}) (a > 0) [1]. For example, parsing a sum (actually, a univariate polynomial) of 32768 terms takes about 90 sec. on my box, parsing a sum of 10^6 terms takes "eternity". 2. The user is expected to provide list of all symbols in the expression. Often this is very annoying (and useless). 3. Parser is not reentrant (bison *can* produce reentrant parsers, but that won't solve other problems). 4. Parser is difficult to extend. Hence the new parser. Features: 1. Parsing large sums and products is O(N). 2. Parser is reentrant (well, almost). 3. It's possible to insert (shell style) comments inside the expressions. 4. matrices, lists, FAIL are NOT handled. Yes, this *is* a feature :-) Limitations: 1. Error handling is a bit terse: on error exception is thrown, and that's it. 2. Binary, octal, and hexadecimal numbers can not be parsed (yet). 3. Tensors, noncommutative products, etc. can not be parsed. Other notes: 1. ex ctor still uses the old parser. 2. ginsh still uses the old parser. [1] Mesured by this script (requires gnuplot): make_expression_string () { printf "1 + x" local n=2 while test $n -le $1; do printf " + %s*x^%s" $n $n n=$(expr $n + 1) done } single_test () { printf "$1 \t" ( printf "time("; make_expression_string $1; printf ");" ) | \ ginsh | sed -e 's/s$//' } benchmark () { local N=$1 while test $N -le $2; do single_test $N N=$(expr $N \* 2) done } gnuplot_header () { echo "set logscale xy" echo "set xlabel 'degree (# of terms)'" echo "set ylabel 'time, sec.'" echo "set xrange [${1}:${2}]" echo "plot '-' using 1:2 with lines title '1+x+2*x^2+...+n*x^n'" } gnuplot_footer () { echo "e" } benchmark_and_plot () { ( gnuplot_header $1 $2 benchmark $1 $2 | tee $3.txt gnuplot_footer ) | \ gnuplot -persist '-' } N_ini=${1:-1024} N_fin=${2:-32768} out_base=${3:-parser_benchmark} benchmark_and_plot $N_ini $N_fin $out_base --- diff --git a/INSTALL b/INSTALL index b542d9e6..1cad2773 100644 --- a/INSTALL +++ b/INSTALL @@ -30,7 +30,8 @@ Known not to work with: is missing there. If you install from CVS, you also need GNU autoconf (>=2.59), automake (>=1.7), -libtool (>= 1.5), bison (>= 2.3), flex (>= 2.5.33) to be installed. +libtool (>= 1.5), bison (>= 2.3), flex (>= 2.5.33), autogen (>= 5.6.0) to be +installed. INSTALLATION diff --git a/check/Makefile.am b/check/Makefile.am index a34bb9f3..b656c631 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -50,7 +50,8 @@ TIMES = time_dennyfliegner \ time_lw_Q \ time_lw_Qprime \ time_antipode \ - time_fateman_expand + time_fateman_expand \ + time_parser TESTS = $(CHECKS) $(EXAMS) $(TIMES) check_PROGRAMS = $(CHECKS) $(EXAMS) $(TIMES) @@ -227,6 +228,11 @@ time_fateman_expand_SOURCES = time_fateman_expand.cpp \ randomize_serials.cpp timer.cpp timer.h time_fateman_expand_LDADD = ../ginac/libginac.la +time_parser_SOURCES = time_parser.cpp \ + randomize_serials.cpp timer.cpp timer.h +time_parser_LDADD = ../ginac/libginac.la + + AM_CPPFLAGS = -I$(srcdir)/../ginac -I../ginac CLEANFILES = exam.gar diff --git a/check/time_parser.cpp b/check/time_parser.cpp new file mode 100644 index 00000000..ca35fb62 --- /dev/null +++ b/check/time_parser.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +#include +#include "ginac.h" +#include "parser/parser.hpp" +#include "timer.h" +extern void randomify_symbol_serials(); +using namespace std; +using namespace GiNaC; + +/// make a string "1+x+2*x^2+...+n*x^n" +static string prepare_str(const unsigned n, const char x = 'x') +{ + ostringstream s; + s << x; + for (unsigned i = 2; i < n; i++) + s << '+' << i << '*' << x << '^' << i; + return s.str(); +} + +void benchmark_and_cmp(const string& srep, double& t_new, double& t_old) +{ + parser the_parser; + timer RSD10; + RSD10.start(); + ex e = the_parser(srep); + t_new = RSD10.read(); + RSD10.stop(); + if (t_new > 2.0) + cout << '.' << flush; + + symtab syms = the_parser.get_syms(); + const symbol x = find_or_insert_symbol("x", syms, true); + lst sl; + sl = x; + RSD10.start(); + ex e2(srep, sl); + t_old = RSD10.read(); + + if (t_old > 2.0) + cout << '.' << flush; + + ex dif = (e - e2).expand(); + if (!dif.is_zero()) { + cerr << "Got a difference: " << dif << endl; + throw std::logic_error("New and old parser give different results"); + } +} + +int main(int argc, char** argv) +{ + cout << "timing GiNaC parser..." << flush; + randomify_symbol_serials(); + unsigned n_min = 1024; + unsigned n_max = 32768; + if (argc > 1) + n_max = atoi(argv[1]); + + vector times_new, times_old; + vector ns; + for (unsigned n = n_min; n <= n_max; n = n << 1) { + double t_new, t_old; + string srep = prepare_str(n); + benchmark_and_cmp(srep, t_new, t_old); + times_new.push_back(t_new); + times_old.push_back(t_old); + ns.push_back(n); + } + + cout << "OK" << endl; + cout << "# terms new parser, s old parser, s" << endl; + for (size_t i = 0; i < times_new.size(); i++) + cout << " " << ns[i] << '\t' << times_new[i] << '\t' << times_old[i] << endl; + return 0; +} diff --git a/configure.ac b/configure.ac index e4a1e60a..22f9c965 100644 --- a/configure.ac +++ b/configure.ac @@ -149,6 +149,10 @@ AM_CONDITIONAL(CONFIG_TEX, [test ! \( -z "$LATEX" -o -z $"PDFLATEX" -o -z "$MAKE AC_PATH_PROG(FIG2DEV, fig2dev, "") AM_CONDITIONAL(CONFIG_FIG2DEV, [test ! -z "$FIG2DEV"]) +dnl generate boilerplate code for the (new) parser. +dnl Only developers need this tool. +AC_PATH_PROG(AUTOGEN, autogen, "") + dnl Output makefiles etc. AC_CONFIG_FILES([ Makefile diff --git a/ginac/Makefile.am b/ginac/Makefile.am index d2ef6145..0570fde2 100644 --- a/ginac/Makefile.am +++ b/ginac/Makefile.am @@ -9,7 +9,15 @@ libginac_la_SOURCES = add.cpp archive.cpp basic.cpp clifford.cpp color.cpp \ operators.cpp power.cpp registrar.cpp relational.cpp remember.cpp \ pseries.cpp print.cpp symbol.cpp symmetry.cpp tensor.cpp \ utils.cpp wildcard.cpp input_parser.yy input_lexer.ll \ - input_lexer.h remember.h tostring.h utils.h compiler.h + input_lexer.h remember.h tostring.h utils.h compiler.h \ + parser/parse_binop_rhs.cpp \ + parser/parser.cpp \ + parser/parse_context.cpp \ + parser/builtin_fcns.cpp \ + parser/lexer.cpp \ + parser/lexer.hpp \ + parser/debug.hpp + libginac_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -release $(LT_RELEASE) libginac_la_LIBADD = $(DL_LIBS) ginacincludedir = $(includedir)/ginac @@ -18,10 +26,27 @@ ginacinclude_HEADERS = ginac.h add.h archive.h assertion.h basic.h class_info.h exprseq.h fail.h factor.h fderivative.h flags.h function.h hash_map.h idx.h indexed.h \ inifcns.h integral.h lst.h matrix.h mul.h ncmul.h normal.h numeric.h operators.h \ power.h print.h pseries.h ptr.h registrar.h relational.h structure.h \ - symbol.h symmetry.h tensor.h version.h wildcard.h + symbol.h symmetry.h tensor.h version.h wildcard.h \ + parser/parser.hpp \ + parser/parse_context.hpp + AM_LFLAGS = -Pginac_yy -olex.yy.c AM_YFLAGS = -p ginac_yy -d -EXTRA_DIST = function.pl input_parser.h version.h.in +EXTRA_DIST = function.pl input_parser.h version.h.in \ +parser/default_reader.tpl parser/builtin_fcns.def + +# Files produced by autogen(1) from templates +$(srcdir)/parser/builtin_fcns.cpp: $(srcdir)/parser/builtin_fcns.def $(srcdir)/parser/default_reader.tpl + set -e; if [ -n "$(AUTOGEN)" ]; then \ + cd $(srcdir)/parser; \ + $(AUTOGEN) -T default_reader.tpl builtin_fcns.def; \ + elif [ -f $@ ]; then \ + echo "WARNING: AutoGen is not available, the \"$@\" file WON'T be re-generated"; \ + else \ + echo "*** ERROR: the \"$@\" file does not exist, and AutoGen is not installed on your system"; \ + echo "*** Please install AutoGen (http://www.gnu.org/software/autogen)"; \ + fi + # Files which are generated by perl scripts $(srcdir)/function.h $(srcdir)/function.cpp: $(srcdir)/function.pl diff --git a/ginac/parser/builtin_fcns.def b/ginac/parser/builtin_fcns.def new file mode 100644 index 00000000..897b1f72 --- /dev/null +++ b/ginac/parser/builtin_fcns.def @@ -0,0 +1,90 @@ +Autogen definitions ginacfcns; + +function = { name = "log"; }; +function = { name = "exp"; }; +function = { name = "sin"; }; +function = { name = "cos"; }; +function = { name = "tan"; }; +function = { name = "asin"; }; +function = { name = "acos"; }; +function = { name = "atan"; }; + +function = { name = "sinh"; }; +function = { name = "cosh"; }; +function = { name = "tanh"; }; +function = { name = "asinh"; }; +function = { name = "acosh"; }; +function = { name = "atanh"; }; + +function = { + name = "atan2"; + args = 2; +}; + +function = { + name = "Li2"; + comment = "Dilogarithm"; +}; + +function = { + name = "Li3"; + comment = "Trilogarithm"; +}; + +function = { + name = "zetaderiv"; + comment = "Derivatives of Riemann's Zeta-function"; + args = 2; +}; + +function = { + name = "Li"; + args = 2; + comment = "Polylogarithm and multiple polylogarithm"; +}; + +function = { + name = "S"; + args = 3; + comment = "Nielsen's generalized polylogarithm"; +}; + +function = { + name = "H"; + args = 2; + comment = "Harmonic polylogarithm"; +}; + +function = { name = "lgamma"; }; +function = { name = "tgamma"; }; + +function = { + name = "beta"; + args = 2; + comment = "Beta-function"; +}; + +function = { name = "factorial"; }; + +function = { + name = "binomial"; + args = 2; +}; + +function = { + name = "Order"; + comment = "Order term function (for truncated power series)"; +}; + +/* Thease are not functions, but anyway ... */ +function = { name = "sqrt"; }; + +function = { + name = "pow"; + args = 2; +}; + +function = { + name = "power"; + args = 2; +}; diff --git a/ginac/parser/debug.hpp b/ginac/parser/debug.hpp new file mode 100644 index 00000000..a43cc7ad --- /dev/null +++ b/ginac/parser/debug.hpp @@ -0,0 +1,35 @@ +#ifndef GINAC_PARSER_DEBUG_HPP +#define GINAC_PARSER_DEBUG_HPP +#include +#include +#include +#include "compiler.h" +#ifndef __GNUC__ +#if __STDC_VERSION__ < 199901L +#define __PRETTY_FUNCTION__ "" +#else +#define __PRETTY_FUNCTION__ __func__ +#endif +#endif + +#define bail_out(exception, message) \ +do { \ + std::ostringstream err; \ + err << __PRETTY_FUNCTION__ << "(" << __FILE__ << ':' << __LINE__ << ": "; \ + err << message; \ + throw exception(err.str()); \ +} while (0) + +#define bug(message) bail_out(std::logic_error, message) + +#define dout(condition, message) \ +do { \ + if (unlikely(condition)) { \ + std::cerr << __PRETTY_FUNCTION__ \ + << " (" << __FILE__ << ':' << __LINE__ << "): " \ + << message << std::endl; \ + } \ +} while (0) + +#endif // GINAC_PARSER_DEBUG_HPP + diff --git a/ginac/parser/default_reader.tpl b/ginac/parser/default_reader.tpl new file mode 100644 index 00000000..cfb82365 --- /dev/null +++ b/ginac/parser/default_reader.tpl @@ -0,0 +1,50 @@ +[+ AutoGen5 template .cpp +][+ +COMMENT a part of GiNaC parser -- construct functions from a byte stream. ++][+ +(use-modules (ice-9 format)) + +(define (sequence start end . step) + (let ((step (if (null? step) 1 (car step)))) + (let loop ((n start)) + (if (> n end) '() (cons n (loop (+ step n))))))) ++]/* +[+ (dne " * " " * " ) +] + * + * If you want to change this file, edit either `[+ (def-file) +]' or + * `[+ (tpl-file) +]' file, and run the following command: + * + * autogen -T [+ (tpl-file) +] [+ (def-file) +] + */ +#include "parse_context.hpp" +#include "power.h" +#include "operators.h" +#include "inifcns.h" + +namespace GiNaC +{ +[+ FOR function +] +static ex [+ (get "name") +]_reader(const exvector& ev) +{ + return GiNaC::[+ (get "name") +]([+ + (let ((nargs (if (exist? "args") + (string->number (get "args")) 1))) + (format '#f "~{ev[~a]~^, ~}" (sequence 0 (- nargs 1)))) +]); +}[+ ENDFOR +] + +const prototype_table& get_default_reader() +{ + using std::make_pair; + static bool initialized = false; + static prototype_table reader; + if (!initialized) { +[+ FOR function +] + reader[make_pair("[+ (get "name") +]", [+ + (if (exist? "args") (get "args") "1") + +])] = [+ (get "name") +]_reader;[+ + ENDFOR +] + initialized = true; + } + return reader; +} +} // namespace GiNaC + diff --git a/ginac/parser/lexer.cpp b/ginac/parser/lexer.cpp new file mode 100644 index 00000000..ed1f894f --- /dev/null +++ b/ginac/parser/lexer.cpp @@ -0,0 +1,132 @@ +#include +#include +#include +#include "lexer.hpp" +#include "compiler.h" + +namespace GiNaC +{ +/// Skip to the end of line +static int skipline(std::istream* s); +/// Skip to the next non-whitespace character +static int skipspace(std::istream* s, int c, std::size_t& line); +/// Check if the identifier is predefined literal +static bool literal_p(const std::string& name); + +/// gettok - Return the next token from standard input. +int lexer::gettok() +{ + // Skip any whitespace. + c = skipspace(input, c, line_num); + + // identifier: [a-zA-Z][a-zA-Z0-9]* + if (isalpha(c)) { + str = c; + do { + c = input->get(); + if (isalnum(c)) + str += c; + else + break; + } while (true); + if (unlikely(literal_p(str))) + return token_type::literal; + else + return token_type::identifier; + } + + // Number: [0-9.]+ + if (isdigit(c) || c == '.') { + str = ""; + do { + str += c; + c = input->get(); + } while (isdigit(c) || c == '.'); + return token_type::number; + } + + // Comment until end of line. + if (c == '#') { + c = skipline(input); + ++line_num; + if (c != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (c == EOF) + return token_type::eof; + + // Otherwise, just return the character as its ascii value. + int current = c; + c = input->get(); + return current; +} + +static int skipline(std::istream* s) +{ + int c; + do { + c = s->get(); + } while (c != EOF && c != '\n' && c != '\r'); + return c; +} + +static int skipspace(std::istream* s, int c, std::size_t& line) +{ + while (isspace(c)) { + if (c == '\n') + ++line; + c = s->get(); + } + return c; +} + +static bool literal_p(const std::string& name) +{ + if (name == "I") + return true; + else if (name == "Pi") + return true; + else if (name == "Euler") + return true; + else if (name == "Catalan") + return true; + else + return false; +} + +lexer::lexer(std::istream* in, std::ostream* out, std::ostream* err) +{ + if (in) + input = in; + else + in = &std::cin; + + if (out) + output = out; + else + output = &std::cout; + + if (err) + error = err; + else + error = &std::cerr; + + c = ' '; + str = ""; + line_num = 0; + column = 0; +} + +lexer::~lexer() { } + +void lexer::switch_input(std::istream* in) +{ + input = in; + line_num = 0; + column = 0; +} + +} // namespace GiNaC + diff --git a/ginac/parser/lexer.hpp b/ginac/parser/lexer.hpp new file mode 100644 index 00000000..b53d08f3 --- /dev/null +++ b/ginac/parser/lexer.hpp @@ -0,0 +1,45 @@ +#ifndef GINAC_LEXER_HPP_ +#define GINAC_LEXER_HPP_ +#include +#include +#include +namespace GiNaC +{ + +class lexer +{ + std::istream* input; + std::ostream* output; + std::ostream* error; + /// last character read from stream + int c; + /// identifier and number tokens are stored here + std::string str; + std::size_t line_num; + std::size_t column; + friend class parser; +public: + + lexer(std::istream* in = 0, std::ostream* out = 0, std::ostream* err = 0); + ~lexer(); + + int gettok(); + void switch_input(std::istream* in); + + struct token_type + { + enum + { + eof = -1, + identifier = -4, + number = -5, + literal = -6 + + }; + }; +}; + +} // namespace GiNaC + +#endif // GINAC_LEXER_HPP_ + diff --git a/ginac/parser/parse_binop_rhs.cpp b/ginac/parser/parse_binop_rhs.cpp new file mode 100644 index 00000000..4433a3c0 --- /dev/null +++ b/ginac/parser/parse_binop_rhs.cpp @@ -0,0 +1,176 @@ +#include "ex.h" +#include "symbol.h" +#include "mul.h" +#include "add.h" +#include "power.h" +#include "operators.h" +#include +#include +#include "parser.hpp" +#include "lexer.hpp" +#include "debug.hpp" + +namespace GiNaC +{ + +/// Make a sum or a product. +static ex make_binop_expr(const int binop, const exvector& args); +/// Check if the token is a binary operator. +static inline bool is_binop(const int c); +/// Get the precedence of the pending binary operator. +static int get_tok_prec(const int c); + +/// binoprhs: ([+*/^-] primary)* +ex parser::parse_binop_rhs(int expr_prec, ex& lhs) +{ + exvector args; + args.push_back(lhs); + int binop = -1, orig_binop = -1; + bool need_sign_flip = false; + while (1) { + // check if this is a binop + if (!is_binop(token)) { + if (args.size() > 1) + return make_binop_expr(orig_binop, args); + else + return lhs; + } + + // Okay, we know this is a binop. + if (args.size() == 1) + orig_binop = token; + + binop = token; + + // If this is a binop that binds at least as tightly as + // the current binop, consume it, otherwise we are done. + int tok_prec = get_tok_prec(token); + if (tok_prec < expr_prec) { + if (args.size() > 1) + return make_binop_expr(orig_binop, args); + else + return lhs; + } + + get_next_tok(); // eat binop + + // Parse the primary expression after the binary operator. + ex rhs = parse_primary(); + + // If binop binds less tightly with rhs than the operator after + // rhs, let the pending operator take rhs as its lhs. + int next_prec = get_tok_prec(token); + if (tok_prec < next_prec) + rhs = parse_binop_rhs(tok_prec + 1, rhs); + + // previous operator was '+', and current one is '-' + // (or vice a versa). + if (need_sign_flip) + rhs = - rhs; + + args.push_back(rhs); + + // Minimize the number of eval() and ctor calls. This is + // crucial for a reasonable performance. If the next operator + // is compatible with the pending one (or the same) don't create + // the expression and continue collecting operands instead. + if (binop == token) + continue; + else if (binop == '+' && token == '-') { + need_sign_flip = token != orig_binop; + continue; + } else if (binop == '-' && token == '+') { + need_sign_flip = token != orig_binop; + continue; + } else { + if (args.size() <= 1) + bug("binop has " << args.size() << " arguments, expected >= 2"); + lhs = make_binop_expr(orig_binop, args); + args.clear(); + args.push_back(lhs); + } + } +} + +extern numeric* _num_1_p; + +static ex make_minus_expr(const exvector& args) +{ + exvector rest_args; + rest_args.reserve(args.size() - 1); + std::copy(args.begin() + 1, args.end(), std::back_inserter(rest_args)); + ex rest_base = (new add(rest_args))->setflag(status_flags::dynallocated); + ex rest = (new mul(rest_base, *_num_1_p))->setflag(status_flags::dynallocated); + ex ret = (new add(args[0], rest))->setflag(status_flags::dynallocated); + return ret; +} + +static ex make_divide_expr(const exvector& args) +{ + exvector rest_args; + rest_args.reserve(args.size() - 1); + std::copy(args.begin() + 1, args.end(), std::back_inserter(rest_args)); + ex rest_base = (new mul(rest_args))->setflag(status_flags::dynallocated); + ex rest = pow(rest_base, *_num_1_p); + return (new mul(args[0], rest))->setflag(status_flags::dynallocated); +} + +static ex make_binop_expr(const int binop, const exvector& args) +{ + switch (binop) { + case '+': + return (new add(args))->setflag(status_flags::dynallocated); + case '-': + return make_minus_expr(args); + case '*': + return (new mul(args))->setflag(status_flags::dynallocated); + case '/': + return make_divide_expr(args); + case '^': + if (args.size() != 2) + throw std::invalid_argument( + std::string(__func__) + + ": power should have exactly 2 operands"); + return pow(args[0], args[1]); + default: + throw std::invalid_argument( + std::string(__func__) + + ": invalid binary operation: " + + char(binop)); + } +} + +static inline bool is_binop(const int c) +{ + switch (c) { + case '+': + case '-': + case '*': + case '/': + case '^': + return true; + default: + return false; + } +} + +/// Get the precedence of the pending binary operator. +static int get_tok_prec(const int c) +{ + switch (c) { + case '+': + case '-': + return 20; + case '*': + case '/': + return 40; + case '^': + return 60; + default: + return -1; + // means 'this is not a binary operator' + } +} + +} // namespace GiNaC + diff --git a/ginac/parser/parse_context.cpp b/ginac/parser/parse_context.cpp new file mode 100644 index 00000000..e4956ba7 --- /dev/null +++ b/ginac/parser/parse_context.cpp @@ -0,0 +1,27 @@ +#include "parse_context.hpp" +#include +#include +namespace GiNaC +{ + +const symbol& +find_or_insert_symbol(const std::string& name, symtab& syms, const bool strict) +{ + symtab::const_iterator p = syms.find(name); + if (p != syms.end()) + return p->second.first; + + if (strict) + throw std::invalid_argument( + std::string("find_or_insert_symbol: symbol \"") + + name + "\" not found"); + + // false means this symbol was created by parser + const std::pair tmp = std::make_pair(symbol(name), false); + + symtab::iterator i = syms.insert(symtab::value_type(name, tmp)).first; + return i->second.first; +} + +} + diff --git a/ginac/parser/parse_context.hpp b/ginac/parser/parse_context.hpp new file mode 100644 index 00000000..d0a3d558 --- /dev/null +++ b/ginac/parser/parse_context.hpp @@ -0,0 +1,78 @@ +#ifndef _GINAC_PARSE_CONTEXT_HPP +#define _GINAC_PARSE_CONTEXT_HPP +#include +#include // size_t +#include "ex.h" +#include "symbol.h" +#include +#include + +namespace GiNaC +{ + +/** + * Establishes correspondence between the strings and symbols. + * The parser will create missing symbols (if not instructed otherwise, + * in which case it fails if the expression contains unknown symbols). + * The .second element of pair helps to distinguish between the user + * supplied symbols and parser generated ones. The .first is the symbol + * itself + */ +typedef std::map > symtab; + +/** + * Find the symbol with the @a name in the symbol table @a syms. + * + * If symbol is missing and @a strict = false, insert it, otherwise + * throw an exception. + */ +extern const symbol& +find_or_insert_symbol(const std::string& name, symtab& syms, + const bool strict); + +/** + * Function (or class ctor) prototype + * .first is the name of function(or ctor), + * .second is the number of arguments (each of type ex) + */ +typedef std::pair prototype; + +/** + * A (C++) function for reading functions and classes from the stream. + * + * The parser uses (an associative array of) such functions to construct + * (GiNaC) classes and functions from a sequence of characters. + */ +typedef ex (*reader_func)(const exvector& args); + +/** + * Prototype table. + * + * If parser sees an expression which looks like a function call (e.g. + * foo(x+y, z^2, t)), it looks up such a table to find out which + * function (or class) corresponds to the given name and has the given + * number of the arguments. + * + * N.B. + * + * 1. The function don't have to return a (GiNaC) function or class, it + * can return any expression. + * 2. Overloaded functions/ctors are paritally supported, i.e. there might + * be several functions with the same name, but they should take different + * number of arguments. + * 3. User can extend the parser via custom prototype tables. It's possible + * to read user defined classes, create abbreviations, etc. + */ +typedef std::map prototype_table; + +/** + * Default prototype table. + * + * It supports most of builtin GiNaC functions. + */ +extern const prototype_table& get_default_reader(); + +} + +#endif // _GINAC_PARSE_CONTEXT_HPP + diff --git a/ginac/parser/parser.cpp b/ginac/parser/parser.cpp new file mode 100644 index 00000000..b33261c2 --- /dev/null +++ b/ginac/parser/parser.cpp @@ -0,0 +1,173 @@ +#include +#include +#include "parser.hpp" +#include "lexer.hpp" +#include "debug.hpp" +#include "mul.h" +#include "constant.h" + +namespace GiNaC +{ + +/// identifier_expr: identifier | identifier '(' expression* ')' +ex parser::parse_identifier_expr() +{ + std::string name = scanner->str; + get_next_tok(); // eat identifier. + + if (token != '(') // symbol + return find_or_insert_symbol(name, syms, strict); + + // function/ctor call. + get_next_tok(); // eat ( + exvector args; + if (token != ')') { + while (true) { + ex e = parse_expression(); + args.push_back(e); + + if (token == ')') + break; + + if (token != ',') + throw std::invalid_argument("Expected ')' or ',' in argument list"); + + get_next_tok(); + } + } + // Eat the ')'. + get_next_tok(); + prototype the_prototype = make_pair(name, args.size()); + prototype_table::const_iterator reader = funcs.find(the_prototype); + if (reader == funcs.end()) { + bail_out(std::invalid_argument, + "no function \"" << name << "\" with " << args.size() + << " arguments"); + } + ex ret = reader->second(args); + return ret; +} + +/// paren_expr: '(' expression ')' +ex parser::parse_paren_expr() +{ + get_next_tok(); // eat (. + ex e = parse_expression(); + + if (token != ')') + throw std::invalid_argument("parser::parse_paren_expr: expected ')'"); + get_next_tok(); // eat ). + return e; +} + +extern numeric* _num_1_p; + +/// unary_expr: [+-] expression +ex parser::parse_unary_expr(const int s) +{ + // consume '-' (or '+') + get_next_tok(); + ex v = parse_expression(); + switch (s) { + case '-': + return (new mul(v, *_num_1_p))->setflag(status_flags::dynallocated); + case '+': + return v; + default: + throw std::invalid_argument( + std::string(__func__) + + ": invalid unary operator \"" + + char(s) + "\""); + } +} + +/// primary: identifier_expr | number_expr | paren_expr | unary_expr +ex parser::parse_primary() +{ + switch (token) { + case lexer::token_type::identifier: + return parse_identifier_expr(); + case lexer::token_type::number: + return parse_number_expr(); + case '(': + return parse_paren_expr(); + case '-': + return parse_unary_expr('-'); + case '+': + return parse_unary_expr('+'); + case lexer::token_type::literal: + return parse_literal_expr(); + case lexer::token_type::eof: + bail_out(std::invalid_argument, "got EOF while parsing the expression"); + default: + bail_out(std::invalid_argument, "unknown token " << + token << " (\"" << + (token ? std::string("") + char(token) : "NULL") + << "\")"); + } +} + +/// expression ::= primary binoprhs +ex parser::parse_expression() +{ + ex lhs = parse_primary(); + ex res = parse_binop_rhs(0, lhs); + return res; +} + +/// number_expr: number +ex parser::parse_number_expr() +{ + ex n = numeric(scanner->str.c_str()); + get_next_tok(); // consume the number + return n; +} + +/// literal_expr: 'I' | 'Pi' | 'Euler' | 'Catalan' +ex parser::parse_literal_expr() +{ + if (scanner->str == "I") + return I; + else if (scanner->str == "Pi") + return Pi; + else if (scanner->str == "Euler") + return Euler; + else if (scanner->str == "Catalan") + return Catalan; + bug("unknown literal: \"" << scanner->str << "\""); +} + +ex parser::operator()(std::istream& input) +{ + scanner->switch_input(&input); + get_next_tok(); + ex ret = parse_expression(); + return ret; +} + +ex parser::operator()(const std::string& input) +{ + std::istringstream is(input); + ex ret = operator()(is); + return ret; +} + +int parser::get_next_tok() +{ + token = scanner->gettok(); + return token; +} + +parser::parser(const symtab& syms_, const prototype_table& funcs_, + const bool strict_) : strict(strict_), funcs(funcs_), + syms(syms_) +{ + scanner = new lexer(); +} + +parser::~parser() +{ + delete scanner; +} + +} // namespace GiNaC diff --git a/ginac/parser/parser.hpp b/ginac/parser/parser.hpp new file mode 100644 index 00000000..81b78476 --- /dev/null +++ b/ginac/parser/parser.hpp @@ -0,0 +1,95 @@ +#ifndef GINAC_PARSER_HPP_ + +#include "parse_context.hpp" +#include +#include "ex.h" + +namespace GiNaC +{ + +class lexer; + +/** + * Recursive descent parser for GiNaC expressions. + */ +class parser +{ + // The actual parser rules (in EBNF-alike notation): + + /// expression: primary binoprhs + ex parse_expression(); + + /// primary: indentifier_expr | number_expr | paren_expr | unary_expr + ex parse_primary(); + + /// binoprhs: ([+*/^-] primary)* + ex parse_binop_rhs(int, ex&); + + /// identifier_expr: identifier | + /// identifier '(' expression (',' expression)* ')' + ex parse_identifier_expr(); + + /// paren_expr: '(' expression ')' + ex parse_paren_expr(); + + /// number_expr: number + ex parse_number_expr(); + + /// unary_expr: [+-] expression + ex parse_unary_expr(const int c); + + /// literal_expr: 'I' | 'Pi' | 'Euler' | 'Catalan' + ex parse_literal_expr(); + +public: + /** + * @param syms_ symbol table. + * @param funcs_ function/ctors table. + * @param strict_ if true, throw an exception if unknown + * symbol is encountered. + */ + parser(const symtab& syms_ = symtab(), + const prototype_table& funcs_ = get_default_reader(), + const bool strict_ = false); + ~parser(); + + /// parse the stream @a input + ex operator()(std::istream& input); + /// parse the string @a input + ex operator()(const std::string& input); + + /// report the symbol table used by parser + symtab get_syms() const + { + return syms; + } + +private: + /// If true, throw an exception if an unknown symbol is encountered. + const bool strict; + /** + * Function/ctor table, maps a prototype (which is a name and number + * arguments) to a C++ function. Used for parsing identifier_expr's + * (see parse_identifier_expr). If expression contains unknown + * prototype, an exception will be thrown. + */ + const prototype_table funcs; + /** + * Symbol (variable) table. Used for parsing identifier_expr's + * (see parse_identifier_expr). If parser is strict, exception is + * thrown if an unknown symbol is encountered. Non-strict parser + * appends unknown symbols to the symbol table. + */ + symtab syms; + /// token scanner + lexer* scanner; + /// current token the parser is looking at + int token; + /// read the next token from the scanner + int get_next_tok(); +}; + +} // namespace GiNaC + +#endif // GINAC_PARSER_HPP_ +