/*
 * expr.cpp: Expression parser.
 */

/*
 * To make it really easy to add new operators at strange levels of
 * precedence in future, I think I'll implement an operator-
 * precedence parser on this occasion. This is a sort of informal
 * combination of a shift-reduce and recursive approach.
 *
 * The central function expects to read a stream of 'operator'
 * tokens and 'atom' nonterminals from the input. An operator token
 * comes straight from the lexer, and comes with tags saying what
 * the precedence and associativity of that operator is when it's
 * binary, and what its precedence is when unary. An atom token is
 * either a literal or identifier straight from the lexer, or a
 * function call, or something in parentheses; in the latter two
 * cases, we recurse into subfunctions to do the hard work, and when
 * we come back we've got something we can treat as atomic for the
 * purposes of this particular interleaving of operators and atoms.
 *
 * Within the central function, we pile up our operator and atom
 * nonterminals on a stack. Before shifting any given operator, we
 * may choose to perform one or more 'reduce' operations which
 * convert several of these symbols into one: a unary reduce
 * converts an operator and an atom into an atom, and a binary
 * reduce turns A,O,A into A. When we see end-of-string (or closing
 * parenthesis or function-call-argument-terminating comma, if we
 * have ourselves been called recursively), we perform reduces until
 * we have only one symbol left, and return that to our caller.
 *
 * So the question is, when do we shift and when do we reduce?
 *
 * We can trivially identify unary operators as soon as we see them:
 * they're precisely the operators not preceded by an atom (either
 * following another operator, or at the start of the string). So we
 * always know whether a binary or a unary reduce is an available
 * option.
 *
 * We do a reduce if the operator we're about to shift has lower
 * precedence than the one that would be involved in the reduce. (If
 * we have "1+2" and see *, we don't reduce the 1+2 because the
 * 2*something will take precedence; but if we have "1*2" and see +,
 * then we know that's equivalent to 2 plus whatever it is.) Ties
 * are broken by associativity: if the two operators have the same
 * precedence, we reduce if that precedence level is
 * left-associative and otherwise we shift. This applies to both
 * binary and unary reduces.
 *
 * (I'm assuming here, incidentally, that all operators at the same
 * precedence level have the same associativity and the same arity.
 * This is easily enough enforced at setup time by defining the
 * precedence levels to store associativity and arity in two
 * dedicated bits of the precedence value; see ASSOC_MASK below.)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>

#include <string>
#include <map>

#include "spigot.h"
#include "funcs.h"
#include "expr.h"
#include "error.h"

/*
 * Master list of token types, written as an X-macro so that the enum
 * of token types and the table of their printable names are both
 * generated from this one list and cannot get out of step.
 */
#define TOKENS(X) \
    X(ERROR) \
    X(EOS) \
    X(LPAR) \
    X(RPAR) \
    X(COMMA) \
    X(OPERATOR) \
    X(IDENTIFIER) \
    X(LET) \
    X(IN) \
    X(EQUALS) \
    X(NUMBER) \
    X(SPIGOT)

/* Expand each list entry FOO to the enumerator TOK_FOO; TOK_MAX comes
 * last and so equals the number of token types. */
#define TOKEN_ENUM_DEF(x) TOK_##x,
enum tokentype { TOKENS(TOKEN_ENUM_DEF) TOK_MAX };
/* Expand each list entry FOO to the string "FOO", giving a name table
 * indexed by tokentype (used for debugging output). */
#define TOKEN_NAME_DEF(x) #x,
static const char *toknames[] = { TOKENS(TOKEN_NAME_DEF) };

/*
 * Layout of the 'prec' value attached to every entry in the operator
 * table: the bits selected by ASSOC_MASK say what class of thing the
 * entry is (left- or right-associative binary operator, unary
 * operator, or function), and the remaining bits hold a small
 * integer, which is a precedence level for operators and an argument
 * count for functions.
 */
#define ASSOC_MASK 0x3000
#define LEFTASSOC  0x0000
#define RIGHTASSOC 0x1000
#define UNARY      0x2000
#define FUNCTION   0x3000 /* special value: we give arity, not precedence */
#define VARIADIC   0x00FF /* sentinel arity value meaning 'variable' */

/*
 * Master table of operators, constants and functions, as an X-macro.
 * Each entry is X(name, text, prec, expr):
 *
 *  - 'name' becomes the enumerator OP_<name> and the constructor
 *    function op_<name>;
 *  - 'text' is the spelling recognised in the input;
 *  - 'prec' is a precedence level ORed with LEFTASSOC, RIGHTASSOC or
 *    UNARY, or else an argument count (possibly VARIADIC) ORed with
 *    FUNCTION;
 *  - 'expr' is the expression that builds the resulting Spigot. It is
 *    expanded inside a function taking a vector 'args', in which 'a'
 *    and 'b' are shorthands for args[0] and args[1] (NULL if absent).
 *
 * The order of entries matters: it fixes the OP_* index values, and
 * where two entries share the same text (e.g. binary and unary "-")
 * the lexer returns the index of whichever comes first.
 */
#define OPERATORS(X)                                            \
    X(ADD, "+", 10|LEFTASSOC, spigot_add(a,b))                  \
    X(SUB, "-", 10|LEFTASSOC, spigot_sub(a,b))                  \
    X(MUL, "*", 20|LEFTASSOC, spigot_mul(a,b))                  \
    X(DIV, "/", 20|LEFTASSOC, spigot_div(a,b))                  \
    X(MOD, "%", 20|LEFTASSOC, spigot_mod(a,b))                  \
    X(MOD2, "mod", 20|LEFTASSOC, spigot_mod(a,b))               \
    X(REM, "rem", 20|LEFTASSOC, spigot_rem(a,b))                \
    X(POW, "^", 40|RIGHTASSOC, spigot_pow(a,b))                 \
    X(POW2, "**", 40|RIGHTASSOC, spigot_pow(a,b))               \
    X(NEG, "-", 30|UNARY, spigot_neg(a))                        \
    X(NOP, "+", 30|UNARY, a)                                    \
    X(PI, "pi", 0|FUNCTION, spigot_pi())                        \
    X(TAU, "tau", 0|FUNCTION, spigot_tau())                     \
    X(E, "e", 0|FUNCTION, spigot_e())                           \
    X(PHI, "phi", 0|FUNCTION, spigot_phi())                     \
    X(APERY, "apery", 0|FUNCTION, spigot_apery())               \
    X(EULERGAMMA, "eulergamma", 0|FUNCTION, spigot_eulergamma())        \
    X(SQRT, "sqrt", 1|FUNCTION, spigot_sqrt(a))                 \
    X(HYPOT, "hypot", 2|FUNCTION, spigot_hypot(a,b))            \
    X(CBRT, "cbrt", 1|FUNCTION, spigot_cbrt(a))                 \
    X(SIN, "sin", 1|FUNCTION, spigot_sin(a))                    \
    X(COS, "cos", 1|FUNCTION, spigot_cos(a))                    \
    X(TAN, "tan", 1|FUNCTION, spigot_tan(a))                    \
    X(ASIN, "asin", 1|FUNCTION, spigot_asin(a))                 \
    X(ACOS, "acos", 1|FUNCTION, spigot_acos(a))                 \
    X(ATAN, "atan", 1|FUNCTION, spigot_atan(a))                 \
    X(ATAN2, "atan2", 2|FUNCTION, spigot_atan2(a,b))            \
    X(SIND, "sind", 1|FUNCTION, spigot_sind(a))                 \
    X(COSD, "cosd", 1|FUNCTION, spigot_cosd(a))                 \
    X(TAND, "tand", 1|FUNCTION, spigot_tand(a))                 \
    X(ASIND, "asind", 1|FUNCTION, spigot_asind(a))              \
    X(ACOSD, "acosd", 1|FUNCTION, spigot_acosd(a))              \
    X(ATAND, "atand", 1|FUNCTION, spigot_atand(a))              \
    X(ATAN2D, "atan2d", 2|FUNCTION, spigot_atan2d(a,b))         \
    X(EXP, "exp", 1|FUNCTION, spigot_exp(a))                    \
    X(LOG, "log", VARIADIC|FUNCTION, spigot_log_wrapper(args))  \
    X(EXP2, "exp2", 1|FUNCTION, spigot_exp2(a))                 \
    X(EXP10, "exp10", 1|FUNCTION, spigot_exp10(a))              \
    X(LOG2, "log2", 1|FUNCTION, spigot_log2(a))                 \
    X(LOG10, "log10", 1|FUNCTION, spigot_log10(a))              \
    X(POW3, "pow", 2|FUNCTION, spigot_pow(a,b))                 \
    X(EXPM1, "expm1", 1|FUNCTION, spigot_expm1(a))              \
    X(LOG1P, "log1p", 1|FUNCTION, spigot_log1p(a))              \
    X(SINH, "sinh", 1|FUNCTION, spigot_sinh(a))                 \
    X(COSH, "cosh", 1|FUNCTION, spigot_cosh(a))                 \
    X(TANH, "tanh", 1|FUNCTION, spigot_tanh(a))                 \
    X(ASINH, "asinh", 1|FUNCTION, spigot_asinh(a))              \
    X(ACOSH, "acosh", 1|FUNCTION, spigot_acosh(a))              \
    X(ATANH, "atanh", 1|FUNCTION, spigot_atanh(a))              \
    X(LGAMMA, "lgamma", 1|FUNCTION, spigot_lgamma(a))           \
    X(TGAMMA, "gamma", 1|FUNCTION, spigot_gamma(a))             \
    X(TGAMMA2, "tgamma", 1|FUNCTION, spigot_gamma(a))           \
    X(ERF, "erf", 1|FUNCTION, spigot_erf(a))                    \
    X(ERFC, "erfc", 1|FUNCTION, spigot_erfc(a))                 \
    X(Phi, "Phi", 1|FUNCTION, spigot_Phi(a))                    \
    X(Phi2, "norm", 1|FUNCTION, spigot_Phi(a))                  \
    X(ERFINV, "erfinv", 1|FUNCTION, spigot_erfinv(a))           \
    X(ERFINV2, "inverf", 1|FUNCTION, spigot_erfinv(a))          \
    X(ERFCINV, "erfcinv", 1|FUNCTION, spigot_erfcinv(a))        \
    X(ERFCINV2, "inverfc", 1|FUNCTION, spigot_erfcinv(a))       \
    X(PhiINV, "Phiinv", 1|FUNCTION, spigot_Phiinv(a))           \
    X(PhiINV2, "invPhi", 1|FUNCTION, spigot_Phiinv(a))          \
    X(PhiINV3, "norminv", 1|FUNCTION, spigot_Phiinv(a))         \
    X(PhiINV4, "invnorm", 1|FUNCTION, spigot_Phiinv(a))         \
    X(LAMBERTWPOS, "W", 1|FUNCTION, spigot_lambertw_pos(a))     \
    X(LAMBERTWNEG, "Wn", 1|FUNCTION, spigot_lambertw_neg(a))    \
    X(En, "En", 2|FUNCTION, spigot_En(a,b))                     \
    X(E1, "E1", 1|FUNCTION, spigot_E1(a))                       \
    X(Ei, "Ei", 1|FUNCTION, spigot_Ei(a))                       \
    X(Ein, "Ein", 1|FUNCTION, spigot_Ein(a))                    \
    X(li, "li", 1|FUNCTION, spigot_li(a))                       \
    X(Li, "Li", 1|FUNCTION, spigot_Li(a))                       \
    X(Si, "Si", 1|FUNCTION, spigot_Si(a))                       \
    X(si, "si", 1|FUNCTION, spigot_si(a))                       \
    X(Cin, "Cin", 1|FUNCTION, spigot_Cin(a))                    \
    X(Ci, "Ci", 1|FUNCTION, spigot_Ci(a))                       \
    X(UFresnelS, "UFresnelS", 1|FUNCTION, spigot_UFresnelS(a))  \
    X(UFresnelC, "UFresnelC", 1|FUNCTION, spigot_UFresnelC(a))  \
    X(FresnelS, "FresnelS", 1|FUNCTION, spigot_FresnelS(a))     \
    X(FresnelC, "FresnelC", 1|FUNCTION, spigot_FresnelC(a))     \
    X(zeta, "zeta", 1|FUNCTION, spigot_zeta(a))                 \
    X(ABS, "abs", 1|FUNCTION, spigot_abs(a))                    \
    X(FRAC, "frac", 1|FUNCTION, spigot_frac(a))                 \
    X(CEIL, "ceil", 1|FUNCTION, spigot_ceil(a))                 \
    X(FLOOR, "floor", 1|FUNCTION, spigot_floor(a))              \
    X(ALGEBRAIC, "algebraic", VARIADIC|FUNCTION,                \
      spigot_algebraic_wrapper(args))

/*
 * Three parallel expansions of the OPERATORS list, all indexed by the
 * same OP_* value.
 *
 * First, the index enumeration itself; OP_MAX is the entry count.
 */
#define OPERATOR_ENUM_DEF(name,string,prec,expr) OP_##name,
enum { OPERATORS(OPERATOR_ENUM_DEF) OP_MAX };
/* Second, a table giving each entry's input spelling and its combined
 * precedence/arity-and-class value. */
#define OPERATOR_DATA_DEF(name,string,prec,expr) {string,prec},
static const struct {
    const char *text;
    int prec;
} operators[] = { OPERATORS(OPERATOR_DATA_DEF) };
/* Third, one static function op_<name> per entry, which evaluates the
 * entry's 'expr' against a vector of argument Spigots and returns the
 * result. */
#define OPERATOR_CONSTRUCTOR_DEF(name,string,prec,expr)         \
    static Spigot *op_##name(const std::vector<Spigot *> &args) \
    {                                                           \
        /* shorthands for args[0] and args[1] to make most      \
         * definitions nicer */                                 \
        Spigot *a = (args.size() > 0 ? args[0] : NULL);         \
        Spigot *b = (args.size() > 1 ? args[1] : NULL);         \
        (void)a; (void)b; /* dodge unused-variable warnings */  \
        return (expr);                                          \
    }
OPERATORS(OPERATOR_CONSTRUCTOR_DEF)
/* ... and a table of pointers to those functions, so that a
 * constructor can be looked up by OP_* index. */
#define OPERATOR_CONSTRUCTOR_ARRAY(name,string,prec,expr) op_##name,
typedef Spigot *(*op_fn_t)(const std::vector<Spigot *> &args);
static const op_fn_t operator_constructors[] = {
    OPERATORS(OPERATOR_CONSTRUCTOR_ARRAY)
};

/*
 * One token as produced by the lexer.
 *
 * 'type' says what kind of token this is. The other fields are
 * payload, and which of them is meaningful depends on the type:
 *
 *  - text:    the identifier name, for tokens built from a string
 *  - opindex: index into operators[], for TOK_OPERATOR (-1 otherwise)
 *  - n, d:    numerator and denominator, for TOK_NUMBER
 *  - spigot:  a ready-made Spigot, for TOK_SPIGOT (NULL otherwise)
 *
 * Copying a token copies the 'spigot' pointer as-is; this class never
 * deletes it.
 */
class token {
  public:
    tokentype type;
    std::string text;
    int opindex;
    bigint n, d;
    Spigot *spigot;

    /* Default token: TOK_ERROR with no payload. */
    inline token()
        : type(TOK_ERROR), opindex(-1), spigot(NULL) {}
    /* Payload-free token of the given type. */
    inline token(tokentype atype)
        : type(atype), opindex(-1), spigot(NULL) {}
    /* Token carrying text. If len is negative, str must be
     * NUL-terminated and is used in full; otherwise exactly len
     * characters are taken from str. */
    inline token(tokentype atype, const char *str, int len = -1)
        : type(atype), opindex(-1), spigot(NULL)
    {
        if (len < 0)
            len = strlen(str);
        text = std::string(str, len);
    }
    /* Token carrying an index into the operators[] table. */
    inline token(tokentype atype, int index)
        : type(atype), opindex(index), spigot(NULL) {}
    /* Token carrying the rational number an/ad. */
    inline token(tokentype atype, bigint an, bigint ad)
        : type(atype), opindex(-1), n(an), d(ad), spigot(NULL) {}
    /* Token carrying a Spigot pointer. */
    inline token(tokentype atype, Spigot *spig)
        : type(atype), opindex(-1), spigot(spig) {}
    /* Member-wise copy. */
    inline token(const token &x)
        : type(x.type), text(x.text), opindex(x.opindex),
          n(x.n), d(x.d), spigot(x.spigot) {}
    /* Member-wise assignment. */
    inline token &operator=(const token &x)
    {
        type = x.type;
        opindex = x.opindex;
        text = x.text;
        n = x.n;
        d = x.d;
        spigot = x.spigot;
        return *this;
    }

    /*
     * Print a one-line description of the token to standard output:
     * the type name, followed by whichever payload is relevant to
     * that type.
     */
    void debug(void)
    {
        printf("%s", toknames[type]);
        if (type == TOK_OPERATOR) {
            printf(" %d '%s'", opindex, operators[opindex].text);
        } else if (type == TOK_IDENTIFIER) {
            printf(" '%s'", text.c_str());
        } else if (type == TOK_NUMBER) {
            putchar(' ');
            bigint_print(n);
            putchar('/');
            bigint_print(d);
        } else if (type == TOK_SPIGOT) {
            /* %p requires an argument of type void *, and no implicit
             * conversion happens in a variadic call, so convert
             * explicitly. */
            printf(" %p", (void *)spigot);
        }
        putchar('\n');
    }
};

/*
 * Return true if c can begin an identifier: an alphabetic character
 * or an underscore. The NUL terminator never qualifies.
 */
inline bool isidstart(char c)
{
    if (c == '\0')
        return false;
    if (c == '_')
        return true;
    return isalpha((unsigned char)c) != 0;
}
/*
 * Return true if c can appear inside an identifier after its first
 * character: an alphanumeric character or an underscore. The NUL
 * terminator never qualifies.
 */
inline bool isidchar(char c)
{
    if (c == '\0')
        return false;
    if (c == '_')
        return true;
    return isalnum((unsigned char)c) != 0;
}

/*
 * Compare a counted string (lenc characters starting at strc, not
 * necessarily NUL-terminated) against the NUL-terminated string
 * strz. Returns true only if they have the same length and the same
 * contents.
 */
inline bool strczmatch(int lenc, const char *strc, const char *strz)
{
    if ((int)strlen(strz) != lenc)
        return false;
    return memcmp(strc, strz, lenc) == 0;
}

/*
 * Lexer for expression strings.
 *
 * 'p' points at the not-yet-consumed part of a NUL-terminated input
 * string, and 'currtok' holds the most recently lexed token. The
 * constructor lexes the first token immediately; each call to
 * advance() replaces currtok with the next one. Malformed input is
 * reported by throwing spigot_error.
 */
class Lexer {
  public:
    token currtok;
    const char *p;

    static inline int frombase(char c) {
        // This translates an alphanumeric out of any base up to 36.
        // Return value is from 0 to 35 for success, or 36 for
        // failure, so you can easily vet the answer as being within a
        // smaller base.
        return (c >= '0' && c <= '9' ? c - '0' :
                c >= 'A' && c <= 'Z' ? c - ('A'-10) :
                c >= 'a' && c <= 'z' ? c - ('a'-10) : 36);
    }

    /* Start lexing 'string', and read the first token into currtok. */
    Lexer(const char *string) : currtok(), p(string) { advance(); }

    /*
     * Parse a file name starting at q, and return it.
     *
     * If the text begins with a single or double quote character, the
     * name runs up to the matching closing quote, and a doubled quote
     * character inside it stands for one literal quote. Otherwise the
     * name runs up to the next whitespace character or the end of the
     * string.
     *
     * If lenused is non-NULL, *lenused receives the number of input
     * characters consumed, including any quotes.
     *
     * Throws spigot_error if a quoted name is not terminated.
     */
    std::string parse_filename(const char *q, int *lenused)
    {
        std::string ret;
        const char *qorig = q;

        if (*q == '"' || *q == '\'') {
            char quote = *q++;
            while (*q != quote || q[1] == quote) {
                if (!*q) {
                    throw spigot_error("unexpected end of expression in quoted"
                                       " filename string");
                }
                if (*q == quote)
                    q++;               /* skip first of a doubled quote */
                ret.push_back(*q);
                q++;
            }
            q++;                       /* eat closing quote */
        } else {
            while (*q && !isspace((unsigned char)*q)) {
                ret.push_back(*q);
                q++;
            }
        }

        if (lenused) *lenused = q - qorig;
        return ret;
    }

    /*
     * Lex the next token from the input at p into currtok, moving p
     * past it.
     *
     * In order of checking, this recognises: end of string; the
     * single-character punctuation tokens; non-alphabetic operators;
     * identifier-shaped text (which may turn out to be a 'keyword:'
     * prefix, an alphabetic operator, 'let' or 'in', or a plain
     * identifier); and numeric literals.
     *
     * Throws spigot_error on anything it can't make sense of.
     */
    void advance()
    {
        while (*p && isspace((unsigned char)*p)) p++;

        if (!*p) {
            currtok = token(TOK_EOS);
            return;
        }
        if (*p == '(') {
            currtok = token(TOK_LPAR);
            p++;
            return;
        }
        if (*p == ')') {
            currtok = token(TOK_RPAR);
            p++;
            return;
        }
        if (*p == ',') {
            currtok = token(TOK_COMMA);
            p++;
            return;
        }
        if (*p == '=') {
            currtok = token(TOK_EQUALS);
            p++;
            return;
        }

        /*
         * Match non-identifier-shaped operators by maximal munch.
         * (On a tie in length, the earliest table entry wins, because
         * the comparison below is strict.)
         */
        int opmaxlen = 0, opindex = -1;
        for (int i = 0; i < OP_MAX; i++) {
            if (isidstart(operators[i].text[0]))
                continue;
            int oplen = strlen(operators[i].text);
            if (oplen > opmaxlen && !strncmp(p, operators[i].text, oplen)) {
                opmaxlen = oplen;
                opindex = i;
            }
        }
        if (opindex >= 0) {
            currtok = token(TOK_OPERATOR, opindex);
            p += opmaxlen;
            return;
        }

        int base_override = 0;
        if (isidstart(*p)) {

            const char *q = p;
            while (isidchar(*p)) p++;
            if (*p == ':') {
                /*
                 * Various special cases of keywords followed by
                 * colons.
                 *
                 * Start by extracting the keyword into a small string
                 * we can process on its own.
                 */
                char keyword[32];
                int param, pos;

                if (p-q >= (int)sizeof(keyword)) {
                    /*
                     * No keyword longer than this will be recognised
                     * anyway, so rather than faff about with
                     * allocating an arbitrary amount of space to
                     * store a copy of it, we might as well just fill
                     * 'keyword' with a string that can't possibly
                     * have come out of the alphanumeric-matching loop
                     * above.
                     */
                    strcpy(keyword, "!");
                } else {
                    sprintf(keyword, "%.*s", (int)(p-q), q);
                }

                if (!strcmp(keyword, "ieee")) {
                    /*
                     * Special case: "ieee:" followed by a 4-, 8-, 16-
                     * or 32-digit hex number is treated as an IEEE
                     * half, single, double or quad precision
                     * (respectively) floating-point bit pattern in
                     * hex, which is expanded into a rational and
                     * presented as TOK_NUMBER. If there's a following
                     * '.' plus extra hex digits, those extend the
                     * precision in the obvious way.
                     *
                     * So count and collect the hex digits.
                     */
                    bigint value = 0;
                    int ndigits = 0, expbits, sign = 1;
                    p++;
                    while (*p && isxdigit((unsigned char)*p)) {
                        if (ndigits >= 32) {
                            throw spigot_error("expected 4, 8, 16 or 32 hex "
                                               "digits after 'ieee:'");
                        }
                        int val = frombase(*p);
                        assert(val < 16);
                        value *= 16;
                        value += val;
                        ndigits++;
                        p++;
                    }
                    switch (ndigits) {
                      case 4: expbits = 5; break;
                      case 8: expbits = 8; break;
                      case 16: expbits = 11; break;
                      case 32: expbits = 15; break;
                      default:
                        throw spigot_error("expected 4, 8, 16 or 32 hex digits"
                                           " after 'ieee:'");
                    }
                    if (*p == '.') {
                        p++;
                        while (*p && isxdigit((unsigned char)*p)) {
                            int val = frombase(*p);
                            assert(val < 16);
                            value *= 16;
                            value += val;
                            ndigits++;
                            p++;
                        }
                    }
                    /*
                     * Split the bit pattern: everything above the
                     * mantissa field is sign and exponent, and what's
                     * left over is the mantissa.
                     */
                    int exponent = value / bigint_power(2, 4*ndigits-expbits-1);
                    value %= bigint_power(2, 4*ndigits-expbits-1);
                    if (exponent & (1 << expbits))
                        sign = -1;
                    exponent &= (1 << expbits)-1;
                    if (exponent == (1 << expbits)-1) {
                        throw spigot_error("IEEE %s not supported",
                                           value == 0 ? "infinity" : "NaN");
                    }
                    if (exponent == 0) {
                        /* denormal: no implicit leading 1 bit */
                        exponent = 1;
                    } else {
                        /* normal: put the implicit leading 1 back */
                        value += bigint_power(2, 4*ndigits-expbits-1);
                    }
                    exponent -= (1 << (expbits-1))-1;   /* unbias exponent */
                    exponent -= 4*ndigits-expbits-1; /* turn into int * 2^e */
                    if (exponent > 0)
                        currtok = token(TOK_NUMBER, sign * value *
                                        bigint_power(2, exponent), 1);
                    else
                        currtok = token(TOK_NUMBER, sign * value,
                                        bigint_power(2, -exponent));
                    return;

                } else if (sscanf(keyword, "base%d%n", &param, &pos) > 0 &&
                           pos == (int)strlen(keyword)) {
                    /*
                     * Prefixes such as base2: or base19: permit input
                     * in an arbitrary number base between 2 and 36.
                     * We don't process these here; we fall through to
                     * the main literal-processing code below, having
                     * set a base override variable.
                     */
                    if (param < 2 || param > 36) {
                        throw spigot_error("base in keyword '%s:' should be "
                                           "between 2 and 36 inclusive",
                                           keyword);
                    }
                    base_override = param;
                    assert(*p == ':');
                    p++;               // advance past the colon
                } else if (!strcmp(keyword, "cfracfile") ||
                           !strcmp(keyword, "cfracxfile") ||
                           (sscanf(keyword, "base%dfile%n", &param, &pos)>0 &&
                            pos == (int)strlen(keyword)) ||
                           (sscanf(keyword, "base%dxfile%n", &param, &pos)>0 &&
                            pos == (int)strlen(keyword))) {
                    bool cfrac = (keyword[0] == 'c');
                    /* the 'x' variants have an x just before "file" */
                    bool exact = (keyword[strlen(keyword)-5] == 'x');

                    if (!cfrac && (param < 2 || param > 36)) {
                        throw spigot_error("base in keyword '%s:' should be "
                                           "between 2 and 36 inclusive",
                                           keyword);
                    }

                    /*
                     * We expect to see a file name here, which
                     * we'll open and read from at spigot evaluation
                     * time. Depending on which keyword was used
                     * above, we'll expect it to contain either
                     * continued fraction terms or digits in some
                     * number base.
                     */
                    p++;

                    int lenused;
                    std::string filename = parse_filename(p, &lenused);
                    p += lenused;

                    if (*q == 'c') {
                        currtok = token(
                            TOK_SPIGOT,
                            spigot_cfracfile(filename.c_str(), exact));
                    } else {
                        currtok = token(
                            TOK_SPIGOT,
                            spigot_basefile(param, filename.c_str(), exact));
                    }
                    return;
                } else if (!strcmp(keyword, "cfracfd") ||
                           (sscanf(keyword, "base%dfd%n", &param, &pos) > 0 &&
                            pos == (int)strlen(keyword))) {
                    /*
                     * We expect to see an fd number here, from
                     * which we'll read continued fraction terms or
                     * a base representation in the same way as
                     * above.
                     */
#ifdef HAVE_FDS
                    int fd;
#endif

                    p++;
#ifdef HAVE_FDS
                    fd = atoi(p);
#endif
                    p += strspn(p, "0123456789");

                    bool cfrac = (keyword[0] == 'c');

#ifdef HAVE_FDS
                    if (!cfrac && (param < 2 || param > 36)) {
                        throw spigot_error("base in keyword '%s:' should be "
                                           "between 2 and 36 inclusive",
                                           keyword);
                    }

                    if (cfrac) {
                        currtok = token(TOK_SPIGOT, spigot_cfracfd(fd));
                    } else {
                        int base = atoi(q+4);
                        currtok = token(TOK_SPIGOT, spigot_basefd(base, fd));
                    }
                    return;
#else
                    throw spigot_error("'%s:N' not supported in this"
                                       " build of spigot",
                                       cfrac ? "cfracfd" : "baseNfd");
#endif
                } else {
                    throw spigot_error("unrecognised prefix keyword '%.*s:'",
                                       (int)(p-q), q);
                }
            } else {
                /*
                 * Identifiers not prefixed by a colon must be checked
                 * against keywords we know, and failing that,
                 * returned as TOK_IDENTIFIER.
                 */
                for (int i = 0; i < OP_MAX; i++) {
                    if ((operators[i].prec & ASSOC_MASK) == FUNCTION)
                        continue; /* lex function name as id, not operator */
                    if (strczmatch(p-q, q, operators[i].text)) {
                        currtok = token(TOK_OPERATOR, i);
                        return;
                    }
                }
                if (strczmatch(p-q, q, "let")) {
                    currtok = token(TOK_LET);
                } else if (strczmatch(p-q, q, "in")) {
                    currtok = token(TOK_IN);
                } else {
                    currtok = token(TOK_IDENTIFIER, q, p-q);
                }
                return;
            }
        }

        if (*p == '.' || frombase(*p) < (base_override ? base_override : 10)) {
            int base, expbase;
            int expmarker;
            bool seendot = false;
            bigint n = 0, d = 1;

            if (base_override) {
                /*
                 * We saw a baseN: prefix above, which means we're
                 * expecting a number in that base, with no exponent
                 * suffix.
                 */
                base = base_override;
                expbase = 0;
                expmarker = UCHAR_MAX + 1; // avoid ever matching below
            } else if (*p == '0' && tolower((unsigned char)p[1]) == 'x') {
                /*
                 * Hex literal.
                 */
                p += 2;
                base = 16;
                expbase = 2;
                expmarker = 'p';
            } else {
                /*
                 * Decimal literal.
                 */
                base = expbase = 10;
                expmarker = 'e';
            }

            /*
             * Accumulate the digits into the fraction n/d: every
             * digit extends the numerator, and digits after the point
             * also scale up the denominator.
             */
            while (*p == '.' || (*p && frombase(*p) < base)) {
                if (*p == '.') {
                    if (!seendot) {
                        seendot = true;
                        p++;
                        continue;
                    } else {
                        throw spigot_error("two dots in numeric literal");
                    }
                } else {
                    int val = frombase(*p);
                    n *= base;
                    n += val;
                    if (seendot)
                        d *= base;
                    p++;
                }
            }

            if (*p && tolower((unsigned char)*p) == expmarker) {
                int exponent = 0;
                bool expneg = false;
                p++;
                if (*p == '-' || *p == '+') {
                    expneg = (*p == '-');
                    p++;
                }
                while (*p && isdigit((unsigned char)*p)) {
                    int digit = frombase(*p);
                    /*
                     * The exponent comes straight from user input, so
                     * check before accumulating: overflowing a signed
                     * int would be undefined behaviour.
                     */
                    if (exponent > (INT_MAX - digit) / 10) {
                        throw spigot_error("exponent in numeric literal is"
                                           " too large");
                    }
                    exponent = 10 * exponent + digit;
                    p++;
                }

                bigint mult = bigint_power(expbase, exponent);
                if (expneg)
                    d *= mult;
                else
                    n *= mult;
            }

            currtok = token(TOK_NUMBER, n, d);
            return;
        }

        throw spigot_error("unrecognised token");
    }
};

// Uniquely identify every function defined in an expression, no
// matter where in the tree and nesting structure. A FunctionID is
// what ties a reference to a function argument (ASTFnArg) to the
// set of argument values (FnArgHolder) belonging to the right call.
typedef int FunctionID;

struct FnArgHolder {
    std::vector<Spigot *> args;
    FunctionID fnid;
    FnArgHolder *parent;
    FnArgHolder(FunctionID afnid, FnArgHolder *aparent)
        : fnid(afnid), parent(aparent) {}
    void add_arg(Spigot *arg) {
        args.push_back(arg);
    }
    ~FnArgHolder() {
        for (int i = 0; i < (int)args.size(); ++i)
            delete args[i];
    }
    Spigot *lookup(FunctionID afnid, int argindex) {
        if (afnid == fnid) {
            assert(0 <= argindex);
            assert(argindex < (int)args.size());
            return args[argindex]->clone();
        } else {
            assert(parent);
            return parent->lookup(afnid, argindex);
        }
    }
};

/*
 * Abstract base class for nodes of the parsed expression tree.
 */
struct ASTNode {
    /*
     * Build and return the Spigot this node stands for. 'fnargs'
     * supplies the values of any user-defined-function arguments
     * that are in scope at this point.
     */
    virtual Spigot *evaluate(FnArgHolder *fnargs) = 0;

    virtual ~ASTNode() {}
};

/*
 * Leaf node wrapping an already-constructed Spigot. The node owns
 * the Spigot and deletes it on destruction; evaluating the node
 * returns a clone.
 */
struct ASTSpigot : ASTNode {
    Spigot *spigot;

    ASTSpigot(Spigot *aspigot)
        : spigot(aspigot)
    {
    }

    ~ASTSpigot()
    {
        delete spigot;
    }

    Spigot *evaluate(FnArgHolder *)
    {
        return spigot->clone();
    }
};

struct ASTOp : ASTNode {
    op_fn_t opfn;
    std::vector<ASTNode *> args;

    ASTOp(int opindex, const std::vector<ASTNode *> &aargs)
        : opfn(operator_constructors[opindex]), args(aargs) {}

    ~ASTOp() {
        for (int i = 0; i < (int)args.size(); ++i)
            delete args[i];
    };

    Spigot *evaluate(FnArgHolder *fnargs) {
        std::vector<Spigot *> sargs;
        for (int i = 0; i < (int)args.size(); ++i)
            sargs.push_back(args[i]->evaluate(fnargs));
        return opfn(sargs);
    }
};

/*
 * Non-owning cross-reference to a piece of AST that lives elsewhere.
 * Evaluation is forwarded to the target, but destroying the ASTRef
 * leaves the target alone, so that a subtree referred to from several
 * places is not deleted more than once.
 */
struct ASTRef : ASTNode {
    ASTNode *target;

    ASTRef(ASTNode *atarget)
        : target(atarget)
    {
    }

    Spigot *evaluate(FnArgHolder *fnargs)
    {
        return target->evaluate(fnargs);
    }
};

/*
 * Leaf node standing for one argument of a user-defined function:
 * argument number 'argindex' of the function identified by 'fnid'.
 * The actual value is fetched from the FnArgHolder chain at
 * evaluation time.
 */
struct ASTFnArg : ASTNode {
    FunctionID fnid;
    int argindex;

    ASTFnArg(FunctionID afnid, int aargindex)
        : fnid(afnid), argindex(aargindex)
    {
    }

    Spigot *evaluate(FnArgHolder *fnargs)
    {
        return fnargs->lookup(fnid, argindex);
    }
};

struct ASTFnCall : ASTNode {
    ASTNode *fnbody;
    FunctionID fnid;
    std::vector<ASTNode *> args;

    ASTFnCall(ASTNode *afnbody, FunctionID afnid,
              const std::vector<ASTNode *> &aargs)
        : fnbody(afnbody), fnid(afnid), args(aargs) {}

    ~ASTFnCall() {
        for (int i = 0; i < (int)args.size(); ++i)
            delete args[i];
    };

    Spigot *evaluate(FnArgHolder *fnargs) {
        FnArgHolder newfnargs(fnid, fnargs);
        for (int i = 0; i < (int)args.size(); ++i)
            newfnargs.add_arg(args[i]->evaluate(fnargs));
        return fnbody->evaluate(&newfnargs);
    }
};

/*
 * Abstract base class for the things a name can refer to.
 */
struct Definition {
    virtual ~Definition() {}

    /* Whether this definition is a function, as opposed to a plain
     * value. */
    virtual bool is_function() = 0;

    /* Check an argument count. Returns true if 'nargs' is
     * acceptable; on returning false, *expected has been set to the
     * count that was wanted. */
    virtual bool check_nargs(int nargs, int *expected) = 0;

    /* Construct a new AST node that uses this definition with the
     * given argument subtrees. */
    virtual ASTNode *make_call(const std::vector<ASTNode *> &aargs) = 0;
};

/*
 * Definition backed by an entry of the built-in operators table.
 * The entry's argument count is whatever is left of its 'prec' value
 * once the ASSOC_MASK bits are removed.
 */
struct BuiltinDefinition : Definition {
    int opindex;

    BuiltinDefinition(int aopindex) : opindex(aopindex) {}

    /* An entry that takes no arguments is a constant rather than a
     * function. */
    bool is_function() {
        return (operators[opindex].prec & ~ASSOC_MASK) != 0;
    }

    /* Any count is acceptable for a VARIADIC entry; otherwise the
     * count must match exactly. *expected is written only on
     * failure. */
    bool check_nargs(int nargs, int *expected) {
        const int arity = operators[opindex].prec & ~ASSOC_MASK;
        if (arity != VARIADIC && arity != nargs) {
            *expected = arity;
            return false;
        }
        return true;
    }

    virtual ASTNode *make_call(const std::vector<ASTNode *> &args) {
        return new ASTOp(opindex, args);
    }
};

/*
 * Definition of a plain variable whose value is an existing piece of
 * AST. Using the variable produces an ASTRef to that value. This
 * struct never deletes 'value'.
 */
struct Variable : Definition {
    ASTNode *value;

    Variable(ASTNode *avalue)
        : value(avalue)
    {
    }

    bool is_function()
    {
        return false;
    }

    /* Accepts any argument count and never writes *expected. */
    bool check_nargs(int /*nargs_got*/, int * /*expected*/)
    {
        return true;
    }

    /* The argument list is ignored. */
    virtual ASTNode *make_call(const std::vector<ASTNode *> &)
    {
        return new ASTRef(value);
    }
};

/*
 * Definition of a function introduced by a 'let' clause: a fixed
 * parameter count, the AST of the body, and the FunctionID under
 * which its arguments are stored when it is called.
 */
struct UserDefinedFunction : Definition {
    int nargs;
    ASTNode *fnbody;
    FunctionID fnid;

    UserDefinedFunction(int anargs, ASTNode *afnbody, FunctionID afnid)
        : nargs(anargs), fnbody(afnbody), fnid(afnid) {}

    /* Always requires an argument list. */
    bool is_function() {
        return true;
    }

    /*
     * Requires exactly 'nargs' arguments. *expected is written
     * unconditionally, whether or not the check succeeds.
     */
    bool check_nargs(int nargs_got, int *expected) {
        *expected = nargs;
        const bool matches = (nargs_got == nargs);
        return matches;
    }

    /* A call is an ASTFnCall sharing this function's body and id. */
    virtual ASTNode *make_call(const std::vector<ASTNode *> &args) {
        ASTNode *call = new ASTFnCall(fnbody, fnid, args);
        return call;
    }
};

/*
 * One level in a chain of name scopes. Each scope answers lookups
 * for the names it defines itself (lookup_here) and defers to its
 * parent, if it has one, for everything else.
 */
struct Scope {
    Scope *parent;             /* enclosing scope, or NULL at the top */

    /*
     * Resolve 'varname' in this scope or, failing that, in the
     * nearest enclosing scope that defines it. Returns NULL if no
     * scope in the chain knows the name.
     */
    Definition *lookup(const char *varname) {
        Definition *ret = lookup_here(varname);
        if (!ret && parent)
            ret = parent->lookup(varname);
        return ret;
    }

    Scope(Scope *aparent) : parent(aparent) {}

    /*
     * This is a polymorphic base class, so give it a virtual
     * destructor: that way destroying a derived scope through a
     * Scope pointer runs the derived destructor instead of being
     * undefined behaviour.
     */
    virtual ~Scope() {}

    /*
     * Resolve 'varname' using only this scope's own definitions,
     * returning NULL if it is not defined at this level.
     */
    virtual Definition *lookup_here(const char *varname) = 0;
};

struct DictScope : Scope {
    std::map<std::string, Definition *> names;
    DictScope(Scope *parent) : Scope(parent) {}
    ~DictScope() {
        std::map<std::string, Definition *>::iterator it;
        for (it = names.begin(); it != names.end(); ++it)
            delete it->second;
    }
    Definition *lookup_here(const char *varname) {
        std::map<std::string, Definition *>::iterator it;
        it = names.find(varname);
        if (it != names.end())
            return it->second;
        else
            return NULL;
    }
};

struct GlobalScopeWrapper : DictScope {
    GlobalScope *gs;
    GlobalScopeWrapper(GlobalScope *ags) : DictScope(NULL), gs(ags) {
        for (int i = 0; i < OP_MAX; i++) {
            if ((operators[i].prec & ASSOC_MASK) == FUNCTION)
                names[operators[i].text] = new BuiltinDefinition(i);
        }
    }
    Definition *lookup_here(const char *varname) {
        if (gs) {
            Spigot *spig = gs->lookup(varname);
            if (spig)
                return new Variable(new ASTSpigot(spig));
        }
        return DictScope::lookup_here(varname);
    }
};

struct LetScope : DictScope {
    LetScope(Scope *parent) : DictScope(parent) {}
    void add_var(const std::string &varname, ASTNode *node) {
        names[varname] = new Variable(node);
    }
    void add_fn(const std::string &fnname, int nargs,
                ASTNode *node, FunctionID fnid) {
        names[fnname] = new UserDefinedFunction(nargs, node, fnid);
    }
};

/*
 * Scope holding the formal parameters of one function definition.
 * Parameters are numbered from zero in the order they are added,
 * and each resolves to an ASTFnArg identified by this function's id
 * and the parameter's index.
 */
struct FnArgScope : DictScope {
    int nargs;                 /* number of parameters added so far */
    FunctionID fnid;

    FnArgScope(Scope *parent, FunctionID afnid)
        : DictScope(parent), nargs(0), fnid(afnid) {}

    /*
     * Append a parameter called 'argname'. Throws spigot_error if
     * that name has already been used in this parameter list.
     */
    void add_arg(const std::string &argname) {
        const bool seen_before = (names.count(argname) != 0);
        if (seen_before)
            throw spigot_error("parameter name '%s' repeated in function"
                               " definition", argname.c_str());

        const int position = nargs++;
        names[argname] = new Variable(new ASTFnArg(fnid, position));
    }
};

/*
 * One slot of the parser's working stack. A slot holds either an
 * operator or an atom, and the two are told apart by whether 'atom'
 * is NULL.
 */
struct stack {
    ASTNode *atom;             /* non-NULL for an atom, NULL for an operator */
    int opindex;               /* index into operators[] when atom is NULL;
                                * not meaningful for atom slots */
};

void parse_recursive(Lexer &lexer, struct stack *stack, Scope *scope,
                     FunctionID *curr_fnid)
{
    struct stack *sp = stack;
    int index;

    while (1) {
        if (lexer.currtok.type == TOK_EOS ||
            lexer.currtok.type == TOK_COMMA ||
            lexer.currtok.type == TOK_IN ||
            lexer.currtok.type == TOK_RPAR ||
            lexer.currtok.type == TOK_OPERATOR) {
            /*
             * These are all the types of token which might require
             * reducing before we process them.
             */
            if (lexer.currtok.type == TOK_OPERATOR) {
                index = lexer.currtok.opindex;
                lexer.advance();
                got_op:

                /*
                 * Distinguish binary from unary operators: a unary
                 * operator is any operator appearing directly after
                 * another operator or at start-of-expression.
                 */
                if (sp == stack || !sp[-1].atom) {
                    /*
                     * This should be a unary operator. If the lexer has
                     * returned us a binary operator with the same
                     * spelling, find the right one instead.
                     */
                    if ((operators[index].prec & ASSOC_MASK) != UNARY) {
                        for (int i = 0; i < OP_MAX; i++)
                            if (!strcmp(operators[index].text,
                                        operators[i].text) &&
                                (operators[i].prec & ASSOC_MASK) == UNARY) {
                                index = i;
                                break;
                            }
                    }
                    /*
                     * And if that didn't work, we have a syntax error.
                     */
                    if ((operators[index].prec & ASSOC_MASK) != UNARY) {
                        throw spigot_error("expected unary operator");
                    }

                    /*
                     * If it did, though, unary operators get
                     * unconditionally shifted.
                     */
                    sp->atom = NULL;
                    sp->opindex = index;
                    sp++;
                    continue;
                } else {
                    /*
                     * This should be a binary operator.
                     */
                    if ((operators[index].prec & ASSOC_MASK) == UNARY) {
                        throw spigot_error("expected binary operator");
                    }
                }
            } else {
                index = -1; /* this is not actually an operator */
            }

            /*
             * Before we shift (or terminate), reduce any higher-
             * priority operators already on our stack.
             */
            while (1) {
                if (sp - stack < 2)
                    break;             /* run out of candidate reduces */
                assert(sp[-1].atom);
                assert(!sp[-2].atom);
                int thisprec = (index < 0 ? -1 : operators[index].prec);
                int thatprec = operators[sp[-2].opindex].prec;
                if ((thisprec &~ ASSOC_MASK) > (thatprec &~ ASSOC_MASK))
                    break;             /* new operator is higher-priority */
                if ((thisprec &~ ASSOC_MASK) == (thatprec &~ ASSOC_MASK) &&
                    (thisprec & ASSOC_MASK) == RIGHTASSOC)
                    break;             /* equal-prec but right-associative */

                /*
                 * Now we know we want to reduce. Split into unary
                 * and binary cases.
                 */
                std::vector<ASTNode *> args;
                ASTNode *ret;
                if ((thatprec & ASSOC_MASK) == UNARY) {
                    args.push_back(sp[-1].atom);
                    ret = new ASTOp(sp[-2].opindex, args);
                    sp--;
                } else {
                    assert(sp - stack >= 3);
                    assert(sp[-3].atom);
                    args.push_back(sp[-3].atom);
                    args.push_back(sp[-1].atom);
                    ret = new ASTOp(sp[-2].opindex, args);
                    sp -= 2;
                }
                sp[-1].atom = ret;
            }

            /*
             * Now we can shift the new operator, or terminate
             * the parse, depending.
             */
            if (index >= 0) {
                sp->atom = NULL;
                sp->opindex = index;
                sp++;
                continue;
            } else {
                if (sp != stack+1 || !sp[-1].atom) {
                    if (lexer.currtok.type == TOK_EOS)
                        throw spigot_error("unexpected end of expression");
                    else if (lexer.currtok.type == TOK_COMMA)
                        throw spigot_error("unexpected ','");
                    else if (lexer.currtok.type == TOK_IN)
                        throw spigot_error("unexpected 'in'");
                    else
                        throw spigot_error("unexpected ')'");
                }
                return;
            }
        }

        /*
         * If we get here, it means we're about to parse an atom.
         * 
         * One silly special case: if the previous thing on the
         * stack is also an atom, we pretend there was a
         * multiplication sign in between.
         */
        if (sp > stack && sp[-1].atom) {
            index = OP_MUL;
            goto got_op;
        }

        if (lexer.currtok.type == TOK_NUMBER) {
            sp->atom = new ASTSpigot
                (spigot_rational(lexer.currtok.n, lexer.currtok.d));
            sp->opindex = -1;
            sp++;
            lexer.advance();
            continue;
        }

        if (lexer.currtok.type == TOK_SPIGOT) {
            sp->atom = new ASTSpigot(lexer.currtok.spigot);
            sp->opindex = -1;
            sp++;
            lexer.advance();
            continue;
        }

        if (lexer.currtok.type == TOK_IDENTIFIER) {
            std::string id = lexer.currtok.text;
            std::vector<ASTNode *> args;

            Definition *def = scope->lookup(lexer.currtok.text.c_str());
            if (!def)
                throw spigot_error("unrecognised identifier '%s'",
                                   lexer.currtok.text.c_str());
            lexer.advance();

            if (def->is_function()) {
                /*
                 * This is a function call, so collect its
                 * arguments.
                 */
                if (lexer.currtok.type != TOK_LPAR) {
                    throw spigot_error("expected '(' after function name '%s'",
                                       id.c_str());
                }
                lexer.advance();

                while (1) {
                    parse_recursive(lexer, sp, scope, curr_fnid);
                    args.push_back(sp->atom);

                    if (lexer.currtok.type == TOK_RPAR) {
                        lexer.advance();
                        break;
                    } else if (lexer.currtok.type == TOK_COMMA) {
                        lexer.advance();
                    } else {
                        throw spigot_error("expected ',' or ')'");
                    }
                }

                int expected_nargs;
                if (!def->check_nargs(args.size(), &expected_nargs))
                    throw spigot_error("expected %d arguments for function"
                                       " '%s', found %d", expected_nargs,
                                       id.c_str(), (int)args.size());
            }

            sp->atom = def->make_call(args);
            sp->opindex = 0;
            sp++;
            continue;
        }

        if (lexer.currtok.type == TOK_LPAR) {
            lexer.advance();
            parse_recursive(lexer, sp, scope, curr_fnid);
            sp++;
            if (lexer.currtok.type != TOK_RPAR) {
                throw spigot_error("expected ')'");
            }
            lexer.advance();
            continue;
        }

        if (lexer.currtok.type == TOK_LET) {
            LetScope ls(scope);
            lexer.advance();
            while (1) {
                if (lexer.currtok.type != TOK_IDENTIFIER)
                    throw spigot_error("expected identifier in let clause");
                std::string name = lexer.currtok.text;
                lexer.advance();
                FnArgScope fas(&ls, *curr_fnid);
                bool is_function = false;
                if (lexer.currtok.type == TOK_LPAR) {
                    (*curr_fnid)++;
                    is_function = true;
                    lexer.advance();
                    while (1) {
                        if (lexer.currtok.type != TOK_IDENTIFIER)
                            throw spigot_error("expected identifier in "
                                               "function parameter list");
                        fas.add_arg(lexer.currtok.text);
                        lexer.advance();
                        if (lexer.currtok.type == TOK_COMMA) {
                            lexer.advance();
                            continue;
                        } else if (lexer.currtok.type == TOK_RPAR) {
                            lexer.advance();
                            break;
                        } else {
                            throw spigot_error("expected ',' or ')' after "
                                               "identifier in "
                                               "function parameter list");
                        }
                    }
                }
                if (lexer.currtok.type != TOK_EQUALS)
                    throw spigot_error("expected '=' after identifier in"
                                       " let clause");
                lexer.advance();
                parse_recursive(lexer, sp, &fas, curr_fnid);
                if (is_function) {
                    ls.add_fn(name, fas.nargs, sp[0].atom, fas.fnid);
                } else {
                    ls.add_var(name, sp[0].atom);
                }
                if (lexer.currtok.type == TOK_IN) {
                    lexer.advance();
                    break;
                } else if (lexer.currtok.type == TOK_COMMA) {
                    lexer.advance();
                    continue;
                } else {
                    throw spigot_error("expected ',' or 'in' after definition"
                                       " in let clause");
                }
            }
            parse_recursive(lexer, sp, &ls, curr_fnid);
            sp++;
            continue;
        }

        throw spigot_error("unrecognised token");
    }
}

/*
 * Parse the expression in the NUL-terminated string 'expr' and
 * evaluate it.
 *
 * Identifiers are resolved first against 'globalscope' (which may
 * be NULL) and then against the built-in functions. Returns the
 * Spigot produced by evaluating the parsed expression. Throws
 * spigot_error if the string is not a single well-formed
 * expression.
 */
Spigot *expr_parse(const char *expr, GlobalScope *globalscope)
{
    Lexer lexer(expr);
    GlobalScopeWrapper gsw(globalscope);
    FunctionID curr_fnid = 0;

    /*
     * Working stack for the parser, shared by all levels of
     * recursion.
     *
     * Sizing: each token puts at most one entry on the stack, and
     * an implied multiplication between two adjacent atoms adds one
     * more entry that has no token of its own. Assuming every token
     * accounts for at least one character of the input, twice the
     * string length is therefore an upper bound; one entry per
     * character is not, because something like a one-digit number
     * followed by a one-letter name needs three entries. The extra
     * two slots keep the buffer non-empty for an empty string.
     *
     * Holding the buffer in a vector means it is released even when
     * the parser throws, and an allocation failure is reported as
     * an exception rather than producing a null pointer.
     */
    std::vector<struct stack> stackbuf(2 * strlen(expr) + 2);
    struct stack *stack = &stackbuf[0];

    parse_recursive(lexer, stack, &gsw, &curr_fnid);
    if (lexer.currtok.type != TOK_EOS) {
        throw spigot_error("expected end of string");
    }
    ASTNode *ast = stack[0].atom;

    /*
     * In the event of a parse failure, that function would never
     * even have returned.
     */
    Spigot *ret = ast->evaluate(NULL);
    delete ast;
    return ret;
}
