+static SCM canonicalize_define (SCM expr);
+
+\f
+
+/* {Syntax Errors}
+ *
+ * This section defines the message strings for the syntax errors that can be
+ * detected during memoization and the functions and macros that shall be
+ * called by the memoizer code to signal syntax errors. */
+
+
+/* Syntax errors that can be detected during memoization: */
+
+/* Circular or improper lists do not form valid scheme expressions. If a
+ * circular list or an improper list is detected in a place where a scheme
+ * expression is expected, a 'Bad expression' error is signalled. */
+static const char s_bad_expression[] = "Bad expression";
+
+/* If a form is detected that holds a different number of expressions than are
+ * required in that context, a 'Missing or extra expression' error is
+ * signalled. */
+static const char s_expression[] = "Missing or extra expression in";
+
+/* If a form is detected that holds less expressions than are required in that
+ * context, a 'Missing expression' error is signalled. */
+static const char s_missing_expression[] = "Missing expression in";
+
+/* If a form is detected that holds more expressions than are allowed in that
+ * context, an 'Extra expression' error is signalled. */
+static const char s_extra_expression[] = "Extra expression in";
+
+/* The empty combination '()' is not allowed as an expression in scheme. If
+ * it is detected in a place where an expression is expected, an 'Illegal
+ * empty combination' error is signalled. Note: If you encounter this error
+ * message, it is very likely that you intended to denote the empty list. To
+ * do so, you need to quote the empty list like (quote ()) or '(). */
+static const char s_empty_combination[] = "Illegal empty combination";
+
+/* A body may hold an arbitrary number of internal defines, followed by a
+ * non-empty sequence of expressions. If a body with an empty sequence of
+ * expressions is detected, a 'Missing body expression' error is signalled.
+ */
+static const char s_missing_body_expression[] = "Missing body expression in";
+
+/* A body may hold an arbitrary number of internal defines, followed by a
+ * non-empty sequence of expressions. Each the definitions and the
+ * expressions may be grouped arbitraryly with begin, but it is not allowed to
+ * mix definitions and expressions. If a define form in a body mixes
+ * definitions and expressions, a 'Mixed definitions and expressions' error is
+ * signalled.
+ */
+static const char s_mixed_body_forms[] = "Mixed definitions and expressions in";
+
+/* Case or cond expressions must have at least one clause. If a case or cond
+ * expression without any clauses is detected, a 'Missing clauses' error is
+ * signalled. */
+static const char s_missing_clauses[] = "Missing clauses";
+
+/* If there is an 'else' clause in a case or a cond statement, it must be the
+ * last clause. If after the 'else' case clause further clauses are detected,
+ * a 'Misplaced else clause' error is signalled. */
+static const char s_misplaced_else_clause[] = "Misplaced else clause";
+
+/* If a case clause is detected that is not in the format
+ * (<label(s)> <expression1> <expression2> ...)
+ * a 'Bad case clause' error is signalled. */
+static const char s_bad_case_clause[] = "Bad case clause";
+
+/* If a case clause is detected where the <label(s)> element is neither a
+ * proper list nor (in case of the last clause) the syntactic keyword 'else',
+ * a 'Bad case labels' error is signalled. Note: If you encounter this error
+ * for an else-clause which seems to be syntactically correct, check if 'else'
+ * is really a syntactic keyword in that context. If 'else' is bound in the
+ * local or global environment, it is not considered a syntactic keyword, but
+ * will be treated as any other variable. */
+static const char s_bad_case_labels[] = "Bad case labels";
+
+/* In a case statement all labels have to be distinct. If in a case statement
+ * a label occurs more than once, a 'Duplicate case label' error is
+ * signalled. */
+static const char s_duplicate_case_label[] = "Duplicate case label";
+
+/* If a cond clause is detected that is not in one of the formats
+ * (<test> <expression1> ...) or (else <expression1> <expression2> ...)
+ * a 'Bad cond clause' error is signalled. */
+static const char s_bad_cond_clause[] = "Bad cond clause";
+
+/* If a cond clause is detected that uses the alternate '=>' form, but does
+ * not hold a recipient element for the test result, a 'Missing recipient'
+ * error is signalled. */
+static const char s_missing_recipient[] = "Missing recipient in";
+
+/* If in a position where a variable name is required some other object is
+ * detected, a 'Bad variable' error is signalled. */
+static const char s_bad_variable[] = "Bad variable";
+
+/* Bindings for forms like 'let' and 'do' have to be given in a proper,
+ * possibly empty list. If any other object is detected in a place where a
+ * list of bindings was required, a 'Bad bindings' error is signalled. */
+static const char s_bad_bindings[] = "Bad bindings";
+
+/* Depending on the syntactic context, a binding has to be in the format
+ * (<variable> <expression>) or (<variable> <expression1> <expression2>).
+ * If anything else is detected in a place where a binding was expected, a
+ * 'Bad binding' error is signalled. */
+static const char s_bad_binding[] = "Bad binding";
+
+/* Some syntactic forms don't allow variable names to appear more than once in
+ * a list of bindings. If such a situation is nevertheless detected, a
+ * 'Duplicate binding' error is signalled. */
+static const char s_duplicate_binding[] = "Duplicate binding";
+
+/* If the exit form of a 'do' expression is not in the format
+ * (<test> <expression> ...)
+ * a 'Bad exit clause' error is signalled. */
+static const char s_bad_exit_clause[] = "Bad exit clause";
+
+/* The formal function arguments of a lambda expression have to be either a
+ * single symbol or a non-cyclic list. For anything else a 'Bad formals'
+ * error is signalled. */
+static const char s_bad_formals[] = "Bad formals";
+
+/* If in a lambda expression something else than a symbol is detected at a
+ * place where a formal function argument is required, a 'Bad formal' error is
+ * signalled. */
+static const char s_bad_formal[] = "Bad formal";
+
+/* If in the arguments list of a lambda expression an argument name occurs
+ * more than once, a 'Duplicate formal' error is signalled. */
+static const char s_duplicate_formal[] = "Duplicate formal";
+
+/* If the evaluation of an unquote-splicing expression gives something else
+ * than a proper list, a 'Non-list result for unquote-splicing' error is
+ * signalled. */
+static const char s_splicing[] = "Non-list result for unquote-splicing";
+
+/* If something else than an exact integer is detected as the argument for
+ * @slot-ref and @slot-set!, a 'Bad slot number' error is signalled. */
+static const char s_bad_slot_number[] = "Bad slot number";
+
+
+/* Signal a syntax error. We distinguish between the form that caused the
+ * error and the enclosing expression. The error message will print out as
+ * shown in the following pattern. The file name and line number are only
+ * given when they can be determined from the erroneous form or from the
+ * enclosing expression.
+ *
+ * <filename>: In procedure memoization:
+ * <filename>: In file <name>, line <nr>: <error-message> in <expression>. */
+
+SCM_SYMBOL (syntax_error_key, "syntax-error");
+
+/* The prototype is needed to indicate that the function does not return. */
+static void
+syntax_error (const char* const, const SCM, const SCM) SCM_NORETURN;
+
+static void
+syntax_error (const char* const msg, const SCM form, const SCM expr)
+{
+ const SCM msg_string = scm_makfrom0str (msg);
+ SCM filename = SCM_BOOL_F;
+ SCM linenr = SCM_BOOL_F;
+ const char *format;
+ SCM args;
+
+ if (SCM_CONSP (form))
+ {
+ filename = scm_source_property (form, scm_sym_filename);
+ linenr = scm_source_property (form, scm_sym_line);
+ }
+
+ if (SCM_FALSEP (filename) && SCM_FALSEP (linenr) && SCM_CONSP (expr))
+ {
+ filename = scm_source_property (expr, scm_sym_filename);
+ linenr = scm_source_property (expr, scm_sym_line);
+ }
+
+ if (!SCM_UNBNDP (expr))
+ {
+ if (!SCM_FALSEP (filename))
+ {
+ format = "In file ~S, line ~S: ~A ~S in expression ~S.";
+ args = scm_list_5 (filename, linenr, msg_string, form, expr);
+ }
+ else if (!SCM_FALSEP (linenr))
+ {
+ format = "In line ~S: ~A ~S in expression ~S.";
+ args = scm_list_4 (linenr, msg_string, form, expr);
+ }
+ else
+ {
+ format = "~A ~S in expression ~S.";
+ args = scm_list_3 (msg_string, form, expr);
+ }
+ }
+ else
+ {
+ if (!SCM_FALSEP (filename))
+ {
+ format = "In file ~S, line ~S: ~A ~S.";
+ args = scm_list_4 (filename, linenr, msg_string, form);
+ }
+ else if (!SCM_FALSEP (linenr))
+ {
+ format = "In line ~S: ~A ~S.";
+ args = scm_list_3 (linenr, msg_string, form);
+ }
+ else
+ {
+ format = "~A ~S.";
+ args = scm_list_2 (msg_string, form);
+ }
+ }
+
+ scm_error (syntax_error_key, "memoization", format, args, SCM_BOOL_F);
+}
+
+
+/* Shortcut macros to simplify syntax error handling. */
+#define ASSERT_SYNTAX(cond, message, form) \
+ { if (!(cond)) syntax_error (message, form, SCM_UNDEFINED); }
+#define ASSERT_SYNTAX_2(cond, message, form, expr) \
+ { if (!(cond)) syntax_error (message, form, expr); }
+
+\f
+
+/* {Ilocs}
+ *
+ * Ilocs are memoized references to variables in local environment frames.
+ * They are represented as three values: The relative offset of the
+ * environment frame, the number of the binding within that frame, and a
+ * boolean value indicating whether the binding is the last binding in the
+ * frame.
+ */
+#define SCM_ILOC00 SCM_MAKE_ITAG8(0L, scm_tc8_iloc)
+#define SCM_IDINC (0x00100000L)
+#define SCM_IDSTMSK (-SCM_IDINC)
+#define SCM_MAKE_ILOC(frame_nr, binding_nr, last_p) \
+ SCM_PACK ( \
+ ((frame_nr) << 8) \
+ + ((binding_nr) << 20) \
+ + ((last_p) ? SCM_ICDR : 0) \
+ + scm_tc8_iloc )
+
+#if (SCM_DEBUG_DEBUGGING_SUPPORT == 1)
+
+SCM scm_dbg_make_iloc (SCM frame, SCM binding, SCM cdrp);
+SCM_DEFINE (scm_dbg_make_iloc, "dbg-make-iloc", 3, 0, 0,
+ (SCM frame, SCM binding, SCM cdrp),
+ "Return a new iloc with frame offset @var{frame}, binding\n"
+ "offset @var{binding} and the cdr flag @var{cdrp}.")
+#define FUNC_NAME s_scm_dbg_make_iloc
+{
+ SCM_VALIDATE_INUM (1, frame);
+ SCM_VALIDATE_INUM (2, binding);
+ return SCM_MAKE_ILOC (SCM_INUM (frame),
+ SCM_INUM (binding),
+ !SCM_FALSEP (cdrp));
+}
+#undef FUNC_NAME
+
+SCM scm_dbg_iloc_p (SCM obj);
+SCM_DEFINE (scm_dbg_iloc_p, "dbg-iloc?", 1, 0, 0,
+ (SCM obj),
+ "Return @code{#t} if @var{obj} is an iloc.")
+#define FUNC_NAME s_scm_dbg_iloc_p
+{
+ return SCM_BOOL (SCM_ILOCP (obj));
+}
+#undef FUNC_NAME
+
+#endif
+
+\f
+
+/* The function lookup_symbol is used during memoization: Lookup the symbol
+ * in the environment. If there is no binding for the symbol, SCM_UNDEFINED
+ * is returned. If the symbol is a syntactic keyword, the macro object to
+ * which the symbol is bound is returned. If the symbol is a global variable,
+ * the variable object to which the symbol is bound is returned. Finally, if
+ * the symbol is a local variable the corresponding iloc object is returned.
+ */
+
+/* A helper function for lookup_symbol: Try to find the symbol in the top
+ * level environment frame. The function returns SCM_UNDEFINED if the symbol
+ * is unbound, it returns a macro object if the symbol is a syntactic keyword
+ * and it returns a variable object if the symbol is a global variable. */
+static SCM
+lookup_global_symbol (const SCM symbol, const SCM top_level)
+{
+ const SCM variable = scm_sym2var (symbol, top_level, SCM_BOOL_F);
+ if (SCM_FALSEP (variable))
+ {
+ return SCM_UNDEFINED;
+ }
+ else
+ {
+ const SCM value = SCM_VARIABLE_REF (variable);
+ if (SCM_MACROP (value))
+ return value;
+ else
+ return variable;
+ }
+}
+
+static SCM
+lookup_symbol (const SCM symbol, const SCM env)
+{
+ SCM frame_idx;
+ unsigned int frame_nr;
+
+ for (frame_idx = env, frame_nr = 0;
+ !SCM_NULLP (frame_idx);
+ frame_idx = SCM_CDR (frame_idx), ++frame_nr)
+ {
+ const SCM frame = SCM_CAR (frame_idx);
+ if (SCM_CONSP (frame))
+ {
+ /* frame holds a local environment frame */
+ SCM symbol_idx;
+ unsigned int symbol_nr;
+
+ for (symbol_idx = SCM_CAR (frame), symbol_nr = 0;
+ SCM_CONSP (symbol_idx);
+ symbol_idx = SCM_CDR (symbol_idx), ++symbol_nr)
+ {
+ if (SCM_EQ_P (SCM_CAR (symbol_idx), symbol))
+ /* found the symbol, therefore return the iloc */
+ return SCM_MAKE_ILOC (frame_nr, symbol_nr, 0);
+ }
+ if (SCM_EQ_P (symbol_idx, symbol))
+ /* found the symbol as the last element of the current frame */
+ return SCM_MAKE_ILOC (frame_nr, symbol_nr, 1);
+ }
+ else
+ {
+ /* no more local environment frames */
+ return lookup_global_symbol (symbol, frame);
+ }
+ }
+
+ return lookup_global_symbol (symbol, SCM_BOOL_F);
+}
+
+
+/* Return true if the symbol is - from the point of view of a macro
+ * transformer - a literal in the sense specified in chapter "pattern
+ * language" of R5RS. In the code below, however, we don't match the
+ * definition of R5RS exactly: It returns true if the identifier has no
+ * binding or if it is a syntactic keyword. */
+static int
+literal_p (const SCM symbol, const SCM env)
+{
+ const SCM value = lookup_symbol (symbol, env);
+ if (SCM_UNBNDP (value) || SCM_MACROP (value))
+ return 1;
+ else
+ return 0;
+}