merge from 1.8 branch

[bpt/guile.git] / libguile / read.c
diff --git a/libguile/read.c b/libguile/read.c

index fa74bec..9d90135 100644 (file)
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -1,87 +1,119 @@
-/*     Copyright (C) 1995,1996,1997, 1999 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007 Free Software
+ * Foundation, Inc.
   * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this software; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
- * Boston, MA 02111-1307 USA
- *
- * As a special exception, the Free Software Foundation gives permission
- * for additional uses of the text contained in its release of GUILE.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
   *
- * The exception is that, if you link the GUILE library with other files
- * to produce an executable, this does not by itself cause the
- * resulting executable to be covered by the GNU General Public License.
- * Your use of that executable is in no way restricted on account of
- * linking the GUILE library code into it.
- *
- * This exception does not however invalidate any other reasons why
- * the executable file might be covered by the GNU General Public License.
- *
- * This exception applies only to the code released by the
- * Free Software Foundation under the name GUILE.  If you copy
- * code from other Free Software Foundation releases into a copy of
- * GUILE, as the General Public License permits, the exception does
- * not apply to the code that you add in this way.  To avoid misleading
- * anyone as to the status of such modified files, you must delete
- * this exception notice from them.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
   *
- * If you write modifications of your own for GUILE, it is your choice
- * whether to permit this exception to apply to your modifications.
- * If you do not wish that, delete this exception notice.  */
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
  
-/* Software engineering face-lift by Greg J. Badros, 11-Dec-1999,
-   gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */
  
  \f
  
  #include <stdio.h>
-#include "_scm.h"
-#include "chars.h"
-#include "genio.h"
-#include "eval.h"
-#include "unif.h"
-#include "keywords.h"
-#include "alist.h"
-#include "srcprop.h"
-#include "hashtab.h"
-#include "hash.h"
-
-#include "scm_validate.h"
-#include "read.h"
+#include "libguile/_scm.h"
+#include "libguile/chars.h"
+#include "libguile/eval.h"
+#include "libguile/unif.h"
+#include "libguile/keywords.h"
+#include "libguile/alist.h"
+#include "libguile/srcprop.h"
+#include "libguile/hashtab.h"
+#include "libguile/hash.h"
+#include "libguile/ports.h"
+#include "libguile/root.h"
+#include "libguile/strings.h"
+#include "libguile/strports.h"
+#include "libguile/vectors.h"
+#include "libguile/validate.h"
+#include "libguile/srfi-4.h"
+
+#include "libguile/read.h"
+#include "libguile/private-options.h"
+
  
  \f
  
+SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
  SCM_SYMBOL (scm_keyword_prefix, "prefix");
  
-scm_option scm_read_opts[] = {
+scm_t_option scm_read_opts[] = {
    { SCM_OPTION_BOOLEAN, "copy", 0,
      "Copy source code expressions." },
    { SCM_OPTION_BOOLEAN, "positions", 0,
      "Record positions of source code expressions." },
    { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
      "Convert symbols to lower case."},
-  { SCM_OPTION_SCM, "keywords", SCM_BOOL_F,
-    "Style of keyword recognition: #f or 'prefix"}
+  { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
+    "Style of keyword recognition: #f or 'prefix."},
+#if SCM_ENABLE_ELISP
+  { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
+    "Support Elisp vector syntax, namely `[...]'."},
+  { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
+    "Support `\\(' and `\\)' in strings."},
+#endif
+  { 0, },
  };
  
-GUILE_PROC (scm_read_options, "read-options-interface", 0, 1, 0, 
+/*
+  Give meaningful error messages for errors
+
+  We use the format
+
+  FILE:LINE:COL: MESSAGE
+  This happened in ....
+
+  This is not standard GNU format, but the test-suite likes the real
+  message to be in front.
+
+ */
+
+
+void
+scm_i_input_error (char const *function,
+                  SCM port, const char *message, SCM arg)
+{
+  /* Raise `read-error' with MESSAGE prefixed by "FILE:LINE:COL: ".  */
+  SCM where = SCM_FILENAME (port);
+  SCM subr = SCM_BOOL_F;       /* stays #f when FUNCTION is NULL */
+  SCM out = scm_open_output_string ();
+  SCM text;
+
+  if (!scm_is_string (where))
+    where = scm_from_locale_string ("#<unknown port>");
+  if (function)
+    subr = scm_from_locale_string (function);
+
+  scm_simple_format (out, scm_from_locale_string ("~A:~S:~S: ~A"),
+                    scm_list_4 (where,
+                                scm_from_long (SCM_LINUM (port) + 1),
+                                scm_from_int (SCM_COL (port) + 1),
+                                scm_from_locale_string (message)));
+  text = scm_get_output_string (out);
+  scm_close_output_port (out);
+  scm_error_scm (scm_from_locale_symbol ("read-error"),
+                subr, text, arg, SCM_BOOL_F);
+}
+
+
+SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0, 
              (SCM setting),
-"")
+           "Option interface for the read options. Instead of using\n"
+           "this procedure directly, use the procedures @code{read-enable},\n"
+           "@code{read-disable}, @code{read-set!} and @code{read-options}.")
  #define FUNC_NAME s_scm_read_options
  {
    SCM ans = scm_options (setting,
                          scm_read_opts,
-                        SCM_N_READ_OPTIONS,
                          FUNC_NAME);
    if (SCM_COPY_SOURCE_P)
      SCM_RECORD_POSITIONS_P = 1;
@@ -92,24 +124,26 @@ GUILE_PROC (scm_read_options, "read-options-interface", 0, 1, 0,
  /* An association list mapping extra hash characters to procedures.  */
  static SCM *scm_read_hash_procedures;
  
-GUILE_PROC (scm_read, "read", 0, 1, 0, 
+SCM_DEFINE (scm_read, "read", 0, 1, 0, 
              (SCM port),
-"")
+           "Read an s-expression from the input port @var{port}, or from\n"
+           "the current input port if @var{port} is not specified.\n"
+           "Any whitespace before the next token is discarded.")
  #define FUNC_NAME s_scm_read
  {
    int c;
    SCM tok_buf, copy;
  
    if (SCM_UNBNDP (port))
-    port = scm_cur_inp;
-  SCM_VALIDATE_OPINPORT(1,port);
+    port = scm_current_input_port ();
+  SCM_VALIDATE_OPINPORT (1, port);
  
    c = scm_flush_ws (port, (char *) NULL);
    if (EOF == c)
      return SCM_EOF_VAL;
    scm_ungetc (c, port);
  
-  tok_buf = scm_makstr (30L, 0);
+  tok_buf = scm_c_make_string (30, SCM_UNDEFINED);
    return scm_lreadr (&tok_buf, port, &copy);
  }
  #undef FUNC_NAME
@@ -119,11 +153,45 @@ GUILE_PROC (scm_read, "read", 0, 1, 0,
  char *
  scm_grow_tok_buf (SCM *tok_buf)
  {
-  scm_vector_set_length_x (*tok_buf, SCM_MAKINUM (2 * SCM_LENGTH (*tok_buf)));
-  return SCM_CHARS (*tok_buf);
+  size_t oldlen = scm_i_string_length (*tok_buf);
+  const char *olddata = scm_i_string_chars (*tok_buf);
+  char *newdata;
+  SCM newstr = scm_i_make_string (2 * oldlen, &newdata);
+  size_t i;
+
+  for (i = 0; i != oldlen; ++i)
+    newdata[i] = olddata[i];
+
+  *tok_buf = newstr;
+  return newdata;
  }
  
+/* Consume an SCSH-style block comment.  Assume that we've already
+   read the initial `#!', and eat characters until we get an
+   exclamation-point/sharp-sign sequence.
+*/
  
+static void
+skip_scsh_block_comment (SCM port)
+{
+  /* Nonzero while the character read just before this one was `!'.  */
+  int after_bang = 0;
+  int ch;
+
+  while (1)
+    {
+      ch = scm_getc (port);
+      /* NOTE(review): reported as "skip_block_comment", which is not
+         this function's name -- confirm that is intended.  */
+      if (ch == EOF)
+       scm_i_input_error ("skip_block_comment", port, 
+                          "unterminated `#! ... !#' comment", SCM_EOL);
+
+      if (after_bang && ch == '#')
+       return;                 /* saw the closing `!#' */
+      after_bang = (ch == '!');
+    }
+}
  
  int 
  scm_flush_ws (SCM port, const char *eoferr)
@@ -135,7 +203,12 @@ scm_flush_ws (SCM port, const char *eoferr)
        case EOF:
        goteof:
         if (eoferr)
-         scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
+         {
+           scm_i_input_error (eoferr,
+                              port,
+                              "end of file",
+                              SCM_EOL);
+         }
         return c;
        case ';':
        lp:
@@ -149,6 +222,20 @@ scm_flush_ws (SCM port, const char *eoferr)
             break;
           }
         break;
+      case '#':
+       switch (c = scm_getc (port))
+         {
+         case EOF:
+           eoferr = "read_sharp";
+           goto goteof;
+         case '!':
+           skip_scsh_block_comment (port);
+           break;
+         default:
+           scm_ungetc (c, port);
+           return '#';
+         }
+       break;
        case SCM_LINE_INCREMENTORS:
        case SCM_SINGLE_SPACES:
        case '\t':
@@ -164,7 +251,7 @@ int
  scm_casei_streq (char *s1, char *s2)
  {
    while (*s1 && *s2)
-    if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
+    if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2))
        return 0;
      else
        {
@@ -174,30 +261,41 @@ scm_casei_streq (char *s1, char *s2)
    return !(*s1 || *s2);
  }
  
+static int
+scm_i_casei_streq (const char *s1, const char *s2, size_t len2)
+{
+  /* Case-insensitively compare the NUL-terminated S1 with the first
+     LEN2 characters of S2; return 1 if they are equal, else 0.  */
+  size_t i;
+
+  for (i = 0; s1[i] != '\0' && i < len2; i++)
+    if (scm_c_downcase ((int) s1[i]) != scm_c_downcase ((int) s2[i]))
+      return 0;
+
+  /* Equal only if S1 and the LEN2 characters ran out together.  */
+  return s1[i] == '\0' && i == len2;
+}
  
  /* recsexpr is used when recording expressions
   * constructed by read:sharp.
   */
-#ifndef DEBUG_EXTENSIONS
-#define recsexpr(obj, line, column, filename) (obj)
-#else
  static SCM
-recsexpr (SCM obj,int line,int column,SCM filename)
+recsexpr (SCM obj, long line, int column, SCM filename)
  {
-  if (SCM_IMP (obj) || SCM_NCONSP(obj))
+  if (!scm_is_pair(obj)) {
      return obj;
-  {
+  } else {
      SCM tmp = obj, copy;
      /* If this sexpr is visible in the read:sharp source, we want to
         keep that information, so only record non-constant cons cells
         which haven't previously been read by the reader. */
-    if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj)))
+    if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
        {
         if (SCM_COPY_SOURCE_P)
           {
             copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
                              SCM_UNDEFINED);
-           while (SCM_NIMP (tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
+           while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
               {
                 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
                                                       line,
@@ -211,7 +309,7 @@ recsexpr (SCM obj,int line,int column,SCM filename)
         else
           {
             recsexpr (SCM_CAR (obj), line, column, filename);
-           while (SCM_NIMP (tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
+           while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
               recsexpr (SCM_CAR (tmp), line, column, filename);
             copy = SCM_UNDEFINED;
           }
@@ -226,48 +324,26 @@ recsexpr (SCM obj,int line,int column,SCM filename)
      return obj;
    }
  }
-#endif
-
-/* Consume an SCSH-style block comment.  Assume that we've already
-   read the initial `#!', and eat characters until we get a
-   newline/exclamation-point/sharp-sign/newline sequence.  */
-
-static void
-skip_scsh_block_comment (SCM port)
-{
-  /* Is this portable?  Dear God, spare me from the non-eight-bit
-     characters.  But is it tasteful?  */
-  long history = 0;
-
-  for (;;)
-    {
-      int c = scm_getc (port);
-
-      if (c == EOF)
-       scm_wta (SCM_UNDEFINED,
-                "unterminated `#! ... !#' comment", "read");
-      history = ((history << 8) | (c & 0xff)) & 0xffffffff;
  
-      /* Were the last four characters read "\n!#\n"?  */
-      if (history == (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n'))
-       return;
-    }
-}
  
  static SCM scm_get_hash_procedure(int c);
+static SCM scm_i_lreadparen (SCM *, SCM, char *, SCM *, char);
  
  static char s_list[]="list";
+#if SCM_ENABLE_ELISP
+static char s_vector[]="vector";
+#endif
  
  SCM 
-scm_lreadr (SCM *tok_buf,SCM port,SCM *copy)
+scm_lreadr (SCM *tok_buf, SCM port, SCM *copy)
+#define FUNC_NAME "scm_lreadr"
  {
    int c;
-  scm_sizet j;
+  size_t j;
    SCM p;
                                   
-tryagain:
+ tryagain:
    c = scm_flush_ws (port, s_scm_read);
-tryagain_no_flush_ws:
    switch (c)
      {
      case EOF:
@@ -275,12 +351,21 @@ tryagain_no_flush_ws:
  
      case '(':
        return SCM_RECORD_POSITIONS_P
-            ? scm_lreadrecparen (tok_buf, port, s_list, copy)
-            : scm_lreadparen (tok_buf, port, s_list, copy);
+       ? scm_lreadrecparen (tok_buf, port, s_list, copy)
+       : scm_i_lreadparen (tok_buf, port, s_list, copy, ')');
      case ')':
-      scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
+      scm_i_input_error (FUNC_NAME, port,"unexpected \")\"", SCM_EOL);
        goto tryagain;
      
+#if SCM_ENABLE_ELISP
+    case '[':
+      if (SCM_ELISP_VECTORS_P)
+       {
+         p = scm_i_lreadparen (tok_buf, port, s_vector, copy, ']');
+         return scm_is_null (p) ? scm_nullvect : scm_vector (p);
+       }
+      goto read_token;
+#endif
      case '\'':
        p = scm_sym_quote;
        goto recquote;
@@ -315,19 +400,72 @@ tryagain_no_flush_ws:
        return p;
      case '#':
        c = scm_getc (port);
+
+      {
+       /* Check for user-defined hash procedure first, to allow
+          overriding of builtin hash read syntaxes.  */
+       SCM sharp = scm_get_hash_procedure (c);
+       if (scm_is_true (sharp))
+         {
+           long line = SCM_LINUM (port);
+           int column = SCM_COL (port) - 2;
+           SCM got;
+
+           got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
+           if (scm_is_eq (got, SCM_UNSPECIFIED))
+             goto handle_sharp;
+           if (SCM_RECORD_POSITIONS_P)
+             return *copy = recsexpr (got, line, column,
+                                      SCM_FILENAME (port));
+           else
+             return got;
+         }
+      }
+    handle_sharp:
        switch (c)
         {
+         /* Vector, arrays, both uniform and not are handled by this
+            one function.  It also disambiguates between '#f' and
+            '#f32' and '#f64'.
+         */
+       case '0': case '1': case '2': case '3': case '4':
+       case '5': case '6': case '7': case '8': case '9':
+       case 'u': case 's': case 'f':
+       case '@':
         case '(':
-         p = scm_lreadparen (tok_buf, port, "vector", copy);
-         return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
+#if SCM_ENABLE_DEPRECATED
+         /* See below for 'i' and 'e'. */
+       case 'a':
+       case 'c':
+       case 'y':
+       case 'h':
+       case 'l':
+#endif
+         return scm_i_read_array (port, c);
  
         case 't':
         case 'T':
           return SCM_BOOL_T;
-       case 'f':
+
         case 'F':
+         /* See above for lower case 'f'. */
           return SCM_BOOL_F;
  
+
+       case 'i':
+       case 'e':
+#if SCM_ENABLE_DEPRECATED
+         {
+           /* When next char is '(', it really is an old-style
+              uniform array. */
+           int next_c = scm_getc (port);
+           if (next_c != EOF)
+             scm_ungetc (next_c, port);
+           if (next_c == '(')
+             return scm_i_read_array (port, c);
+           /* Fall through. */
+         }
+#endif  
         case 'b':
         case 'B':
         case 'o':
@@ -336,75 +474,71 @@ tryagain_no_flush_ws:
         case 'D':
         case 'x':
         case 'X':
-       case 'i':
         case 'I':
-       case 'e':
         case 'E':
           scm_ungetc (c, port);
           c = '#';
           goto num;
  
         case '!':
-         /* start of a shell script.  Parse as a block comment,
-            terminated by !#, just like SCSH.  */
-         skip_scsh_block_comment (port);
-         /* EOF is not an error here */
-         c = scm_flush_ws (port, (char *)NULL);
-         goto tryagain_no_flush_ws;
-
-#ifdef HAVE_ARRAYS
+         /* should never happen, #!...!# block comments are skipped
+            over in scm_flush_ws. */
+         abort ();
+
         case '*':
           j = scm_read_token (c, tok_buf, port, 0);
-         p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
-         if (SCM_NFALSEP (p))
+         p = scm_istr2bve (scm_c_substring_shared (*tok_buf, 1, j));
+         if (scm_is_true (p))
             return p;
           else
             goto unkshrp;
-#endif
  
         case '{':
           j = scm_read_token (c, tok_buf, port, 1);
-         p = scm_intern (SCM_CHARS (*tok_buf), j);
-         return SCM_CAR (p);
+         return scm_string_to_symbol (scm_c_substring_copy (*tok_buf, 0, j));
  
         case '\\':
           c = scm_getc (port);
           j = scm_read_token (c, tok_buf, port, 0);
           if (j == 1)
-           return SCM_MAKICHR (c);
+           return SCM_MAKE_CHAR (c);
           if (c >= '0' && c < '8')
             {
-             p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
-             if (SCM_NFALSEP (p))
-               return SCM_MAKICHR (SCM_INUM (p));
+             /* Dirk:FIXME::  This type of character syntax is not R5RS
+              * compliant.  Further, it should be verified that the constant
+              * does only consist of octal digits.  Finally, it should be
+              * checked whether the resulting fixnum is in the range of
+              * characters.  */
+             p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf),
+                                                 j, 8);
+             if (SCM_I_INUMP (p))
+               return SCM_MAKE_CHAR (SCM_I_INUM (p));
             }
           for (c = 0; c < scm_n_charnames; c++)
             if (scm_charnames[c]
-               && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
-             return SCM_MAKICHR (scm_charnums[c]);
-         scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
+               && (scm_i_casei_streq (scm_charnames[c],
+                                      scm_i_string_chars (*tok_buf), j)))
+             return SCM_MAKE_CHAR (scm_charnums[c]);
+         scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
+                            scm_list_1 (scm_c_substring (*tok_buf, 0, j)));
  
           /* #:SYMBOL is a syntax for keywords supported in all contexts.  */
         case ':':
-         j = scm_read_token ('-', tok_buf, port, 0);
-         p = scm_intern (SCM_CHARS (*tok_buf), j);
-         return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
+         return scm_symbol_to_keyword (scm_read (port));
  
         default:
         callshrp:
           {
             SCM sharp = scm_get_hash_procedure (c);
  
-           if (SCM_NIMP (sharp))
+           if (scm_is_true (sharp))
               {
-               int line = SCM_LINUM (port);
+               long line = SCM_LINUM (port);
                 int column = SCM_COL (port) - 2;
                 SCM got;
  
-               got = scm_apply (sharp,
-                                SCM_MAKICHR (c),
-                                scm_acons (port, SCM_EOL, SCM_EOL));
-               if (SCM_UNSPECIFIED == got)
+               got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
+               if (scm_is_eq (got, SCM_UNSPECIFIED))
                   goto unkshrp;
                 if (SCM_RECORD_POSITIONS_P)
                   return *copy = recsexpr (got, line, column,
@@ -414,22 +548,37 @@ tryagain_no_flush_ws:
               }
           }
         unkshrp:
-         scm_misc_error (s_scm_read, "Unknown # object: %S",
-                         scm_listify (SCM_MAKICHR (c), SCM_UNDEFINED));
+       scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
+                          scm_list_1 (SCM_MAKE_CHAR (c)));
         }
  
      case '"':
        j = 0;
        while ('"' != (c = scm_getc (port)))
         {
-         SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
+         if (c == EOF)
+           str_eof: scm_i_input_error (FUNC_NAME, port,
+                                       "end of file in string constant", 
+                                       SCM_EOL);
  
-         while (j + 2 >= SCM_LENGTH (*tok_buf))
+         while (j + 2 >= scm_i_string_length (*tok_buf))
             scm_grow_tok_buf (tok_buf);
  
           if (c == '\\')
             switch (c = scm_getc (port))
               {
+             case EOF:
+               goto str_eof;
+             case '"':
+             case '\\':
+               break;
+#if SCM_ENABLE_ELISP
+             case '(':
+             case ')':
+               if (SCM_ESCAPED_PARENS_P)
+                 break;
+               goto bad_escaped;
+#endif
               case '\n':
                 continue;
               case '0':
@@ -453,104 +602,136 @@ tryagain_no_flush_ws:
               case 'v':
                 c = '\v';
                 break;
+             case 'x':
+               {
+                 int a, b;
+                 a = scm_getc (port);
+                 if (a == EOF) goto str_eof;
+                 b = scm_getc (port);
+                 if (b == EOF) goto str_eof;
+                 if      ('0' <= a && a <= '9') a -= '0';
+                 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
+                 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
+                 else goto bad_escaped;
+                 if      ('0' <= b && b <= '9') b -= '0';
+                 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
+                 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
+                 else goto bad_escaped;
+                 c = a * 16 + b;
+                 break;
+               }
+             default:
+             bad_escaped:
+               scm_i_input_error(FUNC_NAME, port,
+                                 "illegal character in escape sequence: ~S",
+                                 scm_list_1 (SCM_MAKE_CHAR (c)));
               }
-         SCM_CHARS (*tok_buf)[j] = c;
+         scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
           ++j;
         }
        if (j == 0)
         return scm_nullstr;
-      SCM_CHARS (*tok_buf)[j] = 0;
-      {
-       SCM str;
-       str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
-       return str;
-      }
  
-    case'0':case '1':case '2':case '3':case '4':
-    case '5':case '6':case '7':case '8':case '9':
+      /* Change this to scm_c_substring_read_only when
+        SCM_STRING_CHARS has been removed.
+      */
+      return scm_c_substring_copy (*tok_buf, 0, j);
+
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
      case '.':
      case '-':
      case '+':
      num:
        j = scm_read_token (c, tok_buf, port, 0);
-      p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
-      if (SCM_NFALSEP (p))
+      if (j == 1 && (c == '+' || c == '-'))
+       /* Shortcut:  Detected symbol '+ or '- */
+       goto tok;
+
+      p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf), j, 10);
+      if (scm_is_true (p))
         return p;
        if (c == '#')
         {
           if ((j == 2) && (scm_getc (port) == '('))
             {
               scm_ungetc ('(', port);
-             c = SCM_CHARS (*tok_buf)[1];
+             c = scm_i_string_chars (*tok_buf)[1];
               goto callshrp;
             }
-         scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
+         scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
         }
        goto tok;
  
      case ':':
-      if (SCM_KEYWORD_STYLE == scm_keyword_prefix)
-       {
-         j = scm_read_token ('-', tok_buf, port, 0);
-         p = scm_intern (SCM_CHARS (*tok_buf), j);
-         return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
-       }
+      if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
+       return scm_symbol_to_keyword (scm_read (port));
+
        /* fallthrough */
      default:
+#if SCM_ENABLE_ELISP
+    read_token:
+#endif
        j = scm_read_token (c, tok_buf, port, 0);
        /* fallthrough */
  
      tok:
-      p = scm_intern (SCM_CHARS (*tok_buf), j);
-      return SCM_CAR (p);
+      return scm_string_to_symbol (scm_c_substring (*tok_buf, 0, j));
      }
  }
+#undef FUNC_NAME
+
  
  #ifdef _UNICOS
  _Pragma ("noopt");             /* # pragma _CRI noopt */
  #endif
  
-scm_sizet 
+size_t 
  scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
  {
-  register scm_sizet j;
-  register int c;
-  register char *p;
-
-  c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(ic) : ic);
-  p = SCM_CHARS (*tok_buf);
+  size_t j;
+  int c;
  
+  c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(ic) : ic);
+                                           
    if (weird)
      j = 0;
    else
      {
        j = 0;
-      while (j + 2 >= SCM_LENGTH (*tok_buf))
-       p = scm_grow_tok_buf (tok_buf);
-      p[j] = c;
+      while (j + 2 >= scm_i_string_length (*tok_buf))
+       scm_grow_tok_buf (tok_buf);
+      scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
        ++j;
      }
  
    while (1)
      {
-      while (j + 2 >= SCM_LENGTH (*tok_buf))
-       p = scm_grow_tok_buf (tok_buf);
+      while (j + 2 >= scm_i_string_length (*tok_buf))
+       scm_grow_tok_buf (tok_buf);
        c = scm_getc (port);
        switch (c)
         {
         case '(':
         case ')':
+#if SCM_ENABLE_ELISP
+       case '[':
+       case ']':
+#endif
         case '"':
         case ';':
         case SCM_WHITE_SPACES:
         case SCM_LINE_INCREMENTORS:
-         if (weird)
+         if (weird
+#if SCM_ENABLE_ELISP
+             || ((!SCM_ELISP_VECTORS_P) && ((c == '[') || (c == ']')))
+#endif
+             )
             goto default_case;
  
           scm_ungetc (c, port);
         case EOF:
         eof_case:
-         p[j] = 0;
           return j;
         case '\\':
           if (!weird)
@@ -570,7 +751,6 @@ scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
           c = scm_getc (port);
           if (c == '#')
             {
-             p[j] = 0;
               return j;
             }
           else
@@ -583,8 +763,8 @@ scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
         default:
         default_case:
           {
-           c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(c) : c);
-           p[j] = c;
+           c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(c) : c);
+            scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
             ++j;
           }
  
@@ -596,8 +776,9 @@ scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
  _Pragma ("opt");               /* # pragma _CRI opt */
  #endif
  
-SCM 
-scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
+static SCM 
+scm_i_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy, char term_char)
+#define FUNC_NAME "scm_i_lreadparen"
  {
    SCM tmp;
    SCM tl;
@@ -605,22 +786,22 @@ scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
    int c;
  
    c = scm_flush_ws (port, name);
-  if (')' == c)
+  if (term_char == c)
      return SCM_EOL;
    scm_ungetc (c, port);
-  if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
+  if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
      {
        ans = scm_lreadr (tok_buf, port, copy);
      closeit:
-      if (')' != (c = scm_flush_ws (port, name)))
-       scm_wta (SCM_UNDEFINED, "missing close paren", "");
+      if (term_char != (c = scm_flush_ws (port, name)))
+       scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
        return ans;
      }
    ans = tl = scm_cons (tmp, SCM_EOL);
-  while (')' != (c = scm_flush_ws (port, name)))
+  while (term_char != (c = scm_flush_ws (port, name)))
      {
        scm_ungetc (c, port);
-      if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
+      if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
         {
           SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy));
           goto closeit;
@@ -630,58 +811,68 @@ scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
      }
    return ans;
  }
+#undef FUNC_NAME
  
  
  SCM 
  scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
+#define FUNC_NAME "scm_lreadrecparen"
  {
    register int c;
    register SCM tmp;
    register SCM tl, tl2 = SCM_EOL;
    SCM ans, ans2 = SCM_EOL;
    /* Need to capture line and column numbers here. */
-  int line = SCM_LINUM (port);
+  long line = SCM_LINUM (port);
    int column = SCM_COL (port) - 1;
  
    c = scm_flush_ws (port, name);
    if (')' == c)
      return SCM_EOL;
    scm_ungetc (c, port);
-  if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
+  if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
      {
        ans = scm_lreadr (tok_buf, port, copy);
        if (')' != (c = scm_flush_ws (port, name)))
-       scm_wta (SCM_UNDEFINED, "missing close paren", "");
+       scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
        return ans;
      }
    /* Build the head of the list structure. */
    ans = tl = scm_cons (tmp, SCM_EOL);
    if (SCM_COPY_SOURCE_P)
-    ans2 = tl2 = scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
+    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
                            ? *copy
                            : tmp,
                            SCM_EOL);
    while (')' != (c = scm_flush_ws (port, name)))
      {
+      SCM new_tail;
+
        scm_ungetc (c, port);
-      if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
+      if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
         {
           SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy));
           if (SCM_COPY_SOURCE_P)
-           SCM_SETCDR (tl2, scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
+           SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp)
                                        ? *copy
                                        : tmp,
                                        SCM_EOL));
           if (')' != (c = scm_flush_ws (port, name)))
-           scm_wta (SCM_UNDEFINED, "missing close paren", "");
+           scm_i_input_error (FUNC_NAME, port,
+                              "missing close paren", SCM_EOL);
           goto exit;
         }
-      tl = SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
+
+      new_tail = scm_cons (tmp, SCM_EOL);
+      SCM_SETCDR (tl, new_tail);
+      tl = new_tail;
+
        if (SCM_COPY_SOURCE_P)
-       tl2 = SCM_SETCDR (tl2, scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
-                                        ? *copy
-                                        : tmp,
-                                        SCM_EOL));
+       {
+         SCM new_tail2 = scm_cons (scm_is_pair (tmp) ? *copy : tmp, SCM_EOL);
+         SCM_SETCDR (tl2, new_tail2);
+         tl2 = new_tail2;
+       }
      }
  exit:
    scm_whash_insert (scm_source_whash,
@@ -695,46 +886,54 @@ exit:
                                        SCM_EOL));
    return ans;
  }
+#undef FUNC_NAME
  
  
  \f
  
  /* Manipulate the read-hash-procedures alist.  This could be written in
     Scheme, but maybe it will also be used by C code during initialisation.  */
-GUILE_PROC (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
+SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
              (SCM chr, SCM proc),
-"")
+           "Install the procedure @var{proc} for reading expressions\n"
+           "starting with the character sequence @code{#} and @var{chr}.\n"
+           "@var{proc} will be called with two arguments:  the character\n"
+           "@var{chr} and the port to read further data from. The object\n"
+           "returned will be the return value of @code{read}. \n"
+           "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
+           )
  #define FUNC_NAME s_scm_read_hash_extend
  {
    SCM this;
    SCM prev;
  
-  SCM_VALIDATE_CHAR(1,chr);
-  SCM_ASSERT (SCM_FALSEP (proc) || SCM_NIMP(proc), proc, SCM_ARG2,
-             FUNC_NAME);
+  SCM_VALIDATE_CHAR (1, chr);
+  SCM_ASSERT (scm_is_false (proc)
+             || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
+             proc, SCM_ARG2, FUNC_NAME);
  
    /* Check if chr is already in the alist.  */
    this = *scm_read_hash_procedures;
    prev = SCM_BOOL_F;
    while (1)
      {
-      if (SCM_NULLP (this))
+      if (scm_is_null (this))
         {
           /* not found, so add it to the beginning.  */
-         if (SCM_NFALSEP (proc))
+         if (scm_is_true (proc))
             {
               *scm_read_hash_procedures = 
                 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
             }
           break;
         }
-      if (chr == SCM_CAAR (this))
+      if (scm_is_eq (chr, SCM_CAAR (this)))
         {
           /* already in the alist.  */
-         if (SCM_FALSEP (proc))
+         if (scm_is_false (proc))
             {
               /* remove it.  */
-             if (prev == SCM_BOOL_F)
+             if (scm_is_false (prev))
                 {
                   *scm_read_hash_procedures =
                     SCM_CDR (*scm_read_hash_procedures);
@@ -765,10 +964,10 @@ scm_get_hash_procedure (int c)
  
    while (1)
      {
-      if (SCM_NULLP (rest))
+      if (scm_is_null (rest))
         return SCM_BOOL_F;
    
-      if (SCM_ICHR (SCM_CAAR (rest)) == c)
+      if (SCM_CHAR (SCM_CAAR (rest)) == c)
         return SCM_CDAR (rest);
       
        rest = SCM_CDR (rest);
@@ -779,8 +978,14 @@ void
  scm_init_read ()
  {
    scm_read_hash_procedures =
-    SCM_CDRLOC (scm_sysintern ("read-hash-procedures", SCM_EOL));
+    SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
  
-  scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
-#include "read.x"
+  scm_init_opts (scm_read_options, scm_read_opts);
+#include "libguile/read.x"
  }
+
+/*
+  Local Variables:
+  c-file-style: "gnu"
+  End:
+*/