(Fload) <!load_dangerous_libraries>: Don't leak fd.

[bpt/emacs.git] / src / lread.c
diff --git a/src/lread.c b/src/lread.c

index 824e34a..b4dc715 100644 (file)
--- a/src/lread.c
+++ b/src/lread.c
@@ -1,5 +1,5 @@
  /* Lisp parsing and input streams.
-   Copyright (C) 1985, 86, 87, 88, 89, 93, 94, 95, 97, 98, 99, 2000, 2001
+   Copyright (C) 1985, 86, 87, 88, 89, 93, 94, 95, 97, 98, 99, 2000, 01, 02
        Free Software Foundation, Inc.
  
  This file is part of GNU Emacs.
@@ -29,6 +29,7 @@ Boston, MA 02111-1307, USA.  */
  #include "lisp.h"
  #include "intervals.h"
  #include "buffer.h"
+#include "character.h"
  #include "charset.h"
  #include <epaths.h>
  #include "commands.h"
@@ -189,7 +190,6 @@ int load_dangerous_libraries;
  
  static Lisp_Object Vbytecomp_version_regexp;
  
-static void to_multibyte P_ ((char **, char **, int *));
  static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, 
                               Lisp_Object (*) (), int,
                               Lisp_Object, Lisp_Object));
@@ -220,7 +220,6 @@ readchar (readcharfun)
        register struct buffer *inbuffer = XBUFFER (readcharfun);
  
        int pt_byte = BUF_PT_BYTE (inbuffer);
-      int orig_pt_byte = pt_byte;
  
        if (readchar_backlog > 0)
         /* We get the address of the byte just passed,
@@ -256,7 +255,6 @@ readchar (readcharfun)
        register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
  
        int bytepos = marker_byte_position (readcharfun);
-      int orig_bytepos = bytepos;
  
        if (readchar_backlog > 0)
         /* We get the address of the byte just passed,
@@ -567,7 +565,7 @@ safe_to_load_p (fd)
  {
    char buf[512];
    int nbytes, i;
-  int safe_p = 1;
+  int safe_p = 1, version = 0;
  
    /* Read the first few bytes from the file, and look for a line
       specifying the byte compiler version used.  */
@@ -577,15 +575,18 @@ safe_to_load_p (fd)
        buf[nbytes] = '\0';
  
        /* Skip to the next newline, skipping over the initial `ELC'
-        with NUL bytes following it.  */
+        with NUL bytes following it, but note the version.  */
        for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
-       ;
+       if (i == 4)
+          version = buf[i];
  
        if (i < nbytes
           && fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
                                               buf + i) < 0)
         safe_p = 0;
      }
+  if (safe_p)
+    safe_p = version;
  
    lseek (fd, 0, SEEK_SET);
    return safe_p;
@@ -751,20 +752,47 @@ Return t if file exists.  */)
        if (fd != -2)
         {
           struct stat s1, s2;
-         int result;
+         int result, version;
  
-         if (!safe_to_load_p (fd))
+         if (!(version = safe_to_load_p (fd)))
             {
               safe_p = 0;
               if (!load_dangerous_libraries)
-               error ("File `%s' was not compiled in Emacs",
-                      XSTRING (found)->data);
+               {
+                 emacs_close (fd);
+                 error ("File `%s' was not compiled in Emacs",
+                        XSTRING (found)->data);
+               }
               else if (!NILP (nomessage))
                 message_with_string ("File `%s' not compiled in Emacs", found, 1);
             }
  
           compiled = 1;
  
+         if (version == 20)    /* 21 isn't used */
+           /* We're loading something compiled with Mule 3, 4 or 5,
+              and thus potentially emacs-mule-encoded; load it with
+              code conversion.  (Perhaps the test should actually be
+              <22?)  We could check further on whether the comment
+              mentions multibyte and only code-convert if it does.  I
+              doubt it's worth the effort. -- fx  */
+           {
+             Lisp_Object val;
+
+             if (fd >= 0)
+               emacs_close (fd);
+             /* load-with-code-conversion currently fails with
+                emacs-mule non-ASCII doc strings.  */
+             error ("Can't currently load Emacs 20/1-compiled files: %s",
+                    XSTRING (found)->data);
+#if 0
+             val = call4 (intern ("load-with-code-conversion"), found, file,
+                          NILP (noerror) ? Qnil : Qt,
+                          NILP (nomessage) ? Qnil : Qt);
+#endif
+             return unbind_to (count, val);
+           }
+
  #ifdef DOS_NT
           fmode = "rb";
  #endif /* DOS_NT */
@@ -1476,41 +1504,63 @@ static char *read_buffer;
  
  /* Read multibyte form and return it as a character.  C is a first
     byte of multibyte form, and rest of them are read from
-   READCHARFUN.  */
+   READCHARFUN.  Store the byte length of the form into *NBYTES.  */
  
  static int
-read_multibyte (c, readcharfun)
+read_multibyte (c, readcharfun, nbytes)
       register int c;
       Lisp_Object readcharfun;
+     int *nbytes;
  {
    /* We need the actual character code of this multibyte
       characters.  */
    unsigned char str[MAX_MULTIBYTE_LENGTH];
    int len = 0;
-  int bytes;
+  int bytes = BYTES_BY_CHAR_HEAD (c);
  
    str[len++] = c;
-  while ((c = READCHAR) >= 0xA0
-        && len < MAX_MULTIBYTE_LENGTH)
-    str[len++] = c;
-  UNREAD (c);
-  if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
-    return STRING_CHAR (str, len);
+  while (len < bytes)
+    {
+      c = READCHAR;
+      if (CHAR_HEAD_P (c))
+       {
+         UNREAD (c);
+         break;
+       }
+      str[len++] = c;
+    }
+
+  if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0)
+    {
+      *nbytes = len;
+      return STRING_CHAR (str, len);
+    }
    /* The byte sequence is not valid as multibyte.  Unread all bytes
       but the first one, and return the first byte.  */
    while (--len > 0)
      UNREAD (str[len]);
+  *nbytes = 1;
    return str[0];
  }
  
-/* Read a \-escape sequence, assuming we already read the `\'.  */
+/* Read a \-escape sequence, assuming we already read the `\'.
+   If the escape sequence forces unibyte, store 1 into *BYTEREP.
+   If the escape sequence forces multibyte and the returned character
+   is raw 8-bit char, store 2 into *BYTEREP.
+   If the escape sequence forces multibyte and the returned character
+   is not raw 8-bit char, store 3 into *BYTEREP.
+   Otherwise store 0 into *BYTEREP.  */
  
  static int
-read_escape (readcharfun, stringp)
+read_escape (readcharfun, stringp, byterep)
       Lisp_Object readcharfun;
       int stringp;
+     int *byterep;
  {
    register int c = READCHAR;
+
+  *byterep = 0;
+
    switch (c)
      {
      case -1:
@@ -1547,7 +1597,7 @@ read_escape (readcharfun, stringp)
         error ("Invalid escape character syntax");
        c = READCHAR;
        if (c == '\\')
-       c = read_escape (readcharfun, 0);
+       c = read_escape (readcharfun, 0, byterep);
        return c | meta_modifier;
  
      case 'S':
@@ -1556,7 +1606,7 @@ read_escape (readcharfun, stringp)
         error ("Invalid escape character syntax");
        c = READCHAR;
        if (c == '\\')
-       c = read_escape (readcharfun, 0);
+       c = read_escape (readcharfun, 0, byterep);
        return c | shift_modifier;
  
      case 'H':
@@ -1565,7 +1615,7 @@ read_escape (readcharfun, stringp)
         error ("Invalid escape character syntax");
        c = READCHAR;
        if (c == '\\')
-       c = read_escape (readcharfun, 0);
+       c = read_escape (readcharfun, 0, byterep);
        return c | hyper_modifier;
  
      case 'A':
@@ -1574,7 +1624,7 @@ read_escape (readcharfun, stringp)
         error ("Invalid escape character syntax");
        c = READCHAR;
        if (c == '\\')
-       c = read_escape (readcharfun, 0);
+       c = read_escape (readcharfun, 0, byterep);
        return c | alt_modifier;
  
      case 's':
@@ -1583,7 +1633,7 @@ read_escape (readcharfun, stringp)
         error ("Invalid escape character syntax");
        c = READCHAR;
        if (c == '\\')
-       c = read_escape (readcharfun, 0);
+       c = read_escape (readcharfun, 0, byterep);
        return c | super_modifier;
  
      case 'C':
@@ -1593,7 +1643,7 @@ read_escape (readcharfun, stringp)
      case '^':
        c = READCHAR;
        if (c == '\\')
-       c = read_escape (readcharfun, 0);
+       c = read_escape (readcharfun, 0, byterep);
        if ((c & ~CHAR_MODIFIER_MASK) == '?')
         return 0177 | (c & CHAR_MODIFIER_MASK);
        else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -1632,6 +1682,11 @@ read_escape (readcharfun, stringp)
                 break;
               }
           }
+       
+       if (c < 0x100)
+         *byterep = 1;
+       else
+         *byterep = 3;
         return i;
        }
  
@@ -1639,6 +1694,7 @@ read_escape (readcharfun, stringp)
        /* A hex escape, as in ANSI C.  */
        {
         int i = 0;
+       int count = 0;
         while (1)
           {
             c = READCHAR;
@@ -1661,13 +1717,26 @@ read_escape (readcharfun, stringp)
                 UNREAD (c);
                 break;
               }
+           count++;
           }
+
+       if (count < 3 && i >= 0x80)
+         *byterep = 2;
+       else
+         *byterep = 3;
         return i;
        }
  
      default:
-      if (BASE_LEADING_CODE_P (c))
-       c = read_multibyte (c, readcharfun);
+      if (EQ (readcharfun, Qget_file_char)
+         && BASE_LEADING_CODE_P (c))
+       {
+         int nbytes;
+
+         c = read_multibyte (c, readcharfun, &nbytes);
+         if (nbytes > 1)
+           *byterep = 3;
+       }
        return c;
      }
  }
@@ -1739,43 +1808,6 @@ read_integer (readcharfun, radix)
  }
  
  
-/* Convert unibyte text in read_buffer to multibyte.
-
-   Initially, *P is a pointer after the end of the unibyte text, and
-   the pointer *END points after the end of read_buffer.
-
-   If read_buffer doesn't have enough room to hold the result
-   of the conversion, reallocate it and adjust *P and *END.
-
-   At the end, make *P point after the result of the conversion, and
-   return in *NCHARS the number of characters in the converted
-   text.  */
-
-static void
-to_multibyte (p, end, nchars)
-     char **p, **end;
-     int *nchars;
-{
-  int nbytes;
-
-  parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
-  if (read_buffer_size < 2 * nbytes)
-    {
-      int offset = *p - read_buffer;
-      read_buffer_size = 2 * max (read_buffer_size, nbytes);
-      read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
-      *p = read_buffer + offset;
-      *end = read_buffer + read_buffer_size;
-    }
-
-  if (nbytes != *nchars)
-    nbytes = str_as_multibyte (read_buffer, read_buffer_size,
-                              *p - read_buffer, nchars);
-  
-  *p = read_buffer + nbytes;
-}
-
-
  /* If the next token is ')' or ']' or '.', we store that character
     in *PCH and the return value is not interesting.  Else, we store
     zero in *PCH and we read and return one lisp object.
@@ -1823,11 +1855,9 @@ read1 (readcharfun, pch, first_in_list)
             {
               Lisp_Object tmp;
               tmp = read_vector (readcharfun, 0);
-             if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS
-                 || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
+             if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
                 error ("Invalid size char-table");
               XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
-             XCHAR_TABLE (tmp)->top = Qt;
               return tmp;
             }
           else if (c == '^')
@@ -1836,11 +1866,18 @@ read1 (readcharfun, pch, first_in_list)
               if (c == '[')
                 {
                   Lisp_Object tmp;
+                 int depth, size;
+                 
                   tmp = read_vector (readcharfun, 0);
-                 if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS)
+                 if (!INTEGERP (AREF (tmp, 0)))
+                   error ("Invalid depth in char-table");
+                 depth = XINT (AREF (tmp, 0));
+                 if (depth < 1 || depth > 3)
+                   error ("Invalid depth in char-table");
+                 size = XVECTOR (tmp)->size + 2;
+                 if (chartab_size [depth] != size)
                     error ("Invalid size char-table");
-                 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
-                 XCHAR_TABLE (tmp)->top = Qnil;
+                 XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
                   return tmp;
                 }
               Fsignal (Qinvalid_read_syntax,
@@ -2115,14 +2152,17 @@ read1 (readcharfun, pch, first_in_list)
  
      case '?':
        {
+       int discard;
+
         c = READCHAR;
         if (c < 0)
           end_of_file_error ();
  
         if (c == '\\')
-         c = read_escape (readcharfun, 0);
-       else if (BASE_LEADING_CODE_P (c))
-         c = read_multibyte (c, readcharfun);
+         c = read_escape (readcharfun, 0, &discard);
+       else if (EQ (readcharfun, Qget_file_char)
+                && BASE_LEADING_CODE_P (c))
+         c = read_multibyte (c, readcharfun, &discard);
  
         return make_number (c);
        }
@@ -2139,7 +2179,7 @@ read1 (readcharfun, pch, first_in_list)
            a single-byte character.  */
         int force_singlebyte = 0;
         int cancel = 0;
-       int nchars;
+       int nchars = 0;
  
         while ((c = READCHAR) >= 0
                && c != '\"')
@@ -2155,7 +2195,10 @@ read1 (readcharfun, pch, first_in_list)
  
             if (c == '\\')
               {
-               c = read_escape (readcharfun, 1);
+               int modifiers;
+               int byterep;
+
+               c = read_escape (readcharfun, 1, &byterep);
  
                 /* C is -1 if \ newline has just been seen */
                 if (c == -1)
@@ -2165,45 +2208,91 @@ read1 (readcharfun, pch, first_in_list)
                     continue;
                   }
  
-               /* If an escape specifies a non-ASCII single-byte character,
-                  this must be a unibyte string.  */
-               if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))
-                   && ! ASCII_BYTE_P ((c & ~CHAR_MODIFIER_MASK)))
-                 force_singlebyte = 1;
-             }
+               modifiers = c & CHAR_MODIFIER_MASK;
+               c = c & ~CHAR_MODIFIER_MASK;
  
-           if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
-             {
-               /* Any modifiers for a multibyte character are invalid.  */
-               if (c & CHAR_MODIFIER_MASK)
-                 error ("Invalid modifier in string");
-               p += CHAR_STRING (c, p);
-               force_multibyte = 1;
-             }
-           else
-             {
-               /* Allow `\C- ' and `\C-?'.  */
-               if (c == (CHAR_CTL | ' '))
-                 c = 0;
-               else if (c == (CHAR_CTL | '?'))
-                 c = 127;
+               if (byterep == 1)
+                 {
+                   force_singlebyte = 1;
+                   if (c >= 0x80)
+                     /*  Raw 8-bit code */
+                     c = BYTE8_TO_CHAR (c);
+                 }
+               else if (byterep > 1)
+                 {
+                   force_multibyte = 1;
+                   if (byterep == 2)
+                     c = BYTE8_TO_CHAR (c);
+                 }
+               else if (c >= 0x80)
+                 {
+                   force_singlebyte = 1;
+                   c = BYTE8_TO_CHAR (c);
+                 }
  
-               if (c & CHAR_SHIFT)
+               if (ASCII_CHAR_P (c))
                   {
-                   /* Shift modifier is valid only with [A-Za-z].  */
-                   if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
-                     c &= ~CHAR_SHIFT;
-                   else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
-                     c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+                   /* Allow `\C- ' and `\C-?'.  */
+                   if (modifiers == CHAR_CTL)
+                     {
+                       if (c == ' ')
+                         c = 0, modifiers = 0;
+                       else if (c == '?')
+                         c = 127, modifiers = 0;
+                     }
+                   if (modifiers & CHAR_SHIFT)
+                     {
+                       /* Shift modifier is valid only with [A-Za-z].  */
+                       if (c >= 'A' && c <= 'Z')
+                         modifiers &= ~CHAR_SHIFT;
+                       else if (c >= 'a' && c <= 'z')
+                         c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
+                     }
+
+                   if (modifiers & CHAR_META)
+                     {
+                       /* Move the meta bit to the right place for a
+                          string.  */
+                       modifiers &= ~CHAR_META;
+                       c = BYTE8_TO_CHAR (c | 0x80);
+                       force_singlebyte = 1;
+                     }
                   }
  
-               if (c & CHAR_META)
-                 /* Move the meta bit to the right place for a string.  */
-                 c = (c & ~CHAR_META) | 0x80;
-               if (c & ~0xff)
+               /* Any modifiers remaining are invalid.  */
+               if (modifiers)
                   error ("Invalid modifier in string");
-               *p++ = c;
+               p += CHAR_STRING (c, (unsigned char *) p);
+             }
+           else if (c >= 0x80)
+             {
+               if (EQ (readcharfun, Qget_file_char))
+                 {
+                   if (BASE_LEADING_CODE_P (c))
+                     {
+                       int nbytes;
+                       c = read_multibyte (c, readcharfun, &nbytes);
+                       if (nbytes > 1)
+                         force_multibyte = 1;
+                       else
+                         {
+                           force_singlebyte = 1;
+                           c = BYTE8_TO_CHAR (c);
+                         }
+                     }
+                   else
+                     {
+                       force_singlebyte = 1;
+                       c = BYTE8_TO_CHAR (c);
+                     }
+                 }
+               else
+                 force_multibyte = 1;
+               p += CHAR_STRING (c, (unsigned char *) p);
               }
+           else
+             *p++ = c;
+           nchars++;
           }
         if (c < 0)
           end_of_file_error ();
@@ -2215,34 +2304,15 @@ read1 (readcharfun, pch, first_in_list)
           return make_number (0);
  
         if (force_multibyte)
-         to_multibyte (&p, &end, &nchars);
+         /* READ_BUFFER already contains valid multibyte forms.  */
+         ;
         else if (force_singlebyte)
-         nchars = p - read_buffer;
-       else if (load_convert_to_unibyte)
           {
-           Lisp_Object string;
-           to_multibyte (&p, &end, &nchars);
-           if (p - read_buffer != nchars)
-             {
-               string = make_multibyte_string (read_buffer, nchars,
-                                               p - read_buffer);
-               return Fstring_make_unibyte (string);
-             }
-         }
-       else if (EQ (readcharfun, Qget_file_char)
-                || EQ (readcharfun, Qlambda))
-         {
-           /* Nowadays, reading directly from a file is used only for
-              compiled Emacs Lisp files, and those always use the
-              Emacs internal encoding.  Meanwhile, Qlambda is used
-              for reading dynamic byte code (compiled with
-              byte-compile-dynamic = t).  */
-           to_multibyte (&p, &end, &nchars);
+           nchars = str_as_unibyte (read_buffer, p - read_buffer);
+           p = read_buffer + nchars;
           }
         else
-         /* In all other cases, if we read these bytes as
-            separate characters, treat them as separate characters now.  */
-         nchars = p - read_buffer;
+         /* Otherwise, READ_BUFFER contains only ASCII.  */
  
         if (read_pure)
           return make_pure_string (read_buffer, nchars, p - read_buffer,
@@ -3352,9 +3422,28 @@ init_lread ()
           Vload_path = decode_env_path (0, normal);
           if (!NILP (Vinstallation_directory))
             {
+             Lisp_Object tem, tem1, sitelisp;
+
+             /* Remove site-lisp dirs from path temporarily and store
+                them in sitelisp, then conc them on at the end so
+                they're always first in path.  */
+             sitelisp = Qnil;
+             while (1)
+               {
+                 tem = Fcar (Vload_path);
+                 tem1 = Fstring_match (build_string ("site-lisp"),
+                                       tem, Qnil);
+                 if (!NILP (tem1))
+                   {
+                     Vload_path = Fcdr (Vload_path);
+                     sitelisp = Fcons (tem, sitelisp);
+                   }
+                 else
+                   break;
+               }
+
               /* Add to the path the lisp subdir of the
                  installation dir, if it exists.  */
-             Lisp_Object tem, tem1;
               tem = Fexpand_file_name (build_string ("lisp"),
                                        Vinstallation_directory);
               tem1 = Ffile_exists_p (tem);
@@ -3363,7 +3452,7 @@ init_lread ()
                   if (NILP (Fmember (tem, Vload_path)))
                     {
                       turn_off_warning = 1;
-                     Vload_path = nconc2 (Vload_path, Fcons (tem, Qnil));
+                     Vload_path = Fcons (tem, Vload_path);
                     }
                 }
               else
@@ -3378,7 +3467,7 @@ init_lread ()
               if (!NILP (tem1))
                 {
                   if (NILP (Fmember (tem, Vload_path)))
-                   Vload_path = nconc2 (Vload_path, Fcons (tem, Qnil));
+                   Vload_path = Fcons (tem, Vload_path);
                 }
  
               /* Add site-list under the installation dir, if it exists.  */
@@ -3388,7 +3477,7 @@ init_lread ()
               if (!NILP (tem1))
                 {
                   if (NILP (Fmember (tem, Vload_path)))
-                   Vload_path = nconc2 (Vload_path, Fcons (tem, Qnil));
+                   Vload_path = Fcons (tem, Vload_path);
                 }
  
               /* If Emacs was not built in the source directory,
@@ -3416,21 +3505,23 @@ init_lread ()
                                                Vsource_directory);
  
                       if (NILP (Fmember (tem, Vload_path)))
-                       Vload_path = nconc2 (Vload_path, Fcons (tem, Qnil));
+                       Vload_path = Fcons (tem, Vload_path);
  
                       tem = Fexpand_file_name (build_string ("leim"),
                                                Vsource_directory);
  
                       if (NILP (Fmember (tem, Vload_path)))
-                       Vload_path = nconc2 (Vload_path, Fcons (tem, Qnil));
+                       Vload_path = Fcons (tem, Vload_path);
  
                       tem = Fexpand_file_name (build_string ("site-lisp"),
                                                Vsource_directory);
  
                       if (NILP (Fmember (tem, Vload_path)))
-                       Vload_path = nconc2 (Vload_path, Fcons (tem, Qnil));
+                       Vload_path = Fcons (tem, Vload_path);
                     }
                 }
+             if (!NILP (sitelisp))
+               Vload_path = nconc2 (Fnreverse (sitelisp), Vload_path);
             }
         }
      }