Merge from trunk.

[bpt/emacs.git] / src / charset.c
diff --git a/src/charset.c b/src/charset.c

index 57e24ae..6aa6fe4 100644 (file)
--- a/src/charset.c
+++ b/src/charset.c
@@ -29,6 +29,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  #include <stdio.h>
  #include <unistd.h>
  #include <ctype.h>
+#include <limits.h>
  #include <sys/types.h>
  #include <setjmp.h>
  #include "lisp.h"
@@ -60,24 +61,24 @@ Lisp_Object Vcharset_hash_table;
  /* Table of struct charset.  */
  struct charset *charset_table;
  
-static int charset_table_size;
+static ptrdiff_t charset_table_size;
  static int charset_table_used;
  
  Lisp_Object Qcharsetp;
  
  /* Special charset symbols.  */
  Lisp_Object Qascii;
-Lisp_Object Qeight_bit;
-Lisp_Object Qiso_8859_1;
-Lisp_Object Qunicode;
-Lisp_Object Qemacs;
+static Lisp_Object Qeight_bit;
+static Lisp_Object Qiso_8859_1;
+static Lisp_Object Qunicode;
+static Lisp_Object Qemacs;
  
  /* The corresponding charsets.  */
  int charset_ascii;
  int charset_eight_bit;
-int charset_iso_8859_1;
+static int charset_iso_8859_1;
  int charset_unicode;
-int charset_emacs;
+static int charset_emacs;
  
  /* The other special charsets.  */
  int charset_jisx0201_roman;
@@ -86,7 +87,7 @@ int charset_jisx0208;
  int charset_ksc5601;
  
  /* Value of charset attribute `charset-iso-plane'.  */
-Lisp_Object Qgl, Qgr;
+static Lisp_Object Qgl, Qgr;
  
  /* Charset of unibyte characters.  */
  int charset_unibyte;
@@ -117,24 +118,25 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
  
  #define CODE_POINT_TO_INDEX(charset, code)                             \
    ((charset)->code_linear_p                                            \
-   ? (code) - (charset)->min_code                                      \
+   ? (int) ((code) - (charset)->min_code)                              \
     : (((charset)->code_space_mask[(code) >> 24] & 0x8)                 \
        && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)     \
        && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)      \
        && ((charset)->code_space_mask[(code) & 0xFF] & 0x1))            \
-   ? (((((code) >> 24) - (charset)->code_space[12])                    \
-       * (charset)->code_space[11])                                    \
-      + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])          \
-        * (charset)->code_space[7])                                    \
-      + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])           \
-        * (charset)->code_space[3])                                    \
-      + (((code) & 0xFF) - (charset)->code_space[0])                   \
-      - ((charset)->char_index_offset))                                        \
+   ? (int) (((((code) >> 24) - (charset)->code_space[12])              \
+            * (charset)->code_space[11])                               \
+           + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])     \
+              * (charset)->code_space[7])                              \
+           + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])      \
+              * (charset)->code_space[3])                              \
+           + (((code) & 0xFF) - (charset)->code_space[0])              \
+           - ((charset)->char_index_offset))                           \
     : -1)
  
  
-/* Convert the character index IDX to code-point CODE for CHARSET.
-   It is assumed that IDX is in a valid range.  */
+/* Return the code-point for the character index IDX in CHARSET.
+   IDX should be an unsigned int variable in a valid range (which is
+   always in nonnegative int range too).  IDX contains garbage afterwards.  */
  
  #define INDEX_TO_CODE_POINT(charset, idx)                                   \
    ((charset)->code_linear_p                                                 \
@@ -250,7 +252,7 @@ struct charset_map_entries
  static void
  load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
  {
-  Lisp_Object vec, table;
+  Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
    unsigned max_code = CHARSET_MAX_CODE (charset);
    int ascii_compatible_p = charset->ascii_compatible_p;
    int min_char, max_char, nonascii_min_char;
@@ -292,7 +294,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
        else
         {
           if (! temp_charset_work)
-           temp_charset_work = malloc (sizeof (*temp_charset_work));
+           temp_charset_work = xmalloc (sizeof (*temp_charset_work));
           if (control_flag == 1)
             {
               memset (temp_charset_work->table.decoder, -1,
@@ -316,7 +318,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
    for (i = 0; i < n_entries; i++)
      {
        unsigned from, to;
-      int from_index, to_index;
+      int from_index, to_index, lim_index;
        int from_c, to_c;
        int idx = i % 0x10000;
  
@@ -338,6 +340,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
         }
        if (from_index < 0 || to_index < 0)
         continue;
+      lim_index = to_index + 1;
  
        if (to_c > max_char)
         max_char = to_c;
@@ -347,10 +350,10 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
        if (control_flag == 1)
         {
           if (charset->method == CHARSET_METHOD_MAP)
-           for (; from_index <= to_index; from_index++, from_c++)
+           for (; from_index < lim_index; from_index++, from_c++)
               ASET (vec, from_index, make_number (from_c));
           else
-           for (; from_index <= to_index; from_index++, from_c++)
+           for (; from_index < lim_index; from_index++, from_c++)
               CHAR_TABLE_SET (Vchar_unify_table,
                               CHARSET_CODE_OFFSET (charset) + from_index,
                               make_number (from_c));
@@ -359,25 +362,26 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
         {
           if (charset->method == CHARSET_METHOD_MAP
               && CHARSET_COMPACT_CODES_P (charset))
-           for (; from_index <= to_index; from_index++, from_c++)
+           for (; from_index < lim_index; from_index++, from_c++)
               {
-               unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
+               unsigned code = from_index;
+               code = INDEX_TO_CODE_POINT (charset, code);
  
                 if (NILP (CHAR_TABLE_REF (table, from_c)))
                   CHAR_TABLE_SET (table, from_c, make_number (code));
               }
           else
-           for (; from_index <= to_index; from_index++, from_c++)
+           for (; from_index < lim_index; from_index++, from_c++)
               {
                 if (NILP (CHAR_TABLE_REF (table, from_c)))
                   CHAR_TABLE_SET (table, from_c, make_number (from_index));
               }
         }
        else if (control_flag == 3)
-       for (; from_index <= to_index; from_index++, from_c++)
+       for (; from_index < lim_index; from_index++, from_c++)
           SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
        else if (control_flag == 4)
-       for (; from_index <= to_index; from_index++, from_c++)
+       for (; from_index < lim_index; from_index++, from_c++)
           SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
        else                     /* control_flag == 0 */
         {
@@ -416,8 +420,8 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
  /* Read a hexadecimal number (preceded by "0x") from the file FP while
     paying attention to comment character '#'.  */
  
-static INLINE unsigned
-read_hex (FILE *fp, int *eof)
+static inline unsigned
+read_hex (FILE *fp, int *eof, int *overflow)
  {
    int c;
    unsigned n;
@@ -439,15 +443,16 @@ read_hex (FILE *fp, int *eof)
        *eof = 1;
        return 0;
      }
-  *eof = 0;
    n = 0;
-  if (c == 'x')
-    while ((c = getc (fp)) != EOF && isxdigit (c))
+  while (isxdigit (c = getc (fp)))
+    {
+      if (UINT_MAX >> 4 < n)
+       *overflow = 1;
        n = ((n << 4)
-          | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
-  else
-    while ((c = getc (fp)) != EOF && isdigit (c))
-      n = (n * 10) + c - '0';
+          | (c - ('0' <= c && c <= '9' ? '0'
+                  : 'A' <= c && c <= 'F' ? 'A' - 10
+                  : 'a' - 10)));
+    }
    if (c != EOF)
      ungetc (c, fp);
    return n;
@@ -477,10 +482,10 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
    unsigned max_code = CHARSET_MAX_CODE (charset);
    int fd;
    FILE *fp;
-  int eof;
    Lisp_Object suffixes;
    struct charset_map_entries *head, *entries;
-  int n_entries, count;
+  int n_entries;
+  ptrdiff_t count;
    USE_SAFE_ALLOCA;
  
    suffixes = Fcons (build_string (".map"),
@@ -492,7 +497,7 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
    unbind_to (count, Qnil);
    if (fd < 0
        || ! (fp = fdopen (fd, "r")))
-    error ("Failure in loading charset map: %S", SDATA (mapfile));
+    error ("Failure in loading charset map: %s", SDATA (mapfile));
  
    /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
       large (larger than MAX_ALLOCA).  */
@@ -502,22 +507,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
    memset (entries, 0, sizeof (struct charset_map_entries));
  
    n_entries = 0;
-  eof = 0;
    while (1)
      {
-      unsigned from, to;
-      int c;
+      unsigned from, to, c;
        int idx;
+      int eof = 0, overflow = 0;
  
-      from = read_hex (fp, &eof);
+      from = read_hex (fp, &eof, &overflow);
        if (eof)
         break;
        if (getc (fp) == '-')
-       to = read_hex (fp, &eof);
+       to = read_hex (fp, &eof, &overflow);
        else
         to = from;
-      c = (int) read_hex (fp, &eof);
+      if (eof)
+       break;
+      c = read_hex (fp, &eof, &overflow);
+      if (eof)
+       break;
  
+      if (overflow)
+       continue;
        if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
         continue;
  
@@ -527,8 +537,9 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
                        sizeof (struct charset_map_entries));
           entries = entries->next;
           memset (entries, 0, sizeof (struct charset_map_entries));
+         n_entries = 0;
         }
-      idx = n_entries % 0x10000;
+      idx = n_entries;
        entries->entry[idx].from = from;
        entries->entry[idx].to = to;
        entries->entry[idx].c = c;
@@ -569,7 +580,7 @@ load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int cont
      {
        Lisp_Object val, val2;
        unsigned from, to;
-      int c;
+      EMACS_INT c;
        int idx;
  
        val = AREF (vec, i);
@@ -577,16 +588,11 @@ load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int cont
         {
           val2 = XCDR (val);
           val = XCAR (val);
-         CHECK_NATNUM (val);
-         CHECK_NATNUM (val2);
           from = XFASTINT (val);
           to = XFASTINT (val2);
         }
        else
-       {
-         CHECK_NATNUM (val);
-         from = to = XFASTINT (val);
-       }
+       from = to = XFASTINT (val);
        val = AREF (vec, i + 1);
        CHECK_NATNUM (val);
        c = XFASTINT (val);
@@ -629,8 +635,12 @@ load_charset (struct charset *charset, int control_flag)
  
    if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
      map = CHARSET_MAP (charset);
-  else if (CHARSET_UNIFIED_P (charset))
-    map = CHARSET_UNIFY_MAP (charset);
+  else
+    {
+      if (! CHARSET_UNIFIED_P (charset))
+       abort ();
+      map = CHARSET_UNIFY_MAP (charset);
+    }
    if (STRINGP (map))
      load_charset_map_from_file (charset, map, control_flag);
    else
@@ -646,12 +656,10 @@ DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
  }
  
  
-void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
-                           Lisp_Object function, Lisp_Object arg,
-                           unsigned from, unsigned to);
-
-void
-map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
+static void
+map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
+                     Lisp_Object function, Lisp_Object arg,
+                     unsigned int from, unsigned int to)
  {
    int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
    int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
@@ -668,9 +676,9 @@ map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object
  
    while (1)
      {
-      int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
+      int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
  
-      if (index >= from_idx && index <= to_idx)
+      if (idx >= from_idx && idx <= to_idx)
         {
           if (NILP (XCAR (range)))
             XSETCAR (range, make_number (c));
@@ -812,7 +820,6 @@ range of code points (in CHARSET) of target characters.  */)
      from = CHARSET_MIN_CODE (cs);
    else
      {
-      CHECK_NATNUM (from_code);
        from = XINT (from_code);
        if (from < CHARSET_MIN_CODE (cs))
         from = CHARSET_MIN_CODE (cs);
@@ -821,7 +828,6 @@ range of code points (in CHARSET) of target characters.  */)
      to = CHARSET_MAX_CODE (cs);
    else
      {
-      CHECK_NATNUM (to_code);
        to = XINT (to_code);
        if (to > CHARSET_MAX_CODE (cs))
         to = CHARSET_MAX_CODE (cs);
@@ -840,12 +846,12 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal,
         Sdefine_charset_internal, charset_arg_max, MANY, 0,
         doc: /* For internal use only.
  usage: (define-charset-internal ...)  */)
-  (int nargs, Lisp_Object *args)
+  (ptrdiff_t nargs, Lisp_Object *args)
  {
    /* Charset attr vector.  */
    Lisp_Object attrs;
    Lisp_Object val;
-  unsigned hash_code;
+  EMACS_UINT hash_code;
    struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
    int i, j;
    struct charset charset;
@@ -865,21 +871,26 @@ usage: (define-charset-internal ...)  */)
    ASET (attrs, charset_name, args[charset_arg_name]);
  
    val = args[charset_arg_code_space];
-  for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
+  for (i = 0, dimension = 0, nchars = 1; ; i++)
      {
+      Lisp_Object min_byte_obj, max_byte_obj;
        int min_byte, max_byte;
  
-      min_byte = XINT (Faref (val, make_number (i * 2)));
-      max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
-      if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
-       error ("Invalid :code-space value");
+      min_byte_obj = Faref (val, make_number (i * 2));
+      max_byte_obj = Faref (val, make_number (i * 2 + 1));
+      CHECK_RANGED_INTEGER (0, min_byte_obj, 255);
+      min_byte = XINT (min_byte_obj);
+      CHECK_RANGED_INTEGER (min_byte, max_byte_obj, 255);
+      max_byte = XINT (max_byte_obj);
        charset.code_space[i * 4] = min_byte;
        charset.code_space[i * 4 + 1] = max_byte;
        charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
-      nchars *= charset.code_space[i * 4 + 2];
-      charset.code_space[i * 4 + 3] = nchars;
        if (max_byte > 0)
         dimension = i + 1;
+      if (i == 3)
+       break;
+      nchars *= charset.code_space[i * 4 + 2];
+      charset.code_space[i * 4 + 3] = nchars;
      }
  
    val = args[charset_arg_dimension];
@@ -887,10 +898,8 @@ usage: (define-charset-internal ...)  */)
      charset.dimension = dimension;
    else
      {
-      CHECK_NATNUM (val);
+      CHECK_RANGED_INTEGER (1, val, 4);
        charset.dimension = XINT (val);
-      if (charset.dimension < 1 || charset.dimension > 4)
-       args_out_of_range_3 (val, make_number (1), make_number (4));
      }
  
    charset.code_linear_p
@@ -916,31 +925,22 @@ usage: (define-charset-internal ...)  */)
    charset.min_code = (charset.code_space[0]
                       | (charset.code_space[4] << 8)
                       | (charset.code_space[8] << 16)
-                     | (charset.code_space[12] << 24));
+                     | ((unsigned) charset.code_space[12] << 24));
    charset.max_code = (charset.code_space[1]
                       | (charset.code_space[5] << 8)
                       | (charset.code_space[9] << 16)
-                     | (charset.code_space[13] << 24));
+                     | ((unsigned) charset.code_space[13] << 24));
    charset.char_index_offset = 0;
  
    val = args[charset_arg_min_code];
    if (! NILP (val))
      {
-      unsigned code;
+      unsigned code = cons_to_unsigned (val, UINT_MAX);
  
-      if (INTEGERP (val))
-       code = XINT (val);
-      else
-       {
-         CHECK_CONS (val);
-         CHECK_NUMBER_CAR (val);
-         CHECK_NUMBER_CDR (val);
-         code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
-       }
        if (code < charset.min_code
           || code > charset.max_code)
-       args_out_of_range_3 (make_number (charset.min_code),
-                            make_number (charset.max_code), val);
+       args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+                            make_fixnum_or_float (charset.max_code), val);
        charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
        charset.min_code = code;
      }
@@ -948,21 +948,12 @@ usage: (define-charset-internal ...)  */)
    val = args[charset_arg_max_code];
    if (! NILP (val))
      {
-      unsigned code;
+      unsigned code = cons_to_unsigned (val, UINT_MAX);
  
-      if (INTEGERP (val))
-       code = XINT (val);
-      else
-       {
-         CHECK_CONS (val);
-         CHECK_NUMBER_CAR (val);
-         CHECK_NUMBER_CDR (val);
-         code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
-       }
        if (code < charset.min_code
           || code > charset.max_code)
-       args_out_of_range_3 (make_number (charset.min_code),
-                            make_number (charset.max_code), val);
+       args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+                            make_fixnum_or_float (charset.max_code), val);
        charset.max_code = code;
      }
  
@@ -975,18 +966,14 @@ usage: (define-charset-internal ...)  */)
         charset.invalid_code = 0;
        else
         {
-         XSETINT (val, charset.max_code + 1);
-         if (XINT (val) == charset.max_code + 1)
+         if (charset.max_code < UINT_MAX)
             charset.invalid_code = charset.max_code + 1;
           else
             error ("Attribute :invalid-code must be specified");
         }
      }
    else
-    {
-      CHECK_NATNUM (val);
-      charset.invalid_code = XFASTINT (val);
-    }
+    charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
  
    val = args[charset_arg_iso_final];
    if (NILP (val))
@@ -995,7 +982,7 @@ usage: (define-charset-internal ...)  */)
      {
        CHECK_NUMBER (val);
        if (XINT (val) < '0' || XINT (val) > 127)
-       error ("Invalid iso-final-char: %d", XINT (val));
+       error ("Invalid iso-final-char: %"pI"d", XINT (val));
        charset.iso_final = XINT (val);
      }
  
@@ -1004,9 +991,7 @@ usage: (define-charset-internal ...)  */)
      charset.iso_revision = -1;
    else
      {
-      CHECK_NUMBER (val);
-      if (XINT (val) > 63)
-       args_out_of_range (make_number (63), val);
+      CHECK_RANGED_INTEGER (-1, val, 63);
        charset.iso_revision = XINT (val);
      }
  
@@ -1017,7 +1002,7 @@ usage: (define-charset-internal ...)  */)
      {
        CHECK_NATNUM (val);
        if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
-       error ("Invalid emacs-mule-id: %d", XINT (val));
+       error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
        charset.emacs_mule_id = XINT (val);
      }
  
@@ -1032,17 +1017,17 @@ usage: (define-charset-internal ...)  */)
    if (! NILP (args[charset_arg_code_offset]))
      {
        val = args[charset_arg_code_offset];
-      CHECK_NUMBER (val);
+      CHECK_CHARACTER (val);
  
        charset.method = CHARSET_METHOD_OFFSET;
        charset.code_offset = XINT (val);
  
-      i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
-      charset.min_char = i + charset.code_offset;
        i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
-      charset.max_char = i + charset.code_offset;
-      if (charset.max_char > MAX_CHAR)
+      if (MAX_CHAR - charset.code_offset < i)
         error ("Unsupported max char: %d", charset.max_char);
+      charset.max_char = i + charset.code_offset;
+      i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+      charset.min_char = i + charset.code_offset;
  
        i = (charset.min_char >> 7) << 7;
        for (; i < 0x10000 && i <= charset.max_char; i += 128)
@@ -1113,7 +1098,7 @@ usage: (define-charset-internal ...)  */)
               car_part = XCAR (elt);
               cdr_part = XCDR (elt);
               CHECK_CHARSET_GET_ID (car_part, this_id);
-             CHECK_NUMBER (cdr_part);
+             CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
               offset = XINT (cdr_part);
             }
           else
@@ -1157,13 +1142,25 @@ usage: (define-charset-internal ...)  */)
                                      hash_code);
        if (charset_table_used == charset_table_size)
         {
-         struct charset *new_table
-           = (struct charset *) xmalloc (sizeof (struct charset)
-                                         * (charset_table_size + 16));
-         memcpy (new_table, charset_table,
-                 sizeof (struct charset) * charset_table_size);
-         charset_table_size += 16;
+         /* Ensure that charset IDs fit into 'int' as well as into the
+            restriction imposed by fixnums.  Although the 'int' restriction
+            could be removed, too much other code would need altering; for
+            example, the IDs are stuffed into struct
+            coding_system.charbuf[i] entries, which are 'int'.  */
+         int old_size = charset_table_size;
+         struct charset *new_table =
+           xpalloc (0, &charset_table_size, 1,
+                    min (INT_MAX, MOST_POSITIVE_FIXNUM),
+                    sizeof *charset_table);
+         memcpy (new_table, charset_table, old_size * sizeof *new_table);
           charset_table = new_table;
+         /* FIXME: This leaks memory, as the old charset_table becomes
+            unreachable.  If the old charset table is charset_table_init
+            then this leak is intentional; otherwise, it's unclear.
+            If the latter memory leak is intentional, a
+            comment should be added to explain this.  If not, the old
+            charset_table should be freed, by passing it as the 1st argument
+            to xpalloc and removing the memcpy.  */
         }
        id = charset_table_used++;
        new_definition_p = 1;
@@ -1253,12 +1250,13 @@ usage: (define-charset-internal ...)  */)
  static int
  define_charset_internal (Lisp_Object name,
                          int dimension,
-                        const unsigned char *code_space,
+                        const char *code_space_chars,
                          unsigned min_code, unsigned max_code,
                          int iso_final, int iso_revision, int emacs_mule_id,
                          int ascii_compatible, int supplementary,
                          int code_offset)
  {
+  const unsigned char *code_space = (const unsigned char *) code_space_chars;
    Lisp_Object args[charset_arg_max];
    Lisp_Object plist[14];
    Lisp_Object val;
@@ -1389,8 +1387,8 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET.  */)
      }
    else if (CHAR_TABLE_P (Vchar_unify_table))
      {
-      int min_code = CHARSET_MIN_CODE (cs);
-      int max_code = CHARSET_MAX_CODE (cs);
+      unsigned min_code = CHARSET_MIN_CODE (cs);
+      unsigned max_code = CHARSET_MAX_CODE (cs);
        int min_char = DECODE_CHAR (cs, min_code);
        int max_char = DECODE_CHAR (cs, max_code);
  
@@ -1431,14 +1429,16 @@ check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Obje
  {
    CHECK_NATNUM (dimension);
    CHECK_NATNUM (chars);
-  CHECK_NATNUM (final_char);
+  CHECK_CHARACTER (final_char);
  
    if (XINT (dimension) > 3)
-    error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
+    error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
+          XINT (dimension));
    if (XINT (chars) != 94 && XINT (chars) != 96)
-    error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
+    error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
    if (XINT (final_char) < '0' || XINT (final_char) > '~')
-    error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
+    error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
+          (int)XINT (final_char));
  }
  
  
@@ -1503,7 +1503,7 @@ string_xstring_p (Lisp_Object string)
     It may lookup a translation table TABLE if supplied.  */
  
  static void
-find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
+find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars, ptrdiff_t nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
  {
    const unsigned char *pend = ptr + nbytes;
  
@@ -1550,10 +1550,10 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
    (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
  {
    Lisp_Object charsets;
-  EMACS_INT from, from_byte, to, stop, stop_byte;
+  ptrdiff_t from, from_byte, to, stop, stop_byte;
    int i;
    Lisp_Object val;
-  int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+  int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  
    validate_region (&beg, &end);
    from = XFASTINT (beg);
@@ -1628,7 +1628,7 @@ maybe_unify_char (int c, Lisp_Object val)
    struct charset *charset;
  
    if (INTEGERP (val))
-    return XINT (val);
+    return XFASTINT (val);
    if (NILP (val))
      return c;
  
@@ -1638,7 +1638,7 @@ maybe_unify_char (int c, Lisp_Object val)
      {
        val = CHAR_TABLE_REF (Vchar_unify_table, c);
        if (! NILP (val))
-       c = XINT (val);
+       c = XFASTINT (val);
      }
    else
      {
@@ -1832,7 +1832,7 @@ encode_char (struct charset *charset, int c)
      }
    else                         /* method == CHARSET_METHOD_OFFSET */
      {
-      int code_index = c - CHARSET_CODE_OFFSET (charset);
+      unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
  
        code = INDEX_TO_CODE_POINT (charset, code_index);
      }
@@ -1856,17 +1856,7 @@ and CODE-POINT to a character.  Currently not supported and just ignored.  */)
    struct charset *charsetp;
  
    CHECK_CHARSET_GET_ID (charset, id);
-  if (CONSP (code_point))
-    {
-      CHECK_NATNUM_CAR (code_point);
-      CHECK_NATNUM_CDR (code_point);
-      code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
-    }
-  else
-    {
-      CHECK_NATNUM (code_point);
-      code = XINT (code_point);
-    }
+  code = cons_to_unsigned (code_point, UINT_MAX);
    charsetp = CHARSET_FROM_ID (id);
    c = DECODE_CHAR (charsetp, code);
    return (c >= 0 ? make_number (c) : Qnil);
@@ -1881,19 +1871,18 @@ Optional argument RESTRICTION specifies a way to map CH to a
  code-point in CCS.  Currently not supported and just ignored.  */)
    (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
  {
-  int id;
+  int c, id;
    unsigned code;
    struct charset *charsetp;
  
    CHECK_CHARSET_GET_ID (charset, id);
-  CHECK_NATNUM (ch);
+  CHECK_CHARACTER (ch);
+  c = XFASTINT (ch);
    charsetp = CHARSET_FROM_ID (id);
-  code = ENCODE_CHAR (charsetp, XINT (ch));
+  code = ENCODE_CHAR (charsetp, c);
    if (code == CHARSET_INVALID_CODE (charsetp))
      return Qnil;
-  if (code > 0x7FFFFFF)
-    return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
-  return make_number (code);
+  return INTEGER_TO_CONS (code);
  }
  
  
@@ -2065,10 +2054,10 @@ that case, find the charset from what supported by that coding system.  */)
  
           for (; CONSP (restriction); restriction = XCDR (restriction))
             {
-             struct charset *charset;
+             struct charset *rcharset;
  
-             CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
-             if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
+             CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
+             if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
                 return XCAR (restriction);
             }
           return Qnil;
@@ -2132,7 +2121,7 @@ It should be called only from temacs invoked for dumping.  */)
  {
    if (temp_charset_work)
      {
-      free (temp_charset_work);
+      xfree (temp_charset_work);
        temp_charset_work = NULL;
      }
  
@@ -2165,11 +2154,12 @@ DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
         1, MANY, 0,
         doc: /* Assign higher priority to the charsets given as arguments.
  usage: (set-charset-priority &rest charsets)  */)
-  (int nargs, Lisp_Object *args)
+  (ptrdiff_t nargs, Lisp_Object *args)
  {
    Lisp_Object new_head, old_list, arglist[2];
    Lisp_Object list_2022, list_emacs_mule;
-  int i, id;
+  ptrdiff_t i;
+  int id;
  
    old_list = Fcopy_sequence (Vcharset_ordered_list);
    new_head = Qnil;
@@ -2229,14 +2219,16 @@ struct charset_sort_data
  {
    Lisp_Object charset;
    int id;
-  int priority;
+  ptrdiff_t priority;
  };
  
  static int
  charset_compare (const void *d1, const void *d2)
  {
    const struct charset_sort_data *data1 = d1, *data2 = d2;
-  return (data1->priority - data2->priority);
+  if (data1->priority != data2->priority)
+    return data1->priority < data2->priority ? -1 : 1;
+  return 0;
  }
  
  DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
@@ -2246,26 +2238,25 @@ See also `charset-priority-list' and `set-charset-priority'.  */)
       (Lisp_Object charsets)
  {
    Lisp_Object len = Flength (charsets);
-  int n = XFASTINT (len), i, j, done;
+  ptrdiff_t n = XFASTINT (len), i, j;
+  int done;
    Lisp_Object tail, elt, attrs;
    struct charset_sort_data *sort_data;
-  int id, min_id, max_id;
+  int id, min_id = INT_MAX, max_id = INT_MIN;
    USE_SAFE_ALLOCA;
  
    if (n == 0)
      return Qnil;
-  SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
+  SAFE_NALLOCA (sort_data, 1, n);
    for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
      {
        elt = XCAR (tail);
        CHECK_CHARSET_GET_ATTR (elt, attrs);
        sort_data[i].charset = elt;
        sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
-      if (i == 0)
-       min_id = max_id = id;
-      else if (id < min_id)
+      if (id < min_id)
         min_id = id;
-      else if (id > max_id)
+      if (id > max_id)
         max_id = id;
      }
    for (done = 0, tail = Vcharset_ordered_list, i = 0;
@@ -2328,6 +2319,18 @@ init_charset_once (void)
  
  #ifdef emacs
  
+/* Allocate an initial charset table that is large enough to handle
+   Emacs while it is bootstrapping.  As of September 2011, the size
+   needs to be at least 166; make it a bit bigger to allow for future
+   expansion.
+
+   Don't make the value so small that the table is reallocated during
+   bootstrapping, as glibc malloc calls larger than just under 64 KiB
+   during an initial bootstrap wreak havoc after dumping; see the
+   M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead
+   internal to glibc malloc and perhaps to Emacs malloc debugging.  */
+static struct charset charset_table_init[180];
+
  void
  syms_of_charset (void)
  {
@@ -2363,9 +2366,8 @@ syms_of_charset (void)
      Vcharset_hash_table = Fmake_hash_table (2, args);
    }
  
-  charset_table_size = 128;
-  charset_table = ((struct charset *)
-                  xmalloc (sizeof (struct charset) * charset_table_size));
+  charset_table = charset_table_init;
+  charset_table_size = sizeof charset_table_init / sizeof *charset_table_init;
    charset_table_used = 0;
  
    defsubr (&Scharsetp);