Merge from trunk.

[bpt/emacs.git] / src / charset.c
diff --git a/src/charset.c b/src/charset.c

index 43155d2..6aa6fe4 100644 (file)
--- a/src/charset.c
+++ b/src/charset.c
@@ -1,8 +1,7 @@
  /* Basic character set support.
-   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-     2008  Free Software Foundation, Inc.
+   Copyright (C) 2001-2011  Free Software Foundation, Inc.
     Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-     2005, 2006, 2007, 2008
+     2005, 2006, 2007, 2008, 2009, 2010, 2011
       National Institute of Advanced Industrial Science and Technology (AIST)
       Registration Number H14PRO021
  
@@ -30,7 +29,9 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  #include <stdio.h>
  #include <unistd.h>
  #include <ctype.h>
+#include <limits.h>
  #include <sys/types.h>
+#include <setjmp.h>
  #include "lisp.h"
  #include "character.h"
  #include "charset.h"
@@ -53,10 +54,6 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  
  */
  
-/* List of all charsets.  This variable is used only from Emacs
-   Lisp.  */
-Lisp_Object Vcharset_list;
-
  /* Hash table that contains attributes of each charset.  Keys are
     charset symbols, and values are vectors of charset attributes.  */
  Lisp_Object Vcharset_hash_table;
@@ -64,32 +61,33 @@ Lisp_Object Vcharset_hash_table;
  /* Table of struct charset.  */
  struct charset *charset_table;
  
-static int charset_table_size;
+static ptrdiff_t charset_table_size;
  static int charset_table_used;
  
  Lisp_Object Qcharsetp;
  
  /* Special charset symbols.  */
  Lisp_Object Qascii;
-Lisp_Object Qeight_bit;
-Lisp_Object Qiso_8859_1;
-Lisp_Object Qunicode;
-Lisp_Object Qemacs;
+static Lisp_Object Qeight_bit;
+static Lisp_Object Qiso_8859_1;
+static Lisp_Object Qunicode;
+static Lisp_Object Qemacs;
  
  /* The corresponding charsets.  */
  int charset_ascii;
  int charset_eight_bit;
-int charset_iso_8859_1;
+static int charset_iso_8859_1;
  int charset_unicode;
-int charset_emacs;
+static int charset_emacs;
  
  /* The other special charsets.  */
  int charset_jisx0201_roman;
  int charset_jisx0208_1978;
  int charset_jisx0208;
+int charset_ksc5601;
  
  /* Value of charset attribute `charset-iso-plane'.  */
-Lisp_Object Qgl, Qgr;
+static Lisp_Object Qgl, Qgr;
  
  /* Charset of unibyte characters.  */
  int charset_unibyte;
@@ -112,45 +110,33 @@ Lisp_Object Viso_2022_charset_list;
  /* List of emacs-mule charsets.  */
  Lisp_Object Vemacs_mule_charset_list;
  
-struct charset *emacs_mule_charset[256];
+int emacs_mule_charset[256];
  
  /* Mapping table from ISO2022's charset (specified by DIMENSION,
     CHARS, and FINAL-CHAR) to Emacs' charset.  */
  int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
  
-Lisp_Object Vcharset_map_path;
-
-Lisp_Object Vchar_unified_charset_table;
-
-Lisp_Object Vcurrent_iso639_language;
-
-/* Defined in chartab.c */
-extern void
-map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
-                               Lisp_Object function, Lisp_Object table,
-                               Lisp_Object arg, struct charset *charset,
-                               unsigned from, unsigned to));
-
  #define CODE_POINT_TO_INDEX(charset, code)                             \
    ((charset)->code_linear_p                                            \
-   ? (code) - (charset)->min_code                                      \
+   ? (int) ((code) - (charset)->min_code)                              \
     : (((charset)->code_space_mask[(code) >> 24] & 0x8)                 \
        && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)     \
        && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)      \
        && ((charset)->code_space_mask[(code) & 0xFF] & 0x1))            \
-   ? (((((code) >> 24) - (charset)->code_space[12])                    \
-       * (charset)->code_space[11])                                    \
-      + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])          \
-        * (charset)->code_space[7])                                    \
-      + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])           \
-        * (charset)->code_space[3])                                    \
-      + (((code) & 0xFF) - (charset)->code_space[0])                   \
-      - ((charset)->char_index_offset))                                        \
+   ? (int) (((((code) >> 24) - (charset)->code_space[12])              \
+            * (charset)->code_space[11])                               \
+           + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])     \
+              * (charset)->code_space[7])                              \
+           + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])      \
+              * (charset)->code_space[3])                              \
+           + (((code) & 0xFF) - (charset)->code_space[0])              \
+           - ((charset)->char_index_offset))                           \
     : -1)
  
  
-/* Convert the character index IDX to code-point CODE for CHARSET.
-   It is assumed that IDX is in a valid range.  */
+/* Return the code-point for the character index IDX in CHARSET.
+   IDX should be an unsigned int variable in a valid range (which is
+   always in nonnegative int range too).  IDX contains garbage afterwards.  */
  
  #define INDEX_TO_CODE_POINT(charset, idx)                                   \
    ((charset)->code_linear_p                                                 \
@@ -166,7 +152,63 @@ map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
         | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11]))  \
           << 24))))
  
+/* Structure to hold mapping tables for a charset.  Used by temacs
+   invoked for dumping.  */
  
+static struct
+{
+  /* The current charset for which the following tables are setup.  */
+  struct charset *current;
+
+  /* 1 iff the following table is used for encoder.  */
+  short for_encoder;
+
+  /* When the following table is used for encoding, mininum and
+     maxinum character of the current charset.  */
+  int min_char, max_char;
+
+  /* A Unicode character correspoinding to the code indice 0 (i.e. the
+     minimum code-point) of the current charset, or -1 if the code
+     indice 0 is not a Unicode character.  This is checked when
+     table.encoder[CHAR] is zero.  */
+  int zero_index_char;
+
+  union {
+    /* Table mapping code-indices (not code-points) of the current
+       charset to Unicode characters.  If decoder[CHAR] is -1, CHAR
+       doesn't belong to the current charset.  */
+    int decoder[0x10000];
+    /* Table mapping Unicode characters to code-indices of the current
+       charset.  The first 0x10000 elements are for BMP (0..0xFFFF),
+       and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
+       (0x20000..0x2FFFF).  Note that there is no charset map that
+       uses both SMP and SIP.  */
+    unsigned short encoder[0x20000];
+  } table;
+} *temp_charset_work;
+
+#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE)                 \
+  do {                                                         \
+    if ((CODE) == 0)                                           \
+      temp_charset_work->zero_index_char = (C);                        \
+    else if ((C) < 0x20000)                                    \
+      temp_charset_work->table.encoder[(C)] = (CODE);          \
+    else                                                       \
+      temp_charset_work->table.encoder[(C) - 0x10000] = (CODE);        \
+  } while (0)
+
+#define GET_TEMP_CHARSET_WORK_ENCODER(C)                                 \
+  ((C) == temp_charset_work->zero_index_char ? 0                         \
+   : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)]              \
+                     ? (int) temp_charset_work->table.encoder[(C)] : -1) \
+   : temp_charset_work->table.encoder[(C) - 0x10000]                     \
+   ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
+
+#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
+  (temp_charset_work->table.decoder[(CODE)] = (C))
+
+#define GET_TEMP_CHARSET_WORK_DECODER(CODE)    \
+  (temp_charset_work->table.decoder[(CODE)])
  \f
  
  /* Set to 1 to warn that a charset map is loaded and thus a buffer
@@ -182,25 +224,35 @@ struct charset_map_entries
    struct charset_map_entries *next;
  };
  
-/* Load the mapping information for CHARSET from ENTRIES.
+/* Load the mapping information of CHARSET from ENTRIES for
+   initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
+   encoding (CONTROL_FLAG == 2).
+
+   If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
+   and CHARSET->fast_map.
  
-   If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
+   If CONTROL_FLAG is 1, setup the following tables according to
+   CHARSET->method and inhibit_load_charset_map.
  
-   If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
-   CHARSET->decoder, and CHARSET->encoder.
+   CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
+   ----------------------+--------------------+---------------------------
+   CHARSET_METHOD_MAP    | CHARSET->decoder   | temp_charset_work->decoder
+   ----------------------+--------------------+---------------------------
+   CHARSET_METHOD_OFFSET | Vchar_unify_table  | temp_charset_work->decoder
  
-   If CONTROL_FLAG is 2, setup CHARSET->deunifier and
-   Vchar_unify_table.  If Vchar_unified_charset_table is non-nil,
-   setup it too.  */
+   If CONTROL_FLAG is 2, setup the following tables.
+
+   CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
+   ----------------------+--------------------+---------------------------
+   CHARSET_METHOD_MAP    | CHARSET->encoder   | temp_charset_work->encoder
+   ----------------------+--------------------+--------------------------
+   CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
+*/
  
  static void
-load_charset_map (charset, entries, n_entries, control_flag)
-  struct charset *charset;
-  struct charset_map_entries *entries;
-  int n_entries;
-  int control_flag;
+load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
  {
-  Lisp_Object vec, table;
+  Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
    unsigned max_code = CHARSET_MAX_CODE (charset);
    int ascii_compatible_p = charset->ascii_compatible_p;
    int min_char, max_char, nonascii_min_char;
@@ -210,16 +262,54 @@ load_charset_map (charset, entries, n_entries, control_flag)
    if (n_entries <= 0)
      return;
  
-  if (control_flag > 0)
+  if (control_flag)
      {
-      int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
-
-      table = Fmake_char_table (Qnil, Qnil);
-      if (control_flag == 1)
-       vec = Fmake_vector (make_number (n), make_number (-1));
-      else if (! CHAR_TABLE_P (Vchar_unify_table))
-       Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
+      if (! inhibit_load_charset_map)
+       {
+         if (control_flag == 1)
+           {
+             if (charset->method == CHARSET_METHOD_MAP)
+               {
+                 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
  
+                 vec = CHARSET_DECODER (charset)
+                   = Fmake_vector (make_number (n), make_number (-1));
+               }
+             else
+               {
+                 char_table_set_range (Vchar_unify_table,
+                                       charset->min_char, charset->max_char,
+                                       Qnil);
+               }
+           }
+         else
+           {
+             table = Fmake_char_table (Qnil, Qnil);
+             if (charset->method == CHARSET_METHOD_MAP)
+               CHARSET_ENCODER (charset) = table;
+             else
+               CHARSET_DEUNIFIER (charset) = table;
+           }
+       }
+      else
+       {
+         if (! temp_charset_work)
+           temp_charset_work = xmalloc (sizeof (*temp_charset_work));
+         if (control_flag == 1)
+           {
+             memset (temp_charset_work->table.decoder, -1,
+                     sizeof (int) * 0x10000);
+           }
+         else
+           {
+             memset (temp_charset_work->table.encoder, 0,
+                     sizeof (unsigned short) * 0x20000);
+             temp_charset_work->zero_index_char = -1;
+           }
+         temp_charset_work->current = charset;
+         temp_charset_work->for_encoder = (control_flag == 2);
+         control_flag += 2;
+       }
        charset_map_loaded = 1;
      }
  
@@ -228,7 +318,7 @@ load_charset_map (charset, entries, n_entries, control_flag)
    for (i = 0; i < n_entries; i++)
      {
        unsigned from, to;
-      int from_index, to_index;
+      int from_index, to_index, lim_index;
        int from_c, to_c;
        int idx = i % 0x10000;
  
@@ -250,15 +340,51 @@ load_charset_map (charset, entries, n_entries, control_flag)
         }
        if (from_index < 0 || to_index < 0)
         continue;
+      lim_index = to_index + 1;
  
-      if (control_flag < 2)
-       {
-         int c;
+      if (to_c > max_char)
+       max_char = to_c;
+      else if (from_c < min_char)
+       min_char = from_c;
  
-         if (to_c > max_char)
-           max_char = to_c;
-         else if (from_c < min_char)
-           min_char = from_c;
+      if (control_flag == 1)
+       {
+         if (charset->method == CHARSET_METHOD_MAP)
+           for (; from_index < lim_index; from_index++, from_c++)
+             ASET (vec, from_index, make_number (from_c));
+         else
+           for (; from_index < lim_index; from_index++, from_c++)
+             CHAR_TABLE_SET (Vchar_unify_table,
+                             CHARSET_CODE_OFFSET (charset) + from_index,
+                             make_number (from_c));
+       }
+      else if (control_flag == 2)
+       {
+         if (charset->method == CHARSET_METHOD_MAP
+             && CHARSET_COMPACT_CODES_P (charset))
+           for (; from_index < lim_index; from_index++, from_c++)
+             {
+               unsigned code = from_index;
+               code = INDEX_TO_CODE_POINT (charset, code);
+
+               if (NILP (CHAR_TABLE_REF (table, from_c)))
+                 CHAR_TABLE_SET (table, from_c, make_number (code));
+             }
+         else
+           for (; from_index < lim_index; from_index++, from_c++)
+             {
+               if (NILP (CHAR_TABLE_REF (table, from_c)))
+                 CHAR_TABLE_SET (table, from_c, make_number (from_index));
+             }
+       }
+      else if (control_flag == 3)
+       for (; from_index < lim_index; from_index++, from_c++)
+         SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
+      else if (control_flag == 4)
+       for (; from_index < lim_index; from_index++, from_c++)
+         SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
+      else                     /* control_flag == 0 */
+       {
           if (ascii_compatible_p)
             {
               if (! ASCII_BYTE_P (from_c))
@@ -272,80 +398,30 @@ load_charset_map (charset, entries, n_entries, control_flag)
                 }
             }
  
-         for (c = from_c; c <= to_c; c++)
-           CHARSET_FAST_MAP_SET (c, fast_map);
-
-         if (control_flag == 1)
-           {
-             unsigned code = from;
-
-             if (CHARSET_COMPACT_CODES_P (charset))
-               while (1)
-                 {
-                   ASET (vec, from_index, make_number (from_c));
-                   if (NILP (CHAR_TABLE_REF (table, from_c)))
-                     CHAR_TABLE_SET (table, from_c, make_number (code));
-                   if (from_index == to_index)
-                     break;
-                   from_index++, from_c++;
-                   code = INDEX_TO_CODE_POINT (charset, from_index);
-                 }
-             else
-               for (; from_index <= to_index; from_index++, from_c++)
-                 {
-                   ASET (vec, from_index, make_number (from_c));
-                   if (NILP (CHAR_TABLE_REF (table, from_c)))
-                     CHAR_TABLE_SET (table, from_c, make_number (from_index));
-                 }
-           }
-       }
-      else
-       {
-         unsigned code = from;
-
-         while (1)
-           {
-             int c1 = DECODE_CHAR (charset, code);
-
-             if (c1 >= 0)
-               {
-                 CHAR_TABLE_SET (table, from_c, make_number (c1));
-                 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
-                 if (CHAR_TABLE_P (Vchar_unified_charset_table))
-                   CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
-                                   CHARSET_NAME (charset));
-               }
-             if (from_index == to_index)
-               break;
-             from_index++, from_c++;
-             code = INDEX_TO_CODE_POINT (charset, from_index);
-           }
+         for (; from_c <= to_c; from_c++)
+           CHARSET_FAST_MAP_SET (from_c, fast_map);
         }
      }
  
-  if (control_flag < 2)
+  if (control_flag == 0)
      {
        CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
                                     ? nonascii_min_char : min_char);
        CHARSET_MAX_CHAR (charset) = max_char;
-      if (control_flag == 1)
-       {
-         CHARSET_DECODER (charset) = vec;
-         CHARSET_ENCODER (charset) = table;
-       }
      }
-  else
-    CHARSET_DEUNIFIER (charset) = table;
+  else if (control_flag == 4)
+    {
+      temp_charset_work->min_char = min_char;
+      temp_charset_work->max_char = max_char;
+    }
  }
  
  
  /* Read a hexadecimal number (preceded by "0x") from the file FP while
-   paying attention to comment charcter '#'.  */
+   paying attention to comment character '#'.  */
  
-static INLINE unsigned
-read_hex (fp, eof)
-     FILE *fp;
-     int *eof;
+static inline unsigned
+read_hex (FILE *fp, int *eof, int *overflow)
  {
    int c;
    unsigned n;
@@ -367,21 +443,21 @@ read_hex (fp, eof)
        *eof = 1;
        return 0;
      }
-  *eof = 0;
    n = 0;
-  if (c == 'x')
-    while ((c = getc (fp)) != EOF && isxdigit (c))
+  while (isxdigit (c = getc (fp)))
+    {
+      if (UINT_MAX >> 4 < n)
+       *overflow = 1;
        n = ((n << 4)
-          | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
-  else
-    while ((c = getc (fp)) != EOF && isdigit (c))
-      n = (n * 10) + c - '0';
+          | (c - ('0' <= c && c <= '9' ? '0'
+                  : 'A' <= c && c <= 'F' ? 'A' - 10
+                  : 'a' - 10)));
+    }
    if (c != EOF)
      ungetc (c, fp);
    return n;
  }
  
-
  /* Return a mapping vector for CHARSET loaded from MAPFILE.
     Each line of MAPFILE has this form
         0xAAAA 0xCCCC
@@ -394,81 +470,89 @@ read_hex (fp, eof)
     The returned vector has this form:
         [ CODE1 CHAR1 CODE2 CHAR2 .... ]
     where CODE1 is a code-point or a cons of code-points specifying a
-   range.  */
+   range.
  
-extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
+   Note that this function uses `openp' to open MAPFILE but ignores
+   `file-name-handler-alist' to avoid running any Lisp code.  */
  
  static void
-load_charset_map_from_file (charset, mapfile, control_flag)
-     struct charset *charset;
-     Lisp_Object mapfile;
-     int control_flag;
+load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
  {
    unsigned min_code = CHARSET_MIN_CODE (charset);
    unsigned max_code = CHARSET_MAX_CODE (charset);
    int fd;
    FILE *fp;
-  int eof;
    Lisp_Object suffixes;
    struct charset_map_entries *head, *entries;
    int n_entries;
+  ptrdiff_t count;
+  USE_SAFE_ALLOCA;
  
    suffixes = Fcons (build_string (".map"),
                     Fcons (build_string (".TXT"), Qnil));
  
+  count = SPECPDL_INDEX ();
+  specbind (Qfile_name_handler_alist, Qnil);
    fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
+  unbind_to (count, Qnil);
    if (fd < 0
        || ! (fp = fdopen (fd, "r")))
-    {
-      add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
-      return;
-    }
+    error ("Failure in loading charset map: %s", SDATA (mapfile));
+
+  /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
+     large (larger than MAX_ALLOCA).  */
+  SAFE_ALLOCA (head, struct charset_map_entries *,
+              sizeof (struct charset_map_entries));
+  entries = head;
+  memset (entries, 0, sizeof (struct charset_map_entries));
  
-  head = entries = ((struct charset_map_entries *)
-                   alloca (sizeof (struct charset_map_entries)));
    n_entries = 0;
-  eof = 0;
    while (1)
      {
-      unsigned from, to;
-      int c;
+      unsigned from, to, c;
        int idx;
+      int eof = 0, overflow = 0;
  
-      from = read_hex (fp, &eof);
+      from = read_hex (fp, &eof, &overflow);
        if (eof)
         break;
        if (getc (fp) == '-')
-       to = read_hex (fp, &eof);
+       to = read_hex (fp, &eof, &overflow);
        else
         to = from;
-      c = (int) read_hex (fp, &eof);
+      if (eof)
+       break;
+      c = read_hex (fp, &eof, &overflow);
+      if (eof)
+       break;
  
+      if (overflow)
+       continue;
        if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
         continue;
  
        if (n_entries > 0 && (n_entries % 0x10000) == 0)
         {
-         entries->next = ((struct charset_map_entries *)
-                          alloca (sizeof (struct charset_map_entries)));
+         SAFE_ALLOCA (entries->next, struct charset_map_entries *,
+                      sizeof (struct charset_map_entries));
           entries = entries->next;
+         memset (entries, 0, sizeof (struct charset_map_entries));
+         n_entries = 0;
         }
-      idx = n_entries % 0x10000;
+      idx = n_entries;
        entries->entry[idx].from = from;
        entries->entry[idx].to = to;
        entries->entry[idx].c = c;
        n_entries++;
      }
    fclose (fp);
-  close (fd);
  
    load_charset_map (charset, head, n_entries, control_flag);
+  SAFE_FREE ();
  }
  
  static void
-load_charset_map_from_vector (charset, vec, control_flag)
-     struct charset *charset;
-     Lisp_Object vec;
-     int control_flag;
+load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
  {
    unsigned min_code = CHARSET_MIN_CODE (charset);
    unsigned max_code = CHARSET_MAX_CODE (charset);
@@ -476,6 +560,7 @@ load_charset_map_from_vector (charset, vec, control_flag)
    int n_entries;
    int len = ASIZE (vec);
    int i;
+  USE_SAFE_ALLOCA;
  
    if (len % 2 == 1)
      {
@@ -483,14 +568,19 @@ load_charset_map_from_vector (charset, vec, control_flag)
        return;
      }
  
-  head = entries = ((struct charset_map_entries *)
-                   alloca (sizeof (struct charset_map_entries)));
+  /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
+     large (larger than MAX_ALLOCA).  */
+  SAFE_ALLOCA (head, struct charset_map_entries *,
+              sizeof (struct charset_map_entries));
+  entries = head;
+  memset (entries, 0, sizeof (struct charset_map_entries));
+
    n_entries = 0;
    for (i = 0; i < len; i += 2)
      {
        Lisp_Object val, val2;
        unsigned from, to;
-      int c;
+      EMACS_INT c;
        int idx;
  
        val = AREF (vec, i);
@@ -498,16 +588,11 @@ load_charset_map_from_vector (charset, vec, control_flag)
         {
           val2 = XCDR (val);
           val = XCAR (val);
-         CHECK_NATNUM (val);
-         CHECK_NATNUM (val2);
           from = XFASTINT (val);
           to = XFASTINT (val2);
         }
        else
-       {
-         CHECK_NATNUM (val);
-         from = to = XFASTINT (val);
-       }
+       from = to = XFASTINT (val);
        val = AREF (vec, i + 1);
        CHECK_NATNUM (val);
        c = XFASTINT (val);
@@ -517,9 +602,10 @@ load_charset_map_from_vector (charset, vec, control_flag)
  
        if (n_entries > 0 && (n_entries % 0x10000) == 0)
         {
-         entries->next = ((struct charset_map_entries *)
-                          alloca (sizeof (struct charset_map_entries)));
+         SAFE_ALLOCA (entries->next, struct charset_map_entries *,
+                      sizeof (struct charset_map_entries));
           entries = entries->next;
+         memset (entries, 0, sizeof (struct charset_map_entries));
         }
        idx = n_entries % 0x10000;
        entries->entry[idx].from = from;
@@ -529,59 +615,114 @@ load_charset_map_from_vector (charset, vec, control_flag)
      }
  
    load_charset_map (charset, head, n_entries, control_flag);
+  SAFE_FREE ();
  }
  
+
+/* Load a mapping table for CHARSET.  CONTROL-FLAG tells what kind of
+   map it is (see the comment of load_charset_map for the detail).  */
+
  static void
-load_charset (charset)
-     struct charset *charset;
+load_charset (struct charset *charset, int control_flag)
  {
-  if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
-    {
-      Lisp_Object map;
+  Lisp_Object map;
  
-      map = CHARSET_MAP (charset);
-      if (STRINGP (map))
-       load_charset_map_from_file (charset, map, 1);
-      else
-       load_charset_map_from_vector (charset, map, 1);
-      CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
+  if (inhibit_load_charset_map
+      && temp_charset_work
+      && charset == temp_charset_work->current
+      && ((control_flag == 2) == temp_charset_work->for_encoder))
+    return;
+
+  if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
+    map = CHARSET_MAP (charset);
+  else
+    {
+      if (! CHARSET_UNIFIED_P (charset))
+       abort ();
+      map = CHARSET_UNIFY_MAP (charset);
      }
+  if (STRINGP (map))
+    load_charset_map_from_file (charset, map, control_flag);
+  else
+    load_charset_map_from_vector (charset, map, control_flag);
  }
  
  
  DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
         doc: /* Return non-nil if and only if OBJECT is a charset.*/)
-     (object)
-     Lisp_Object object;
+  (Lisp_Object object)
  {
    return (CHARSETP (object) ? Qt : Qnil);
  }
  
  
-void
-map_charset_chars (c_function, function, arg,
-                  charset, from, to)
-     void (*c_function) P_ ((Lisp_Object, Lisp_Object));
-     Lisp_Object function, arg;
-     struct charset *charset;
-     unsigned from, to;
+static void
+map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
+                     Lisp_Object function, Lisp_Object arg,
+                     unsigned int from, unsigned int to)
  {
+  int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
+  int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
    Lisp_Object range;
-  int partial;
+  int c, stop;
+  struct gcpro gcpro1;
  
-  if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
-    load_charset (charset);
+  range = Fcons (Qnil, Qnil);
+  GCPRO1 (range);
  
-  partial = (from > CHARSET_MIN_CODE (charset)
-            || to < CHARSET_MAX_CODE (charset));
+  c = temp_charset_work->min_char;
+  stop = (temp_charset_work->max_char < 0x20000
+         ? temp_charset_work->max_char : 0xFFFF);
  
-  if (CHARSET_UNIFIED_P (charset)
-      && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+  while (1)
      {
-      map_char_table_for_charset (c_function, function,
-                                 CHARSET_DEUNIFIER (charset), arg,
-                                 partial ? charset : NULL, from, to);
+      int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
+
+      if (idx >= from_idx && idx <= to_idx)
+       {
+         if (NILP (XCAR (range)))
+           XSETCAR (range, make_number (c));
+       }
+      else if (! NILP (XCAR (range)))
+       {
+         XSETCDR (range, make_number (c - 1));
+         if (c_function)
+           (*c_function) (arg, range);
+         else
+           call2 (function, range, arg);
+         XSETCAR (range, Qnil);
+       }
+      if (c == stop)
+       {
+         if (c == temp_charset_work->max_char)
+           {
+             if (! NILP (XCAR (range)))
+               {
+                 XSETCDR (range, make_number (c));
+                 if (c_function)
+                   (*c_function) (arg, range);
+                 else
+                   call2 (function, range, arg);
+               }
+             break;
+           }
+         c = 0x1FFFF;
+         stop = temp_charset_work->max_char;
+       }
+      c++;
      }
+  UNGCPRO;
+}
+
+void
+map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
+                  Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
+{
+  Lisp_Object range;
+  int partial;
+
+  partial = (from > CHARSET_MIN_CODE (charset)
+            || to < CHARSET_MAX_CODE (charset));
  
    if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
      {
@@ -590,6 +731,18 @@ map_charset_chars (c_function, function, arg,
        int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
        int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
  
+      if (CHARSET_UNIFIED_P (charset))
+       {
+         if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+           load_charset (charset, 2);
+         if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+           map_char_table_for_charset (c_function, function,
+                                       CHARSET_DEUNIFIER (charset), arg,
+                                       partial ? charset : NULL, from, to);
+         else
+           map_charset_for_dump (c_function, function, arg, from, to);
+       }
+
        range = Fcons (make_number (from_c), make_number (to_c));
        if (NILP (function))
         (*c_function) (arg, range);
@@ -599,10 +752,13 @@ map_charset_chars (c_function, function, arg,
    else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
      {
        if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
-       return;
-      map_char_table_for_charset (c_function, function,
-                                 CHARSET_ENCODER (charset), arg,
-                                 partial ? charset : NULL, from, to);
+       load_charset (charset, 2);
+      if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
+       map_char_table_for_charset (c_function, function,
+                                   CHARSET_ENCODER (charset), arg,
+                                   partial ? charset : NULL, from, to);
+      else
+       map_charset_for_dump (c_function, function, arg, from, to);
      }
    else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
      {
@@ -632,8 +788,8 @@ map_charset_chars (c_function, function, arg,
  
           charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
           offset = XINT (XCDR (XCAR (parents)));
-         this_from = from - offset;
-         this_to = to - offset;
+         this_from = from > offset ? from - offset : 0;
+         this_to = to > offset ? to - offset : 0;
           if (this_from < CHARSET_MIN_CODE (charset))
             this_from = CHARSET_MIN_CODE (charset);
           if (this_to > CHARSET_MAX_CODE (charset))
@@ -653,9 +809,8 @@ RANGE is a cons (FROM .  TO), where FROM and TO indicate a range of
  characters contained in CHARSET.
  
  The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
-range of code points of target characters.  */)
-     (function, charset, arg, from_code, to_code)
-       Lisp_Object function, charset, arg, from_code, to_code;
+range of code points (in CHARSET) of target characters.  */)
+  (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
  {
    struct charset *cs;
    unsigned from, to;
@@ -665,7 +820,6 @@ range of code points of target characters.  */)
      from = CHARSET_MIN_CODE (cs);
    else
      {
-      CHECK_NATNUM (from_code);
        from = XINT (from_code);
        if (from < CHARSET_MIN_CODE (cs))
         from = CHARSET_MIN_CODE (cs);
@@ -674,7 +828,6 @@ range of code points of target characters.  */)
      to = CHARSET_MAX_CODE (cs);
    else
      {
-      CHECK_NATNUM (to_code);
        to = XINT (to_code);
        if (to > CHARSET_MAX_CODE (cs))
         to = CHARSET_MAX_CODE (cs);
@@ -693,14 +846,12 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal,
         Sdefine_charset_internal, charset_arg_max, MANY, 0,
         doc: /* For internal use only.
  usage: (define-charset-internal ...)  */)
-     (nargs, args)
-     int nargs;
-     Lisp_Object *args;
+  (ptrdiff_t nargs, Lisp_Object *args)
  {
    /* Charset attr vector.  */
    Lisp_Object attrs;
    Lisp_Object val;
-  unsigned hash_code;
+  EMACS_UINT hash_code;
    struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
    int i, j;
    struct charset charset;
@@ -720,21 +871,26 @@ usage: (define-charset-internal ...)  */)
    ASET (attrs, charset_name, args[charset_arg_name]);
  
    val = args[charset_arg_code_space];
-  for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
+  for (i = 0, dimension = 0, nchars = 1; ; i++)
      {
+      Lisp_Object min_byte_obj, max_byte_obj;
        int min_byte, max_byte;
  
-      min_byte = XINT (Faref (val, make_number (i * 2)));
-      max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
-      if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
-       error ("Invalid :code-space value");
+      min_byte_obj = Faref (val, make_number (i * 2));
+      max_byte_obj = Faref (val, make_number (i * 2 + 1));
+      CHECK_RANGED_INTEGER (0, min_byte_obj, 255);
+      min_byte = XINT (min_byte_obj);
+      CHECK_RANGED_INTEGER (min_byte, max_byte_obj, 255);
+      max_byte = XINT (max_byte_obj);
        charset.code_space[i * 4] = min_byte;
        charset.code_space[i * 4 + 1] = max_byte;
        charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
-      nchars *= charset.code_space[i * 4 + 2];
-      charset.code_space[i * 4 + 3] = nchars;
        if (max_byte > 0)
         dimension = i + 1;
+      if (i == 3)
+       break;
+      nchars *= charset.code_space[i * 4 + 2];
+      charset.code_space[i * 4 + 3] = nchars;
      }
  
    val = args[charset_arg_dimension];
@@ -742,10 +898,8 @@ usage: (define-charset-internal ...)  */)
      charset.dimension = dimension;
    else
      {
-      CHECK_NATNUM (val);
+      CHECK_RANGED_INTEGER (1, val, 4);
        charset.dimension = XINT (val);
-      if (charset.dimension < 1 || charset.dimension > 4)
-       args_out_of_range_3 (val, make_number (1), make_number (4));
      }
  
    charset.code_linear_p
@@ -759,7 +913,7 @@ usage: (define-charset-internal ...)  */)
    if (! charset.code_linear_p)
      {
        charset.code_space_mask = (unsigned char *) xmalloc (256);
-      bzero (charset.code_space_mask, 256);
+      memset (charset.code_space_mask, 0, 256);
        for (i = 0; i < 4; i++)
         for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
              j++)
@@ -771,31 +925,22 @@ usage: (define-charset-internal ...)  */)
    charset.min_code = (charset.code_space[0]
                       | (charset.code_space[4] << 8)
                       | (charset.code_space[8] << 16)
-                     | (charset.code_space[12] << 24));
+                     | ((unsigned) charset.code_space[12] << 24));
    charset.max_code = (charset.code_space[1]
                       | (charset.code_space[5] << 8)
                       | (charset.code_space[9] << 16)
-                     | (charset.code_space[13] << 24));
+                     | ((unsigned) charset.code_space[13] << 24));
    charset.char_index_offset = 0;
  
    val = args[charset_arg_min_code];
    if (! NILP (val))
      {
-      unsigned code;
+      unsigned code = cons_to_unsigned (val, UINT_MAX);
  
-      if (INTEGERP (val))
-       code = XINT (val);
-      else
-       {
-         CHECK_CONS (val);
-         CHECK_NUMBER_CAR (val);
-         CHECK_NUMBER_CDR (val);
-         code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
-       }
        if (code < charset.min_code
           || code > charset.max_code)
-       args_out_of_range_3 (make_number (charset.min_code),
-                            make_number (charset.max_code), val);
+       args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+                            make_fixnum_or_float (charset.max_code), val);
        charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
        charset.min_code = code;
      }
@@ -803,25 +948,16 @@ usage: (define-charset-internal ...)  */)
    val = args[charset_arg_max_code];
    if (! NILP (val))
      {
-      unsigned code;
+      unsigned code = cons_to_unsigned (val, UINT_MAX);
  
-      if (INTEGERP (val))
-       code = XINT (val);
-      else
-       {
-         CHECK_CONS (val);
-         CHECK_NUMBER_CAR (val);
-         CHECK_NUMBER_CDR (val);
-         code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
-       }
        if (code < charset.min_code
           || code > charset.max_code)
-       args_out_of_range_3 (make_number (charset.min_code),
-                            make_number (charset.max_code), val);
+       args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+                            make_fixnum_or_float (charset.max_code), val);
        charset.max_code = code;
      }
  
-  charset.compact_codes_p = charset.max_code < 0x1000000;
+  charset.compact_codes_p = charset.max_code < 0x10000;
  
    val = args[charset_arg_invalid_code];
    if (NILP (val))
@@ -830,18 +966,14 @@ usage: (define-charset-internal ...)  */)
         charset.invalid_code = 0;
        else
         {
-         XSETINT (val, charset.max_code + 1);
-         if (XINT (val) == charset.max_code + 1)
+         if (charset.max_code < UINT_MAX)
             charset.invalid_code = charset.max_code + 1;
           else
             error ("Attribute :invalid-code must be specified");
         }
      }
    else
-    {
-      CHECK_NATNUM (val);
-      charset.invalid_code = XFASTINT (val);
-    }
+    charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
  
    val = args[charset_arg_iso_final];
    if (NILP (val))
@@ -850,7 +982,7 @@ usage: (define-charset-internal ...)  */)
      {
        CHECK_NUMBER (val);
        if (XINT (val) < '0' || XINT (val) > 127)
-       error ("Invalid iso-final-char: %d", XINT (val));
+       error ("Invalid iso-final-char: %"pI"d", XINT (val));
        charset.iso_final = XINT (val);
      }
  
@@ -859,9 +991,7 @@ usage: (define-charset-internal ...)  */)
      charset.iso_revision = -1;
    else
      {
-      CHECK_NUMBER (val);
-      if (XINT (val) > 63)
-       args_out_of_range (make_number (63), val);
+      CHECK_RANGED_INTEGER (-1, val, 63);
        charset.iso_revision = XINT (val);
      }
  
@@ -872,7 +1002,7 @@ usage: (define-charset-internal ...)  */)
      {
        CHECK_NATNUM (val);
        if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
-       error ("Invalid emacs-mule-id: %d", XINT (val));
+       error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
        charset.emacs_mule_id = XINT (val);
      }
  
@@ -882,22 +1012,22 @@ usage: (define-charset-internal ...)  */)
  
    charset.unified_p = 0;
  
-  bzero (charset.fast_map, sizeof (charset.fast_map));
+  memset (charset.fast_map, 0, sizeof (charset.fast_map));
  
    if (! NILP (args[charset_arg_code_offset]))
      {
        val = args[charset_arg_code_offset];
-      CHECK_NUMBER (val);
+      CHECK_CHARACTER (val);
  
        charset.method = CHARSET_METHOD_OFFSET;
        charset.code_offset = XINT (val);
  
-      i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
-      charset.min_char = i + charset.code_offset;
        i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
-      charset.max_char = i + charset.code_offset;
-      if (charset.max_char > MAX_CHAR)
+      if (MAX_CHAR - charset.code_offset < i)
         error ("Unsupported max char: %d", charset.max_char);
+      charset.max_char = i + charset.code_offset;
+      i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+      charset.min_char = i + charset.code_offset;
  
        i = (charset.min_char >> 7) << 7;
        for (; i < 0x10000 && i <= charset.max_char; i += 128)
@@ -905,16 +1035,14 @@ usage: (define-charset-internal ...)  */)
        i = (i >> 12) << 12;
        for (; i <= charset.max_char; i += 0x1000)
         CHARSET_FAST_MAP_SET (i, charset.fast_map);
+      if (charset.code_offset == 0 && charset.max_char >= 0x80)
+       charset.ascii_compatible_p = 1;
      }
    else if (! NILP (args[charset_arg_map]))
      {
        val = args[charset_arg_map];
        ASET (attrs, charset_map, val);
-      if (STRINGP (val))
-       load_charset_map_from_file (&charset, val, 0);
-      else
-       load_charset_map_from_vector (&charset, val, 0);
-      charset.method = CHARSET_METHOD_MAP_DEFERRED;
+      charset.method = CHARSET_METHOD_MAP;
      }
    else if (! NILP (args[charset_arg_subset]))
      {
@@ -970,7 +1098,7 @@ usage: (define-charset-internal ...)  */)
               car_part = XCAR (elt);
               cdr_part = XCDR (elt);
               CHECK_CHARSET_GET_ID (car_part, this_id);
-             CHECK_NUMBER (cdr_part);
+             CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
               offset = XINT (cdr_part);
             }
           else
@@ -1014,13 +1142,25 @@ usage: (define-charset-internal ...)  */)
                                      hash_code);
        if (charset_table_used == charset_table_size)
         {
-         struct charset *new_table
-           = (struct charset *) xmalloc (sizeof (struct charset)
-                                         * (charset_table_size + 16));
-         bcopy (charset_table, new_table,
-                sizeof (struct charset) * charset_table_size);
-         charset_table_size += 16;
+         /* Ensure that charset IDs fit into 'int' as well as into the
+            restriction imposed by fixnums.  Although the 'int' restriction
+            could be removed, too much other code would need altering; for
+            example, the IDs are stuffed into struct
+            coding_system.charbuf[i] entries, which are 'int'.  */
+         int old_size = charset_table_size;
+         struct charset *new_table =
+           xpalloc (0, &charset_table_size, 1,
+                    min (INT_MAX, MOST_POSITIVE_FIXNUM),
+                    sizeof *charset_table);
+         memcpy (new_table, charset_table, old_size * sizeof *new_table);
           charset_table = new_table;
+         /* FIXME: This leaks memory, as the old charset_table becomes
+            unreachable.  If the old charset table is charset_table_init
+            then this leak is intentional; otherwise, it's unclear.
+            If the latter memory leak is intentional, a
+            comment should be added to explain this.  If not, the old
+            charset_table should be freed, by passing it as the 1st argument
+            to xpalloc and removing the memcpy.  */
         }
        id = charset_table_used++;
        new_definition_p = 1;
@@ -1030,6 +1170,12 @@ usage: (define-charset-internal ...)  */)
    charset.id = id;
    charset_table[id] = charset;
  
+  if (charset.method == CHARSET_METHOD_MAP)
+    {
+      load_charset (&charset, 0);
+      charset_table[id] = charset;
+    }
+
    if (charset.iso_final >= 0)
      {
        ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
@@ -1043,11 +1189,13 @@ usage: (define-charset-internal ...)  */)
         charset_jisx0208_1978 = id;
        else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
         charset_jisx0208 = id;
+      else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
+       charset_ksc5601 = id;
      }
  
    if (charset.emacs_mule_id >= 0)
      {
-      emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
+      emacs_mule_charset[charset.emacs_mule_id] = id;
        if (charset.emacs_mule_id < 0xA0)
         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
        else
@@ -1100,18 +1248,15 @@ usage: (define-charset-internal ...)  */)
     charset.  */
  
  static int
-define_charset_internal (name, dimension, code_space, min_code, max_code,
-                        iso_final, iso_revision, emacs_mule_id,
-                        ascii_compatible, supplementary,
-                        code_offset)
-     Lisp_Object name;
-     int dimension;
-     unsigned char *code_space;
-     unsigned min_code, max_code;
-     int iso_final, iso_revision, emacs_mule_id;
-     int ascii_compatible, supplementary;
-     int code_offset;
+define_charset_internal (Lisp_Object name,
+                        int dimension,
+                        const char *code_space_chars,
+                        unsigned min_code, unsigned max_code,
+                        int iso_final, int iso_revision, int emacs_mule_id,
+                        int ascii_compatible, int supplementary,
+                        int code_offset)
  {
+  const unsigned char *code_space = (const unsigned char *) code_space_chars;
    Lisp_Object args[charset_arg_max];
    Lisp_Object plist[14];
    Lisp_Object val;
@@ -1139,19 +1284,19 @@ define_charset_internal (name, dimension, code_space, min_code, max_code,
    args[charset_arg_superset] = Qnil;
    args[charset_arg_unify_map] = Qnil;
  
-  plist[0] = intern (":name");
+  plist[0] = intern_c_string (":name");
    plist[1] = args[charset_arg_name];
-  plist[2] = intern (":dimension");
+  plist[2] = intern_c_string (":dimension");
    plist[3] = args[charset_arg_dimension];
-  plist[4] = intern (":code-space");
+  plist[4] = intern_c_string (":code-space");
    plist[5] = args[charset_arg_code_space];
-  plist[6] = intern (":iso-final-char");
+  plist[6] = intern_c_string (":iso-final-char");
    plist[7] = args[charset_arg_iso_final];
-  plist[8] = intern (":emacs-mule-id");
+  plist[8] = intern_c_string (":emacs-mule-id");
    plist[9] = args[charset_arg_emacs_mule_id];
-  plist[10] = intern (":ascii-compatible-p");
+  plist[10] = intern_c_string (":ascii-compatible-p");
    plist[11] = args[charset_arg_ascii_compatible_p];
-  plist[12] = intern (":code-offset");
+  plist[12] = intern_c_string (":code-offset");
    plist[13] = args[charset_arg_code_offset];
  
    args[charset_arg_plist] = Flist (14, plist);
@@ -1164,8 +1309,7 @@ define_charset_internal (name, dimension, code_space, min_code, max_code,
  DEFUN ("define-charset-alias", Fdefine_charset_alias,
         Sdefine_charset_alias, 2, 2, 0,
         doc: /* Define ALIAS as an alias for charset CHARSET.  */)
-     (alias, charset)
-     Lisp_Object alias, charset;
+  (Lisp_Object alias, Lisp_Object charset)
  {
    Lisp_Object attr;
  
@@ -1178,8 +1322,7 @@ DEFUN ("define-charset-alias", Fdefine_charset_alias,
  
  DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
         doc: /* Return the property list of CHARSET.  */)
-     (charset)
-     Lisp_Object charset;
+  (Lisp_Object charset)
  {
    Lisp_Object attrs;
  
@@ -1190,8 +1333,7 @@ DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
  
  DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
         doc: /* Set CHARSET's property list to PLIST.  */)
-     (charset, plist)
-     Lisp_Object charset, plist;
+  (Lisp_Object charset, Lisp_Object plist)
  {
    Lisp_Object attrs;
  
@@ -1211,16 +1353,13 @@ the same meaning as the `:unify-map' attribute in the function
  `define-charset' (which see).
  
  Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET.  */)
-     (charset, unify_map, deunify)
-     Lisp_Object charset, unify_map, deunify;
+  (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
  {
    int id;
    struct charset *cs;
  
    CHECK_CHARSET_GET_ID (charset, id);
    cs = CHARSET_FROM_ID (id);
-  if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
-    load_charset (cs);
    if (NILP (deunify)
        ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
        : ! CHARSET_UNIFIED_P (cs))
@@ -1229,24 +1368,27 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET.  */)
    CHARSET_UNIFIED_P (cs) = 0;
    if (NILP (deunify))
      {
-      if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
+      if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
+         || CHARSET_CODE_OFFSET (cs) < 0x110000)
         error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
        if (NILP (unify_map))
         unify_map = CHARSET_UNIFY_MAP (cs);
-      if (STRINGP (unify_map))
-       load_charset_map_from_file (cs, unify_map, 2);
-      else if (VECTORP (unify_map))
-       load_charset_map_from_vector (cs, unify_map, 2);
-      else if (NILP (unify_map))
-       error ("No unify-map for charset");
        else
-       error ("Bad unify-map arg");
+       {
+         if (! STRINGP (unify_map) && ! VECTORP (unify_map))
+           signal_error ("Bad unify-map", unify_map);
+         CHARSET_UNIFY_MAP (cs) = unify_map;
+       }
+      if (NILP (Vchar_unify_table))
+       Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
+      char_table_set_range (Vchar_unify_table,
+                           cs->min_char, cs->max_char, charset);
        CHARSET_UNIFIED_P (cs) = 1;
      }
    else if (CHAR_TABLE_P (Vchar_unify_table))
      {
-      int min_code = CHARSET_MIN_CODE (cs);
-      int max_code = CHARSET_MAX_CODE (cs);
+      unsigned min_code = CHARSET_MIN_CODE (cs);
+      unsigned max_code = CHARSET_MAX_CODE (cs);
        int min_char = DECODE_CHAR (cs, min_code);
        int max_char = DECODE_CHAR (cs, max_code);
  
@@ -1266,8 +1408,7 @@ CHARS is the number of characters in a dimension: 94 or 96.
  This final char is for private use, thus the range is `0' (48) .. `?' (63).
  If there's no unused final char for the specified kind of charset,
  return nil.  */)
-     (dimension, chars)
-     Lisp_Object dimension, chars;
+  (Lisp_Object dimension, Lisp_Object chars)
  {
    int final_char;
  
@@ -1284,19 +1425,20 @@ return nil.  */)
  }
  
  static void
-check_iso_charset_parameter (dimension, chars, final_char)
-     Lisp_Object dimension, chars, final_char;
+check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
  {
    CHECK_NATNUM (dimension);
    CHECK_NATNUM (chars);
-  CHECK_NATNUM (final_char);
+  CHECK_CHARACTER (final_char);
  
    if (XINT (dimension) > 3)
-    error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
+    error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
+          XINT (dimension));
    if (XINT (chars) != 94 && XINT (chars) != 96)
-    error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
+    error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
    if (XINT (final_char) < '0' || XINT (final_char) > '~')
-    error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
+    error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
+          (int)XINT (final_char));
  }
  
  
@@ -1307,8 +1449,7 @@ DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
  On decoding by an ISO-2022 base coding system, when a charset
  specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
  if CHARSET is designated instead.  */)
-     (dimension, chars, final_char, charset)
-     Lisp_Object dimension, chars, final_char, charset;
+  (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
  {
    int id;
    int chars_flag;
@@ -1335,8 +1476,7 @@ if CHARSET is designated instead.  */)
  */
  
  int
-string_xstring_p (string)
-     Lisp_Object string;
+string_xstring_p (Lisp_Object string)
  {
    const unsigned char *p = SDATA (string);
    const unsigned char *endp = p + SBYTES (string);
@@ -1363,11 +1503,7 @@ string_xstring_p (string)
     It may lookup a translation table TABLE if supplied.  */
  
  static void
-find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
-     const unsigned char *ptr;
-     EMACS_INT nchars, nbytes;
-     Lisp_Object charsets, table;
-     int multibyte;
+find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars, ptrdiff_t nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
  {
    const unsigned char *pend = ptr + nbytes;
  
@@ -1411,14 +1547,13 @@ Optional arg TABLE if non-nil is a translation table to look up.
  
  If the current buffer is unibyte, the returned list may contain
  only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
-     (beg, end, table)
-     Lisp_Object beg, end, table;
+  (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
  {
    Lisp_Object charsets;
-  EMACS_INT from, from_byte, to, stop, stop_byte;
+  ptrdiff_t from, from_byte, to, stop, stop_byte;
    int i;
    Lisp_Object val;
-  int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+  int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  
    validate_region (&beg, &end);
    from = XFASTINT (beg);
@@ -1463,8 +1598,7 @@ Optional arg TABLE if non-nil is a translation table to look up.
  
  If STR is unibyte, the returned list may contain
  only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
-     (str, table)
-     Lisp_Object str, table;
+  (Lisp_Object str, Lisp_Object table)
  {
    Lisp_Object charsets;
    int i;
@@ -1485,13 +1619,44 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
  
  \f
  
+/* Return a unified character code for C (>= 0x110000).  VAL is a
+   value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
+   charset symbol.  */
+int
+maybe_unify_char (int c, Lisp_Object val)
+{
+  struct charset *charset;
+
+  if (INTEGERP (val))
+    return XFASTINT (val);
+  if (NILP (val))
+    return c;
+
+  CHECK_CHARSET_GET_CHARSET (val, charset);
+  load_charset (charset, 1);
+  if (! inhibit_load_charset_map)
+    {
+      val = CHAR_TABLE_REF (Vchar_unify_table, c);
+      if (! NILP (val))
+       c = XFASTINT (val);
+    }
+  else
+    {
+      int code_index = c - CHARSET_CODE_OFFSET (charset);
+      int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
+
+      if (unified > 0)
+       c = unified;
+    }
+  return c;
+}
+
+
  /* Return a character correponding to the code-point CODE of
     CHARSET.  */
  
  int
-decode_char (charset, code)
-     struct charset *charset;
-     unsigned code;
+decode_char (struct charset *charset, unsigned int code)
  {
    int c, char_index;
    enum charset_method method = CHARSET_METHOD (charset);
@@ -1499,12 +1664,6 @@ decode_char (charset, code)
    if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
      return -1;
  
-  if (method == CHARSET_METHOD_MAP_DEFERRED)
-    {
-      load_charset (charset);
-      method = CHARSET_METHOD (charset);
-    }
-
    if (method == CHARSET_METHOD_SUBSET)
      {
        Lisp_Object subset_info;
@@ -1547,21 +1706,24 @@ decode_char (charset, code)
  
           decoder = CHARSET_DECODER (charset);
           if (! VECTORP (decoder))
-           return -1;
-         c = XINT (AREF (decoder, char_index));
+           {
+             load_charset (charset, 1);
+             decoder = CHARSET_DECODER (charset);
+           }
+         if (VECTORP (decoder))
+           c = XINT (AREF (decoder, char_index));
+         else
+           c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
         }
-      else
+      else                     /* method == CHARSET_METHOD_OFFSET */
         {
           c = char_index + CHARSET_CODE_OFFSET (charset);
+         if (CHARSET_UNIFIED_P (charset)
+             && c > MAX_UNICODE_CHAR)
+           MAYBE_UNIFY_CHAR (c);
         }
      }
  
-  if (CHARSET_UNIFIED_P (charset)
-      && c >= 0)
-    {
-      MAYBE_UNIFY_CHAR (c);
-    }
-
    return c;
  }
  
@@ -1573,26 +1735,35 @@ Lisp_Object charset_work;
     use CHARSET's strict_max_char instead of max_char.  */
  
  unsigned
-encode_char (charset, c)
-     struct charset *charset;
-     int c;
+encode_char (struct charset *charset, int c)
  {
    unsigned code;
    enum charset_method method = CHARSET_METHOD (charset);
  
    if (CHARSET_UNIFIED_P (charset))
      {
-      Lisp_Object deunifier, deunified;
+      Lisp_Object deunifier;
+      int code_index = -1;
  
        deunifier = CHARSET_DEUNIFIER (charset);
        if (! CHAR_TABLE_P (deunifier))
         {
-         Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
+         load_charset (charset, 2);
           deunifier = CHARSET_DEUNIFIER (charset);
         }
-      deunified = CHAR_TABLE_REF (deunifier, c);
-      if (! NILP (deunified))
-       c = XINT (deunified);
+      if (CHAR_TABLE_P (deunifier))
+       {
+         Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
+
+         if (INTEGERP (deunified))
+           code_index = XINT (deunified);
+       }
+      else
+       {
+         code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
+       }
+      if (code_index >= 0)
+       c = CHARSET_CODE_OFFSET (charset) + code_index;
      }
  
    if (method == CHARSET_METHOD_SUBSET)
@@ -1633,12 +1804,6 @@ encode_char (charset, c)
        || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
      return CHARSET_INVALID_CODE (charset);
  
-  if (method == CHARSET_METHOD_MAP_DEFERRED)
-    {
-      load_charset (charset);
-      method = CHARSET_METHOD (charset);
-    }
-
    if (method == CHARSET_METHOD_MAP)
      {
        Lisp_Object encoder;
@@ -1646,18 +1811,30 @@ encode_char (charset, c)
  
        encoder = CHARSET_ENCODER (charset);
        if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
-       return CHARSET_INVALID_CODE (charset);
-      val = CHAR_TABLE_REF (encoder, c);
-      if (NILP (val))
-       return CHARSET_INVALID_CODE (charset);
-      code = XINT (val);
-      if (! CHARSET_COMPACT_CODES_P (charset))
-       code = INDEX_TO_CODE_POINT (charset, code);
+       {
+         load_charset (charset, 2);
+         encoder = CHARSET_ENCODER (charset);
+       }
+      if (CHAR_TABLE_P (encoder))
+       {
+         val = CHAR_TABLE_REF (encoder, c);
+         if (NILP (val))
+           return CHARSET_INVALID_CODE (charset);
+         code = XINT (val);
+         if (! CHARSET_COMPACT_CODES_P (charset))
+           code = INDEX_TO_CODE_POINT (charset, code);
+       }
+      else
+       {
+         code = GET_TEMP_CHARSET_WORK_ENCODER (c);
+         code = INDEX_TO_CODE_POINT (charset, code);
+       }
      }
    else                         /* method == CHARSET_METHOD_OFFSET */
      {
-      code = c - CHARSET_CODE_OFFSET (charset);
-      code = INDEX_TO_CODE_POINT (charset, code);
+      unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
+
+      code = INDEX_TO_CODE_POINT (charset, code_index);
      }
  
    return code;
@@ -1672,25 +1849,14 @@ CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
  
  Optional argument RESTRICTION specifies a way to map the pair of CCS
  and CODE-POINT to a character.  Currently not supported and just ignored.  */)
-  (charset, code_point, restriction)
-     Lisp_Object charset, code_point, restriction;
+  (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
  {
    int c, id;
    unsigned code;
    struct charset *charsetp;
  
    CHECK_CHARSET_GET_ID (charset, id);
-  if (CONSP (code_point))
-    {
-      CHECK_NATNUM_CAR (code_point);
-      CHECK_NATNUM_CDR (code_point);
-      code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
-    }
-  else
-    {
-      CHECK_NATNUM (code_point);
-      code = XINT (code_point);
-    }
+  code = cons_to_unsigned (code_point, UINT_MAX);
    charsetp = CHARSET_FROM_ID (id);
    c = DECODE_CHAR (charsetp, code);
    return (c >= 0 ? make_number (c) : Qnil);
@@ -1703,22 +1869,20 @@ Return nil if CHARSET doesn't include CH.
  
  Optional argument RESTRICTION specifies a way to map CH to a
  code-point in CCS.  Currently not supported and just ignored.  */)
-     (ch, charset, restriction)
-     Lisp_Object ch, charset, restriction;
+  (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
  {
-  int id;
+  int c, id;
    unsigned code;
    struct charset *charsetp;
  
    CHECK_CHARSET_GET_ID (charset, id);
-  CHECK_NATNUM (ch);
+  CHECK_CHARACTER (ch);
+  c = XFASTINT (ch);
    charsetp = CHARSET_FROM_ID (id);
-  code = ENCODE_CHAR (charsetp, XINT (ch));
+  code = ENCODE_CHAR (charsetp, c);
    if (code == CHARSET_INVALID_CODE (charsetp))
      return Qnil;
-  if (code > 0x7FFFFFF)
-    return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
-  return make_number (code);
+  return INTEGER_TO_CONS (code);
  }
  
  
@@ -1729,8 +1893,7 @@ DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
  CODE1 through CODE4 are optional, but if you don't supply sufficient
  position codes, it is assumed that the minimum code in each dimension
  is specified.  */)
-     (charset, code1, code2, code3, code4)
-     Lisp_Object charset, code1, code2, code3, code4;
+  (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
  {
    int id, dimension;
    struct charset *charsetp;
@@ -1808,10 +1971,7 @@ is specified.  */)
     Vcharset_ordered_list.  */
  
  struct charset *
-char_charset (c, charset_list, code_return)
-     int c;
-     Lisp_Object charset_list;
-     unsigned *code_return;
+char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
  {
    int maybe_null = 0;
  
@@ -1832,8 +1992,9 @@ char_charset (c, charset_list, code_return)
           return charset;
         }
        charset_list = XCDR (charset_list);
-      if (c <= MAX_UNICODE_CHAR
-        && EQ (charset_list, Vcharset_non_preferred_head))
+      if (! maybe_null
+         && c <= MAX_UNICODE_CHAR
+         && EQ (charset_list, Vcharset_non_preferred_head))
         return CHARSET_FROM_ID (charset_unicode);
      }
    return (maybe_null ? NULL
@@ -1848,8 +2009,7 @@ DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
  The charset is decided by the current priority order of charsets.
  A position-code is a byte value of each dimension of the code-point of
  CH in the charset.  */)
-     (ch)
-     Lisp_Object ch;
+  (Lisp_Object ch)
  {
    struct charset *charset;
    int c, dimension;
@@ -1874,15 +2034,39 @@ CH in the charset.  */)
  }
  
  
-DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
-       doc: /* Return the charset of highest priority that contains CH.  */)
-     (ch)
-     Lisp_Object ch;
+DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
+       doc: /* Return the charset of highest priority that contains CH.
+If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
+from which to find the charset.  It may also be a coding system.  In
+that case, find the charset from what supported by that coding system.  */)
+  (Lisp_Object ch, Lisp_Object restriction)
  {
    struct charset *charset;
  
    CHECK_CHARACTER (ch);
-  charset = CHAR_CHARSET (XINT (ch));
+  if (NILP (restriction))
+    charset = CHAR_CHARSET (XINT (ch));
+  else
+    {
+      if (CONSP (restriction))
+       {
+         int c = XFASTINT (ch);
+
+         for (; CONSP (restriction); restriction = XCDR (restriction))
+           {
+             struct charset *rcharset;
+
+             CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
+             if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
+               return XCAR (restriction);
+           }
+         return Qnil;
+       }
+      restriction = coding_system_charset_list (restriction);
+      charset = char_charset (XINT (ch), restriction, NULL);
+      if (! charset)
+       return Qnil;
+    }
    return (CHARSET_NAME (charset));
  }
  
@@ -1892,8 +2076,7 @@ DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
  Return charset of a character in the current buffer at position POS.
  If POS is nil, it defauls to the current point.
  If POS is out of range, the value is nil.  */)
-     (pos)
-     Lisp_Object pos;
+  (Lisp_Object pos)
  {
    Lisp_Object ch;
    struct charset *charset;
@@ -1915,8 +2098,7 @@ by their DIMENSION, CHARS, and FINAL-CHAR,
  whereas Emacs distinguishes them by charset symbol.
  See the documentation of the function `charset-info' for the meanings of
  DIMENSION, CHARS, and FINAL-CHAR.  */)
-     (dimension, chars, final_char)
-     Lisp_Object dimension, chars, final_char;
+  (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
  {
    int id;
    int chars_flag;
@@ -1932,35 +2114,19 @@ DIMENSION, CHARS, and FINAL-CHAR.  */)
  DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
         0, 0, 0,
         doc: /*
-Clear encoder and decoder of charsets that are loaded from mapfiles.  */)
-     ()
+Internal use only.
+Clear temporary charset mapping tables.
+It should be called only from temacs invoked for dumping.  */)
+  (void)
  {
-  int i;
-  struct charset *charset;
-  Lisp_Object attrs;
-
-  for (i = 0; i < charset_table_used; i++)
+  if (temp_charset_work)
      {
-      charset = CHARSET_FROM_ID (i);
-      attrs = CHARSET_ATTRIBUTES (charset);
-
-      if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
-       {
-         CHARSET_ATTR_DECODER (attrs) = Qnil;
-         CHARSET_ATTR_ENCODER (attrs) = Qnil;
-         CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
-       }
-
-      if (CHARSET_UNIFIED_P (charset))
-       CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
+      xfree (temp_charset_work);
+      temp_charset_work = NULL;
      }
  
-  if (CHAR_TABLE_P (Vchar_unified_charset_table))
-    {
-      Foptimize_char_table (Vchar_unified_charset_table, Qnil);
-      Vchar_unify_table = Vchar_unified_charset_table;
-      Vchar_unified_charset_table = Qnil;
-    }
+  if (CHAR_TABLE_P (Vchar_unify_table))
+    Foptimize_char_table (Vchar_unify_table, Qnil);
  
    return Qnil;
  }
@@ -1969,8 +2135,7 @@ DEFUN ("charset-priority-list", Fcharset_priority_list,
         Scharset_priority_list, 0, 1, 0,
         doc: /* Return the list of charsets ordered by priority.
  HIGHESTP non-nil means just return the highest priority one.  */)
-     (highestp)
-     Lisp_Object highestp;
+  (Lisp_Object highestp)
  {
    Lisp_Object val = Qnil, list = Vcharset_ordered_list;
  
@@ -1989,13 +2154,12 @@ DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
         1, MANY, 0,
         doc: /* Assign higher priority to the charsets given as arguments.
  usage: (set-charset-priority &rest charsets)  */)
-       (nargs, args)
-     int nargs;
-     Lisp_Object *args;
+  (ptrdiff_t nargs, Lisp_Object *args)
  {
    Lisp_Object new_head, old_list, arglist[2];
    Lisp_Object list_2022, list_emacs_mule;
-  int i, id;
+  ptrdiff_t i;
+  int id;
  
    old_list = Fcopy_sequence (Vcharset_ordered_list);
    new_head = Qnil;
@@ -2013,6 +2177,7 @@ usage: (set-charset-priority &rest charsets)  */)
    Vcharset_ordered_list = Fnconc (2, arglist);
    charset_ordered_list_tick++;
  
+  charset_unibyte = -1;
    for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
         CONSP (old_list); old_list = XCDR (old_list))
      {
@@ -2020,9 +2185,20 @@ usage: (set-charset-priority &rest charsets)  */)
         list_2022 = Fcons (XCAR (old_list), list_2022);
        if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
         list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
+      if (charset_unibyte < 0)
+       {
+         struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
+
+         if (CHARSET_DIMENSION (charset) == 1
+             && CHARSET_ASCII_COMPATIBLE_P (charset)
+             && CHARSET_MAX_CHAR (charset) >= 0x80)
+           charset_unibyte = CHARSET_ID (charset);
+       }
      }
    Viso_2022_charset_list = Fnreverse (list_2022);
    Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
+  if (charset_unibyte < 0)
+    charset_unibyte = charset_iso_8859_1;
  
    return Qnil;
  }
@@ -2031,8 +2207,7 @@ DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
         0, 1, 0,
         doc: /* Internal use only.
  Return charset identification number of CHARSET.  */)
-     (charset)
-     Lisp_Object charset;
+  (Lisp_Object charset)
  {
    int id;
  
@@ -2040,13 +2215,77 @@ Return charset identification number of CHARSET.  */)
    return make_number (id);
  }
  
+struct charset_sort_data
+{
+  Lisp_Object charset;
+  int id;
+  ptrdiff_t priority;
+};
+
+static int
+charset_compare (const void *d1, const void *d2)
+{
+  const struct charset_sort_data *data1 = d1, *data2 = d2;
+  if (data1->priority != data2->priority)
+    return data1->priority < data2->priority ? -1 : 1;
+  return 0;
+}
+
+DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
+       doc: /* Sort charset list CHARSETS by a priority of each charset.
+Return the sorted list.  CHARSETS is modified by side effects.
+See also `charset-priority-list' and `set-charset-priority'.  */)
+     (Lisp_Object charsets)
+{
+  Lisp_Object len = Flength (charsets);
+  ptrdiff_t n = XFASTINT (len), i, j;
+  int done;
+  Lisp_Object tail, elt, attrs;
+  struct charset_sort_data *sort_data;
+  int id, min_id = INT_MAX, max_id = INT_MIN;
+  USE_SAFE_ALLOCA;
+
+  if (n == 0)
+    return Qnil;
+  SAFE_NALLOCA (sort_data, 1, n);
+  for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
+    {
+      elt = XCAR (tail);
+      CHECK_CHARSET_GET_ATTR (elt, attrs);
+      sort_data[i].charset = elt;
+      sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
+      if (id < min_id)
+       min_id = id;
+      if (id > max_id)
+       max_id = id;
+    }
+  for (done = 0, tail = Vcharset_ordered_list, i = 0;
+       done < n && CONSP (tail); tail = XCDR (tail), i++)
+    {
+      elt = XCAR (tail);
+      id = XFASTINT (elt);
+      if (id >= min_id && id <= max_id)
+       for (j = 0; j < n; j++)
+         if (sort_data[j].id == id)
+           {
+             sort_data[j].priority = i;
+             done++;
+           }
+    }
+  qsort (sort_data, n, sizeof *sort_data, charset_compare);
+  for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
+    XSETCAR (tail, sort_data[i].charset);
+  SAFE_FREE ();
+  return charsets;
+}
+
  \f
  void
-init_charset ()
+init_charset (void)
  {
    Lisp_Object tempdir;
    tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
-  if (access (SDATA (tempdir), 0) < 0)
+  if (access (SSDATA (tempdir), 0) < 0)
      {
        dir_warning ("Error: charsets directory (%s) does not exist.\n\
  Emacs will not function correctly without the character map files.\n\
@@ -2060,7 +2299,7 @@ Please check your installation!\n",
  
  
  void
-init_charset_once ()
+init_charset_once (void)
  {
    int i, j, k;
  
@@ -2070,22 +2309,30 @@ init_charset_once ()
         iso_charset_table[i][j][k] = -1;
  
    for (i = 0; i < 256; i++)
-    emacs_mule_charset[i] = NULL;
+    emacs_mule_charset[i] = -1;
  
    charset_jisx0201_roman = -1;
    charset_jisx0208_1978 = -1;
    charset_jisx0208 = -1;
-
-  for (i = 0; i < 128; i++)
-    unibyte_to_multibyte_table[i] = i;
-  for (; i < 256; i++)
-    unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
+  charset_ksc5601 = -1;
  }
  
  #ifdef emacs
  
+/* Allocate an initial charset table that is large enough to handle
+   Emacs while it is bootstrapping.  As of September 2011, the size
+   needs to be at least 166; make it a bit bigger to allow for future
+   expansion.
+
+   Don't make the value so small that the table is reallocated during
+   bootstrapping, as glibc malloc calls larger than just under 64 KiB
+   during an initial bootstrap wreak havoc after dumping; see the
+   M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
+   internal to glibc malloc and perhaps to Emacs malloc debugging.  */
+static struct charset charset_table_init[180];
+
  void
-syms_of_charset ()
+syms_of_charset (void)
  {
    DEFSYM (Qcharsetp, "charsetp");
  
@@ -2108,8 +2355,8 @@ syms_of_charset ()
    Vemacs_mule_charset_list = Qnil;
  
    /* Don't staticpro them here.  It's done in syms_of_fns.  */
-  QCtest = intern (":test");
-  Qeq = intern ("eq");
+  QCtest = intern_c_string (":test");
+  Qeq = intern_c_string ("eq");
  
    staticpro (&Vcharset_hash_table);
    {
@@ -2119,14 +2366,10 @@ syms_of_charset ()
      Vcharset_hash_table = Fmake_hash_table (2, args);
    }
  
-  charset_table_size = 128;
-  charset_table = ((struct charset *)
-                  xmalloc (sizeof (struct charset) * charset_table_size));
+  charset_table = charset_table_init;
+  charset_table_size = sizeof charset_table_init / sizeof *charset_table_init;
    charset_table_used = 0;
  
-  staticpro (&Vchar_unified_charset_table);
-  Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
-
    defsubr (&Scharsetp);
    defsubr (&Smap_charset_chars);
    defsubr (&Sdefine_charset_internal);
@@ -2149,16 +2392,21 @@ syms_of_charset ()
    defsubr (&Scharset_priority_list);
    defsubr (&Sset_charset_priority);
    defsubr (&Scharset_id_internal);
+  defsubr (&Ssort_charsets);
  
-  DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
+  DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
                doc: /* *List of directories to search for charset map files.  */);
    Vcharset_map_path = Qnil;
  
-  DEFVAR_LISP ("charset-list", &Vcharset_list,
+  DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
+              doc: /* Inhibit loading of charset maps.  Used when dumping Emacs.  */);
+  inhibit_load_charset_map = 0;
+
+  DEFVAR_LISP ("charset-list", Vcharset_list,
                doc: /* List of all charsets ever defined.  */);
    Vcharset_list = Qnil;
  
-  DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
+  DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
                doc: /* ISO639 language mnemonic symbol for the current language environment.
  If the current language environment is for multiple languages (e.g. "Latin-1"),
  the value may be a list of mnemonics.  */);
@@ -2180,9 +2428,7 @@ the value may be a list of mnemonics.  */);
      = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
                                128, 255, -1, 0, -1, 0, 1,
                                MAX_5_BYTE_CHAR + 1);
+  charset_unibyte = charset_iso_8859_1;
  }
  
  #endif /* emacs */
-
-/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
-   (do not change this comment) */