Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-11
author Miles Bader <miles@gnu.org>
Sat, 4 Feb 2006 01:01:38 +0000 (01:01 +0000)
committer Miles Bader <miles@gnu.org>
Sat, 4 Feb 2006 01:01:38 +0000 (01:01 +0000)
Merge from emacs--devo--0

Patches applied:

 * emacs--devo--0  (patch 34-42)

   - Update from CVS
   - Merge from gnus--rel--5.10

 * gnus--rel--5.10  (patch 14-17)

   - Update from CVS
   - Merge from emacs--devo--0

1  2 
etc/NEWS
etc/TODO
lisp/ChangeLog
lisp/arc-mode.el
lisp/gnus/mml.el
lisp/international/mule-cmds.el
lisp/simple.el
src/ChangeLog
src/coding.c
src/xdisp.c

diff --cc etc/NEWS
Simple merge
diff --cc etc/TODO
Simple merge
diff --cc lisp/ChangeLog
Simple merge
diff --cc lisp/arc-mode.el
Simple merge
diff --cc lisp/gnus/mml.el
Simple merge
diff --cc lisp/international/mule-cmds.el
Simple merge
diff --cc lisp/simple.el
Simple merge
diff --cc src/ChangeLog
Simple merge
diff --cc src/coding.c
@@@ -818,1480 -783,463 +818,1482 @@@ static struct coding_system coding_cate
    } while (0)
  
  
 -/* Decode composition sequence encoded by `emacs-mule' at the source
 -   pointed by SRC.  SRC_END is the end of source.  Store information
 -   of the composition in CODING->cmp_data.
 +#define EMIT_FOUR_BYTES(c1, c2, c3, c4)               \
 +  do {                                                \
 +    EMIT_TWO_BYTES (c1, c2);                  \
 +    EMIT_TWO_BYTES (c3, c4);                  \
 +  } while (0)
  
 -   For backward compatibility, decode also a composition sequence of
 -   Emacs 20 style.  In that case, the composition sequence contains
 -   characters that should be extracted into a buffer or string.  Store
 -   those characters at *DESTINATION in multibyte form.
  
 -   If we encounter an invalid byte sequence, return 0.
 -   If we encounter an insufficient source or destination, or
 -   insufficient space in CODING->cmp_data, return 1.
 -   Otherwise, return consumed bytes in the source.
 +/* Prototypes for static functions.  */
 +static void record_conversion_result P_ ((struct coding_system *coding,
 +                                        enum coding_result_code result));
 +static int detect_coding_utf_8 P_ ((struct coding_system *,
 +                                  struct coding_detection_info *info));
 +static void decode_coding_utf_8 P_ ((struct coding_system *));
 +static int encode_coding_utf_8 P_ ((struct coding_system *));
 +
 +static int detect_coding_utf_16 P_ ((struct coding_system *,
 +                                   struct coding_detection_info *info));
 +static void decode_coding_utf_16 P_ ((struct coding_system *));
 +static int encode_coding_utf_16 P_ ((struct coding_system *));
 +
 +static int detect_coding_iso_2022 P_ ((struct coding_system *,
 +                                     struct coding_detection_info *info));
 +static void decode_coding_iso_2022 P_ ((struct coding_system *));
 +static int encode_coding_iso_2022 P_ ((struct coding_system *));
 +
 +static int detect_coding_emacs_mule P_ ((struct coding_system *,
 +                                       struct coding_detection_info *info));
 +static void decode_coding_emacs_mule P_ ((struct coding_system *));
 +static int encode_coding_emacs_mule P_ ((struct coding_system *));
 +
 +static int detect_coding_sjis P_ ((struct coding_system *,
 +                                 struct coding_detection_info *info));
 +static void decode_coding_sjis P_ ((struct coding_system *));
 +static int encode_coding_sjis P_ ((struct coding_system *));
 +
 +static int detect_coding_big5 P_ ((struct coding_system *,
 +                                 struct coding_detection_info *info));
 +static void decode_coding_big5 P_ ((struct coding_system *));
 +static int encode_coding_big5 P_ ((struct coding_system *));
 +
 +static int detect_coding_ccl P_ ((struct coding_system *,
 +                                struct coding_detection_info *info));
 +static void decode_coding_ccl P_ ((struct coding_system *));
 +static int encode_coding_ccl P_ ((struct coding_system *));
 +
 +static void decode_coding_raw_text P_ ((struct coding_system *));
 +static int encode_coding_raw_text P_ ((struct coding_system *));
 +
 +static void coding_set_source P_ ((struct coding_system *));
 +static void coding_set_destination P_ ((struct coding_system *));
 +static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 +static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 +                                          EMACS_INT));
 +static unsigned char *alloc_destination P_ ((struct coding_system *,
 +                                           EMACS_INT, unsigned char *));
 +static void setup_iso_safe_charsets P_ ((Lisp_Object));
 +static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 +                                                   int *, int *,
 +                                                   unsigned char *));
 +static int detect_eol P_ ((const unsigned char *,
 +                         EMACS_INT, enum coding_category));
 +static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 +static void decode_eol P_ ((struct coding_system *));
 +static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 +static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 +                                      int, int *, int *));
 +static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 +static INLINE void produce_composition P_ ((struct coding_system *, int *,
 +                                          EMACS_INT));
 +static INLINE void produce_charset P_ ((struct coding_system *, int *,
 +                                      EMACS_INT));
 +static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 +static int decode_coding P_ ((struct coding_system *));
 +static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 +                                                    struct coding_system *, 
 +                                                    int *, EMACS_INT *));
 +static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 +                                                struct coding_system *,
 +                                                int *, EMACS_INT *));
 +static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 +static int encode_coding P_ ((struct coding_system *));
 +static Lisp_Object make_conversion_work_buffer P_ ((int));
 +static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 +static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 +static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
  
 -*/
 -static INLINE int
 -decode_composition_emacs_mule (coding, src, src_end,
 -                             destination, dst_end, dst_bytes)
 -     struct coding_system *coding;
 -     const unsigned char *src, *src_end;
 -     unsigned char **destination, *dst_end;
 -     int dst_bytes;
 +static void
 +record_conversion_result (struct coding_system *coding,
 +                        enum coding_result_code result)
  {
 -  unsigned char *dst = *destination;
 -  int method, data_len, nchars;
 -  const unsigned char *src_base = src++;
 -  /* Store components of composition.  */
 -  int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 -  int ncomponent;
 -  /* Store multibyte form of characters to be composed.  This is for
 -     Emacs 20 style composition sequence.  */
 -  unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 -  unsigned char *bufp = buf;
 -  int c, i, gref, nref;
 -
 -  if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 -      >= COMPOSITION_DATA_SIZE)
 +  coding->result = result;
 +  switch (result)
      {
 -      coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 -      return -1;
 +    case CODING_RESULT_INSUFFICIENT_SRC:
 +      Vlast_code_conversion_error = Qinsufficient_source;
 +      break;
 +    case CODING_RESULT_INCONSISTENT_EOL:
 +      Vlast_code_conversion_error = Qinconsistent_eol;
 +      break;
 +    case CODING_RESULT_INVALID_SRC:
 +      Vlast_code_conversion_error = Qinvalid_source;
 +      break;
 +    case CODING_RESULT_INTERRUPT:
 +      Vlast_code_conversion_error = Qinterrupted;
 +      break;
 +    case CODING_RESULT_INSUFFICIENT_MEM:
 +      Vlast_code_conversion_error = Qinsufficient_memory;
 +      break;
 +    default:
 +      Vlast_code_conversion_error = intern ("Unknown error");
      }
 +}
  
 -  ONE_MORE_BYTE (c);
 -  if (c - 0xF0 >= COMPOSITION_RELATIVE
 -         && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 -    {
 -      int with_rule;
 +#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 +  do {                                                                             \
 +    charset_map_loaded = 0;                                                \
 +    c = DECODE_CHAR (charset, code);                                       \
 +    if (charset_map_loaded)                                                \
 +      {                                                                            \
 +      const unsigned char *orig = coding->source;                          \
 +      EMACS_INT offset;                                                    \
 +                                                                           \
 +      coding_set_source (coding);                                          \
 +      offset = coding->source - orig;                                      \
 +      src += offset;                                                       \
 +      src_base += offset;                                                  \
 +      src_end += offset;                                                   \
 +      }                                                                            \
 +  } while (0)
  
 -      method = c - 0xF0;
 -      with_rule = (method == COMPOSITION_WITH_RULE
 -                 || method == COMPOSITION_WITH_RULE_ALTCHARS);
 -      ONE_MORE_BYTE (c);
 -      data_len = c - 0xA0;
 -      if (data_len < 4
 -        || src_base + data_len > src_end)
 -      return 0;
 -      ONE_MORE_BYTE (c);
 -      nchars = c - 0xA0;
 -      if (c < 1)
 -      return 0;
 -      for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 -      {
 -        /* If it is longer than this, it can't be valid.  */
 -        if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 -          return 0;
  
 -        if (ncomponent % 2 && with_rule)
 -          {
 -            ONE_MORE_BYTE (gref);
 -            gref -= 32;
 -            ONE_MORE_BYTE (nref);
 -            nref -= 32;
 -            c = COMPOSITION_ENCODE_RULE (gref, nref);
 -          }
 -        else
 -          {
 -            int bytes;
 -            if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 -                || (coding->flags /* We are recovering a file.  */
 -                    && src[0] == LEADING_CODE_8_BIT_CONTROL
 -                    && ! CHAR_HEAD_P (src[1])))
 -              c = STRING_CHAR (src, bytes);
 -            else
 -              c = *src, bytes = 1;
 -            src += bytes;
 -          }
 -        component[ncomponent] = c;
 -      }
 +#define ASSURE_DESTINATION(bytes)                             \
 +  do {                                                                \
 +    if (dst + (bytes) >= dst_end)                             \
 +      {                                                               \
 +      int more_bytes = charbuf_end - charbuf + (bytes);       \
 +                                                              \
 +      dst = alloc_destination (coding, more_bytes, dst);      \
 +      dst_end = coding->destination + coding->dst_bytes;      \
 +      }                                                               \
 +  } while (0)
 +
 +
 +
 +static void
 +coding_set_source (coding)
 +     struct coding_system *coding;
 +{
 +  if (BUFFERP (coding->src_object))
 +    {
 +      struct buffer *buf = XBUFFER (coding->src_object);
 +
 +      if (coding->src_pos < 0)
 +      coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 +      else
 +      coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
      }
 -  else if (c >= 0x80)
 +  else if (STRINGP (coding->src_object))
      {
 -      /* This may be an old Emacs 20 style format.  See the comment at
 -       the section 2 of this file.  */
 -      while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 -      if (src == src_end
 -        && !(coding->mode & CODING_MODE_LAST_BLOCK))
 -      goto label_end_of_loop;
 +      coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 +    }
 +  else
 +    /* Otherwise, the source is a C string and is never relocated
 +       automatically.  Thus we don't have to update anything.  */
 +    ;
 +}
  
 -      src_end = src;
 -      src = src_base + 1;
 -      if (c < 0xC0)
 +static void
 +coding_set_destination (coding)
 +     struct coding_system *coding;
 +{
 +  if (BUFFERP (coding->dst_object))
 +    {
 +      if (coding->src_pos < 0)
        {
 -        method = COMPOSITION_RELATIVE;
 -        for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 -          {
 -            DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 -            if (c < 0)
 -              break;
 -            component[ncomponent++] = c;
 -          }
 -        if (ncomponent < 2)
 -          return 0;
 -        nchars = ncomponent;
 +        coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 +        coding->dst_bytes = (GAP_END_ADDR
 +                             - (coding->src_bytes - coding->consumed)
 +                             - coding->destination);
        }
 -      else if (c == 0xFF)
 +      else
        {
 -        method = COMPOSITION_WITH_RULE;
 -        src++;
 -        DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 -        if (c < 0)
 -          return 0;
 -        component[0] = c;
 -        for (ncomponent = 1;
 -             ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 -          {
 -            DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 -            if (c < 0)
 -              break;
 -            component[ncomponent++] = c;
 -            DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 -            if (c < 0)
 -              break;
 -            component[ncomponent++] = c;
 -          }
 -        if (ncomponent < 3)
 -          return 0;
 -        nchars = (ncomponent + 1) / 2;
 +        /* We are sure that coding->dst_pos_byte is before the gap
 +           of the buffer. */
 +        coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 +                               + coding->dst_pos_byte - 1);
 +        coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 +                             - coding->destination);
        }
 -      else
 -      return 0;
      }
    else
 -    return 0;
 +    /* Otherwise, the destination is a C string and is never relocated
 +       automatically.  Thus we don't have to update anything.  */
 +    ;
 +}
 +
 +
 +static void
 +coding_alloc_by_realloc (coding, bytes)
 +     struct coding_system *coding;
 +     EMACS_INT bytes;
 +{
 +  coding->destination = (unsigned char *) xrealloc (coding->destination,
 +                                                  coding->dst_bytes + bytes);
 +  coding->dst_bytes += bytes;
 +}
  
 -  if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 +static void
 +coding_alloc_by_making_gap (coding, bytes)
 +     struct coding_system *coding;
 +     EMACS_INT bytes;
 +{
 +  if (BUFFERP (coding->dst_object)
 +      && EQ (coding->src_object, coding->dst_object))
      {
 -      CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 -      for (i = 0; i < ncomponent; i++)
 -      CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 -      CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 -      if (buf < bufp)
 -      {
 -        unsigned char *p = buf;
 -        EMIT_BYTES (p, bufp);
 -        *destination += bufp - buf;
 -        coding->produced_char += nchars;
 -      }
 -      return (src - src_base);
 +      EMACS_INT add = coding->src_bytes - coding->consumed;
 +
 +      GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 +      make_gap (bytes);
 +      GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 +    }
-   else
++  else if (c >= 0x80)
 +    {
 +      Lisp_Object this_buffer;
 +
 +      this_buffer = Fcurrent_buffer ();
 +      set_buffer_internal (XBUFFER (coding->dst_object));
 +      make_gap (bytes);
 +      set_buffer_internal (XBUFFER (this_buffer));
      }
 - label_end_of_loop:
 -  return -1;
  }
  
 -/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
  
 -static void
 -decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 +static unsigned char *
 +alloc_destination (coding, nbytes, dst)
       struct coding_system *coding;
 -     const unsigned char *source;
 -     unsigned char *destination;
 -     int src_bytes, dst_bytes;
 +     EMACS_INT nbytes;
 +     unsigned char *dst;
  {
 -  const unsigned char *src = source;
 -  const unsigned char *src_end = source + src_bytes;
 -  unsigned char *dst = destination;
 -  unsigned char *dst_end = destination + dst_bytes;
 -  /* SRC_BASE remembers the start position in source in each loop.
 -     The loop will be exited when there's not enough source code, or
 -     when there's not enough destination area to produce a
 -     character.  */
 -  const unsigned char *src_base;
 +  EMACS_INT offset = dst - coding->destination;
 +
 +  if (BUFFERP (coding->dst_object))
 +    coding_alloc_by_making_gap (coding, nbytes);
 +  else
 +    coding_alloc_by_realloc (coding, nbytes);
 +  record_conversion_result (coding, CODING_RESULT_SUCCESS);
 +  coding_set_destination (coding);
 +  dst = coding->destination + offset;
 +  return dst;
 +}
 +
 +/** Macros for annotations.  */
 +
 +/* Maximum length of annotation data (sum of annotations for
 +   composition and charset).  */
 +#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
 +
 +/* An annotation data is stored in the array coding->charbuf in this
 +   format:
 +     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
 +   LENGTH is the number of elements in the annotation.
 +   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
 +   NCHARS is the number of characters in the text annotated.
 +
 +   The format of the following elements depend on ANNOTATION_MASK.
 +
 +   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
 +   follows:
 +     ... METHOD [ COMPOSITION-COMPONENTS ... ]
 +   METHOD is one of enum composition_method.
 +   Optionnal COMPOSITION-COMPONENTS are characters and composition
 +   rules.
 +
 +   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
 +   follows.  */
 +
 +#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)   \
 +  do {                                                        \
 +    *(buf)++ = -(len);                                        \
 +    *(buf)++ = (mask);                                        \
 +    *(buf)++ = (nchars);                              \
 +    coding->annotated = 1;                            \
 +  } while (0);
 +
 +#define ADD_COMPOSITION_DATA(buf, nchars, method)                         \
 +  do {                                                                            \
 +    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
 +    *buf++ = method;                                                      \
 +  } while (0)
 +
 +
 +#define ADD_CHARSET_DATA(buf, nchars, id)                             \
 +  do {                                                                        \
 +    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars);       \
 +    *buf++ = id;                                                      \
 +  } while (0)
 +
 +\f
 +/*** 2. Emacs' internal format (emacs-utf-8) ***/
 +
 +
 +
 +\f
 +/*** 3. UTF-8 ***/
 +
 +/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 +   Check if a text is encoded in UTF-8.  If it is, return 1, else
 +   return 0.  */
 +
 +#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
 +#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
 +#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
 +#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
 +#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
 +#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
 +
 +static int
 +detect_coding_utf_8 (coding, detect_info)
 +     struct coding_system *coding;
 +     struct coding_detection_info *detect_info;
 +{
 +  const unsigned char *src = coding->source, *src_base;
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
 +  int multibytep = coding->src_multibyte;
 +  int consumed_chars = 0;
 +  int found = 0;
 +
 +  detect_info->checked |= CATEGORY_MASK_UTF_8;
 +  /* A coding system of this category is always ASCII compatible.  */
 +  src += coding->head_ascii;
  
 -  coding->produced_char = 0;
 -  while ((src_base = src) < src_end)
 +  while (1)
      {
 -      unsigned char tmp[MAX_MULTIBYTE_LENGTH];
 -      const unsigned char *p;
 -      int bytes;
 +      int c, c1, c2, c3, c4;
  
 -      if (*src == '\r')
 +      src_base = src;
 +      ONE_MORE_BYTE (c);
 +      if (c < 0 || UTF_8_1_OCTET_P (c))
 +      continue;
 +      ONE_MORE_BYTE (c1);
 +      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
 +      break;
 +      if (UTF_8_2_OCTET_LEADING_P (c))
        {
 -        int c = *src++;
 -
 -        if (coding->eol_type == CODING_EOL_CR)
 -          c = '\n';
 -        else if (coding->eol_type == CODING_EOL_CRLF)
 -          {
 -            ONE_MORE_BYTE (c);
 -            if (c != '\n')
 -              {
 -                src--;
 -                c = '\r';
 -              }
 -          }
 -        *dst++ = c;
 -        coding->produced_char++;
 +        found = CATEGORY_MASK_UTF_8;
          continue;
        }
 -      else if (*src == '\n')
 +      ONE_MORE_BYTE (c2);
 +      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
 +      break;
 +      if (UTF_8_3_OCTET_LEADING_P (c))
        {
 -        if ((coding->eol_type == CODING_EOL_CR
 -             || coding->eol_type == CODING_EOL_CRLF)
 -            && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 -          {
 -            coding->result = CODING_FINISH_INCONSISTENT_EOL;
 -            goto label_end_of_loop;
 -          }
 -        *dst++ = *src++;
 -        coding->produced_char++;
 +        found = CATEGORY_MASK_UTF_8;
          continue;
        }
 -      else if (*src == 0x80 && coding->cmp_data)
 +      ONE_MORE_BYTE (c3);
 +      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
 +      break;
 +      if (UTF_8_4_OCTET_LEADING_P (c))
        {
 -        /* Start of composition data.  */
 -        int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 -                                                       &dst, dst_end,
 -                                                       dst_bytes);
 -        if (consumed < 0)
 -          goto label_end_of_loop;
 -        else if (consumed > 0)
 -          {
 -            src += consumed;
 -            continue;
 -          }
 -        bytes = CHAR_STRING (*src, tmp);
 -        p = tmp;
 -        src++;
 +        found = CATEGORY_MASK_UTF_8;
 +        continue;
        }
 -      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 -             || (coding->flags /* We are recovering a file.  */
 -                 && src[0] == LEADING_CODE_8_BIT_CONTROL
 -                 && ! CHAR_HEAD_P (src[1])))
 +      ONE_MORE_BYTE (c4);
 +      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
 +      break;
 +      if (UTF_8_5_OCTET_LEADING_P (c))
        {
 -        p = src;
 -        src += bytes;
 +        found = CATEGORY_MASK_UTF_8;
 +        continue;
        }
 -      else
 -      {
 -        int i, c;
 +      break;
 +    }
 +  detect_info->rejected |= CATEGORY_MASK_UTF_8;
 +  return 0;
  
 -        bytes = BYTES_BY_CHAR_HEAD (*src);
 -        src++;
 -        for (i = 1; i < bytes; i++)
 -          {
 -            ONE_MORE_BYTE (c);
 -            if (CHAR_HEAD_P (c))
 -              break;
 -          }
 -        if (i < bytes)
 -          {
 -            bytes = CHAR_STRING (*src_base, tmp);
 -            p = tmp;
 -            src = src_base + 1;
 -          }
 -        else
 -          {
 -            p = src_base;
 -          }
 -      }
 -      if (dst + bytes >= (dst_bytes ? dst_end : src))
 -      {
 -        coding->result = CODING_FINISH_INSUFFICIENT_DST;
 -        break;
 -      }
 -      while (bytes--) *dst++ = *p++;
 -      coding->produced_char++;
 + no_more_source:
 +  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
 +    {
 +      detect_info->rejected |= CATEGORY_MASK_UTF_8;
 +      return 0;
      }
 - label_end_of_loop:
 -  coding->consumed = coding->consumed_char = src_base - source;
 -  coding->produced = dst - destination;
 +  detect_info->found |= found;
 +  return 1;
  }
  
  
 -/* Encode composition data stored at DATA into a special byte sequence
 -   starting by 0x80.  Update CODING->cmp_data_start and maybe
 -   CODING->cmp_data for the next call.  */
 -
 -#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                   \
 -  do {                                                                        \
 -    unsigned char buf[1024], *p0 = buf, *p;                           \
 -    int len = data[0];                                                        \
 -    int i;                                                            \
 -                                                                      \
 -    buf[0] = 0x80;                                                    \
 -    buf[1] = 0xF0 + data[3];  /* METHOD */                            \
 -    buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */         \
 -    p = buf + 4;                                                      \
 -    if (data[3] == COMPOSITION_WITH_RULE                              \
 -      || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
 -      {                                                                       \
 -      p += CHAR_STRING (data[4], p);                                  \
 -      for (i = 5; i < len; i += 2)                                    \
 -        {                                                             \
 -          int gref, nref;                                             \
 -           COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
 -          *p++ = 0x20 + gref;                                         \
 -          *p++ = 0x20 + nref;                                         \
 -          p += CHAR_STRING (data[i + 1], p);                          \
 -        }                                                             \
 -      }                                                                       \
 -    else                                                              \
 -      {                                                                       \
 -      for (i = 4; i < len; i++)                                       \
 -        p += CHAR_STRING (data[i], p);                                \
 -      }                                                                       \
 -    buf[2] = 0xA0 + (p - buf);        /* COMPONENTS-BYTES */                  \
 -                                                                      \
 -    if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))            \
 -      {                                                                       \
 -      coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
 -      goto label_end_of_loop;                                         \
 -      }                                                                       \
 -    while (p0 < p)                                                    \
 -      *dst++ = *p0++;                                                 \
 -    coding->cmp_data_start += data[0];                                        \
 -    if (coding->cmp_data_start == coding->cmp_data->used              \
 -      && coding->cmp_data->next)                                      \
 -      {                                                                       \
 -      coding->cmp_data = coding->cmp_data->next;                      \
 -      coding->cmp_data_start = 0;                                     \
 -      }                                                                       \
 -  } while (0)
 -
 -
 -static void encode_eol P_ ((struct coding_system *, const unsigned char *,
 -                          unsigned char *, int, int));
 -
  static void
 -encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 +decode_coding_utf_8 (coding)
       struct coding_system *coding;
 -     const unsigned char *source;
 -     unsigned char *destination;
 -     int src_bytes, dst_bytes;
  {
 -  const unsigned char *src = source;
 -  const unsigned char *src_end = source + src_bytes;
 -  unsigned char *dst = destination;
 -  unsigned char *dst_end = destination + dst_bytes;
 +  const unsigned char *src = coding->source + coding->consumed;
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base;
 -  int c;
 -  int char_offset;
 -  int *data;
 -
 -  Lisp_Object translation_table;
 +  int *charbuf = coding->charbuf + coding->charbuf_used;
 +  int *charbuf_end = coding->charbuf + coding->charbuf_size;
 +  int consumed_chars = 0, consumed_chars_base;
 +  int multibytep = coding->src_multibyte;
 +  Lisp_Object attr, charset_list;
  
 -  translation_table = Qnil;
 +  CODING_GET_INFO (coding, attr, charset_list);
  
 -  /* Optimization for the case that there's no composition.  */
 -  if (!coding->cmp_data || coding->cmp_data->used == 0)
 -    {
 -      encode_eol (coding, source, destination, src_bytes, dst_bytes);
 -      return;
 -    }
 -
 -  char_offset = coding->cmp_data->char_offset;
 -  data = coding->cmp_data->data + coding->cmp_data_start;
    while (1)
      {
 +      int c, c1, c2, c3, c4, c5;
 +
        src_base = src;
 +      consumed_chars_base = consumed_chars;
  
 -      /* If SRC starts a composition, encode the information about the
 -       composition in advance.  */
 -      if (coding->cmp_data_start < coding->cmp_data->used
 -        && char_offset + coding->consumed_char == data[1])
 +      if (charbuf >= charbuf_end)
 +      break;
 +
 +      ONE_MORE_BYTE (c1);
 +      if (c1 < 0)
        {
 -        ENCODE_COMPOSITION_EMACS_MULE (coding, data);
 -        char_offset = coding->cmp_data->char_offset;
 -        data = coding->cmp_data->data + coding->cmp_data_start;
 +        c = - c1;
        }
 -
 -      ONE_MORE_CHAR (c);
 -      if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
 -                      || coding->eol_type == CODING_EOL_CR))
 +      else if (UTF_8_1_OCTET_P(c1))
        {
 -        if (coding->eol_type == CODING_EOL_CRLF)
 -          EMIT_TWO_BYTES ('\r', c);
 -        else
 -          EMIT_ONE_BYTE ('\r');
 +        c = c1;
        }
 -      else if (SINGLE_BYTE_CHAR_P (c))
 +      else
        {
 -        if (coding->flags && ! ASCII_BYTE_P (c))
 +        ONE_MORE_BYTE (c2);
 +        if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
 +          goto invalid_code;
 +        if (UTF_8_2_OCTET_LEADING_P (c1))
            {
 -            /* As we are auto saving, retain the multibyte form for
 -               8-bit chars.  */
 -            unsigned char buf[MAX_MULTIBYTE_LENGTH];
 -            int bytes = CHAR_STRING (c, buf);
 -
 -            if (bytes == 1)
 -              EMIT_ONE_BYTE (buf[0]);
 -            else
 -              EMIT_TWO_BYTES (buf[0], buf[1]);
 +            c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
 +            /* Reject overlong sequences here and below.  Encoders
 +               producing them are incorrect, they can be misleading,
 +               and they mess up read/write invariance.  */
 +            if (c < 128)
 +              goto invalid_code;
            }
          else
 -          EMIT_ONE_BYTE (c);
 +          {
 +            ONE_MORE_BYTE (c3);
 +            if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
 +              goto invalid_code;
 +            if (UTF_8_3_OCTET_LEADING_P (c1))
 +              {
 +                c = (((c1 & 0xF) << 12)
 +                     | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
 +                if (c < 0x800
 +                    || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
 +                  goto invalid_code;
 +              }
 +            else
 +              {
 +                ONE_MORE_BYTE (c4);
 +                if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
 +                  goto invalid_code;
 +                if (UTF_8_4_OCTET_LEADING_P (c1))
 +                  {
 +                  c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
 +                       | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
 +                  if (c < 0x10000)
 +                    goto invalid_code;
 +                  }
 +                else
 +                  {
 +                    ONE_MORE_BYTE (c5);
 +                    if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
 +                      goto invalid_code;
 +                    if (UTF_8_5_OCTET_LEADING_P (c1))
 +                      {
 +                        c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
 +                             | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
 +                             | (c5 & 0x3F));
 +                        if ((c > MAX_CHAR) || (c < 0x200000))
 +                          goto invalid_code;
 +                      }
 +                    else
 +                      goto invalid_code;
 +                  }
 +              }
 +          }
        }
 -      else
 -      EMIT_BYTES (src_base, src);
 -      coding->consumed_char++;
 -    }
 - label_end_of_loop:
 -  coding->consumed = src_base - source;
 -  coding->produced = coding->produced_char = dst - destination;
 -  return;
 -}
  
 -\f
 -/*** 3. ISO2022 handlers ***/
 +      *charbuf++ = c;
 +      continue;
  
 -/* The following note describes the coding system ISO2022 briefly.
 -   Since the intention of this note is to help understand the
 -   functions in this file, some parts are NOT ACCURATE or are OVERLY
 -   SIMPLIFIED.  For thorough understanding, please refer to the
 -   original document of ISO2022.  This is equivalent to the standard
 -   ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
 +    invalid_code:
 +      src = src_base;
 +      consumed_chars = consumed_chars_base;
 +      ONE_MORE_BYTE (c);
 +      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
 +      coding->errors++;
 +    }
++  else
++    return 0;
  
 -   ISO2022 provides many mechanisms to encode several character sets
 -   in 7-bit and 8-bit environments.  For 7-bit environments, all text
 -   is encoded using bytes less than 128.  This may make the encoded
 -   text a little bit longer, but the text passes more easily through
 -   several types of gateway, some of which strip off the MSB (Most
 -   Significant Bit).
 + no_more_source:
 +  coding->consumed_char += consumed_chars_base;
 +  coding->consumed = src_base - coding->source;
 +  coding->charbuf_used = charbuf - coding->charbuf;
 +}
  
 -   There are two kinds of character sets: control character sets and
 -   graphic character sets.  The former contain control characters such
 -   as `newline' and `escape' to provide control functions (control
 -   functions are also provided by escape sequences).  The latter
 -   contain graphic characters such as 'A' and '-'.  Emacs recognizes
 -   two control character sets and many graphic character sets.
  
 -   Graphic character sets are classified into one of the following
 -   four classes, according to the number of bytes (DIMENSION) and
 -   number of characters in one dimension (CHARS) of the set:
 -   - DIMENSION1_CHARS94
 -   - DIMENSION1_CHARS96
 -   - DIMENSION2_CHARS94
 -   - DIMENSION2_CHARS96
 +/* Encode the characters in CODING->charbuf into
 +   CODING->destination, starting at offset CODING->produced.  A
 +   character satisfying CHAR_BYTE8_P is written as the single byte
 +   that CHAR_TO_BYTE8 yields; any other character is written in the
 +   byte form produced by CHAR_STRING / CHAR_STRING_ADVANCE.  On
 +   completion record CODING_RESULT_SUCCESS, update CODING->produced
 +   and CODING->produced_char, and return 0.  */
 +static int
 +encode_coding_utf_8 (coding)
 +     struct coding_system *coding;
 +{
 +  int multibytep = coding->dst_multibyte;
 +  int *charbuf = coding->charbuf;
 +  int *charbuf_end = charbuf + coding->charbuf_used;
 +  unsigned char *dst = coding->destination + coding->produced;
 +  unsigned char *dst_end = coding->destination + coding->dst_bytes;
 +  int produced_chars = 0;
 +  int c;
  
 -   In addition, each character set is assigned an identification tag,
 -   unique for each set, called the "final character" (denoted as <F>
 -   hereafter).  The <F> of each character set is decided by ECMA(*)
 -   when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 -   (0x30..0x3F are for private use only).
 +  if (multibytep)
 +    {
 +      /* Multibyte destination: every byte goes through
 +         EMIT_ONE_BYTE, and twice the room of the unibyte case is
 +         requested per character.  */
 +      int safe_room = MAX_MULTIBYTE_LENGTH * 2;
  
 -   Note (*): ECMA = European Computer Manufacturers Association
 +      while (charbuf < charbuf_end)
 +      {
 +        unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
  
 -   Here are examples of graphic character sets [NAME(<F>)]:
 +        ASSURE_DESTINATION (safe_room);
 +        c = *charbuf++;
 +        if (CHAR_BYTE8_P (c))
 +          {
 +            c = CHAR_TO_BYTE8 (c);
 +            EMIT_ONE_BYTE (c);
 +          }
 +        else
 +          {
 +            /* Build the byte form in STR, then emit it byte by byte.  */
 +            CHAR_STRING_ADVANCE (c, pend);
 +            for (p = str; p < pend; p++)
 +              EMIT_ONE_BYTE (*p);
 +          }
 +      }
 +    }
 +  else
 +    {
 +      /* Unibyte destination: write straight through DST and count
 +         one produced character per source character.  */
 +      int safe_room = MAX_MULTIBYTE_LENGTH;
 +
 +      while (charbuf < charbuf_end)
 +      {
 +        ASSURE_DESTINATION (safe_room);
 +        c = *charbuf++;
 +        if (CHAR_BYTE8_P (c))
 +          *dst++ = CHAR_TO_BYTE8 (c);
 +        else
 +          dst += CHAR_STRING (c, dst);
 +        produced_chars++;
 +      }
 +    }
 +  record_conversion_result (coding, CODING_RESULT_SUCCESS);
 +  coding->produced_char += produced_chars;
 +  coding->produced = dst - coding->destination;
 +  return 0;
 +}
 +
 +
 +/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 +   Check if a text is encoded in one of UTF-16 based coding systems.
 +   If it is, return 1, else return 0.  */
 +
 +/* Non-zero if the 16-bit value VAL is a UTF-16 high surrogate,
 +   i.e. lies in 0xD800..0xDBFF.  */
 +#define UTF_16_HIGH_SURROGATE_P(val) \
 +  (((val) & 0xFC00) == 0xD800)
 +
 +/* Non-zero if the 16-bit value VAL is a UTF-16 low surrogate,
 +   i.e. lies in 0xDC00..0xDFFF.  */
 +#define UTF_16_LOW_SURROGATE_P(val) \
 +  (((val) & 0xFC00) == 0xDC00)
 +
 +/* Non-zero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
 +#define UTF_16_INVALID_P(val) \
 +  (((val) == 0xFFFE)          \
 +   || ((val) == 0xFFFF)               \
 +   || UTF_16_LOW_SURROGATE_P (val))
 +
 +
 +/* Examine the first two source bytes of CODING for a UTF-16 byte
 +   order mark and record the verdict in DETECT_INFO.  Return 0, with
 +   every UTF-16 category rejected, when this is the last block and
 +   CODING->src_chars is odd; return 1 in all other cases, including
 +   when fewer than two bytes are available.  */
 +static int
 +detect_coding_utf_16 (coding, detect_info)
 +     struct coding_system *coding;
 +     struct coding_detection_info *detect_info;
 +{
 +  const unsigned char *src = coding->source, *src_base = src;
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
 +  int multibytep = coding->src_multibyte;
 +  int consumed_chars = 0;
 +  int c1, c2;
 +
 +  detect_info->checked |= CATEGORY_MASK_UTF_16;
 +  if (coding->mode & CODING_MODE_LAST_BLOCK
 +      && (coding->src_chars & 1))
 +    {
 +      detect_info->rejected |= CATEGORY_MASK_UTF_16;
 +      return 0;
 +    }
 +
 +  ONE_MORE_BYTE (c1);
 +  ONE_MORE_BYTE (c2);
 +  /* FF FE: little-endian with signature.  */
 +  if ((c1 == 0xFF) && (c2 == 0xFE))
 +    {
 +      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
 +                           | CATEGORY_MASK_UTF_16_AUTO);
 +      detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
 +                              | CATEGORY_MASK_UTF_16_BE_NOSIG
 +                              | CATEGORY_MASK_UTF_16_LE_NOSIG);
 +    }
 +  /* FE FF: big-endian with signature.  */
 +  else if ((c1 == 0xFE) && (c2 == 0xFF))
 +    {
 +      detect_info->found |= (CATEGORY_MASK_UTF_16_BE
 +                           | CATEGORY_MASK_UTF_16_AUTO);
 +      detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
 +                              | CATEGORY_MASK_UTF_16_BE_NOSIG
 +                              | CATEGORY_MASK_UTF_16_LE_NOSIG);
 +    }
 +  /* No signature.  NOTE(review): a negative value presumably means
 +     ONE_MORE_BYTE read a non-byte character; confirm against its
 +     definition.  */
 +  else if (c1 >= 0 && c2 >= 0)
 +    {
 +      detect_info->rejected
 +      |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
 +    }
 + no_more_source:
 +  return 1;
 +}
 +
 +/* Decode the source bytes of CODING, starting at offset
 +   CODING->consumed, as UTF-16 and append the resulting code points
 +   to CODING->charbuf.  Byte order and BOM handling are taken from
 +   CODING_UTF_16_ENDIAN and CODING_UTF_16_BOM; a pending high
 +   surrogate is kept in CODING_UTF_16_SURROGATE.  On exit update
 +   CODING->consumed_char, CODING->consumed and
 +   CODING->charbuf_used.  */
 +static void
 +decode_coding_utf_16 (coding)
 +     struct coding_system *coding;
 +{
 +  const unsigned char *src = coding->source + coding->consumed;
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
 +  const unsigned char *src_base;
 +  int *charbuf = coding->charbuf + coding->charbuf_used;
 +  int *charbuf_end = coding->charbuf + coding->charbuf_size;
 +  int consumed_chars = 0, consumed_chars_base;
 +  int multibytep = coding->src_multibyte;
 +  enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
 +  enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
 +  int surrogate = CODING_UTF_16_SURROGATE (coding);
 +  Lisp_Object attr, charset_list;
 +
 +  CODING_GET_INFO (coding, attr, charset_list);
 +
 +  if (bom == utf_16_with_bom)
 +    {
 +      int c, c1, c2;
 +
 +      src_base = src;
 +      ONE_MORE_BYTE (c1);
 +      ONE_MORE_BYTE (c2);
 +      /* C is always assembled big-endian here, so a little-endian
 +         BOM shows up as 0xFFFE.  */
 +      c = (c1 << 8) | c2;
 +
 +      if (endian == utf_16_big_endian
 +        ? c != 0xFEFF : c != 0xFFFE)
 +      {
 +        /* The first two bytes are not BOM.  Treat them as bytes
 +           for a normal character.  */
 +        src = src_base;
 +        coding->errors++;
 +      }
 +      /* The BOM is looked for only once per coding system.  */
 +      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
 +    }
 +  else if (bom == utf_16_detect_bom)
 +    {
 +      /* We have already tried to detect BOM and failed in
 +       detect_coding.  */
 +      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
 +    }
 +
 +  while (1)
 +    {
 +      int c, c1, c2;
 +
 +      src_base = src;
 +      consumed_chars_base = consumed_chars;
 +
 +      /* One iteration may store up to three elements (the unpaired
 +         surrogate case below), so stop unless that many remain.  */
 +      if (charbuf + 2 >= charbuf_end)
 +      break;
 +
 +      ONE_MORE_BYTE (c1);
 +      if (c1 < 0)
 +      {
 +        *charbuf++ = -c1;
 +        continue;
 +      }
 +      ONE_MORE_BYTE (c2);
 +      if (c2 < 0)
 +      {
 +        *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
 +        *charbuf++ = -c2;
 +        continue;
 +      }
 +      c = (endian == utf_16_big_endian
 +         ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
 +      if (surrogate)
 +      {
 +        /* A high surrogate is pending from an earlier code unit.  */
 +        if (! UTF_16_LOW_SURROGATE_P (c))
 +          {
 +            /* It is not followed by a low surrogate: store the two
 +               bytes of the pending surrogate in source order and
 +               count an error.  */
 +            if (endian == utf_16_big_endian)
 +              c1 = surrogate >> 8, c2 = surrogate & 0xFF;
 +            else
 +              c1 = surrogate & 0xFF, c2 = surrogate >> 8;
 +            *charbuf++ = c1;
 +            *charbuf++ = c2;
 +            coding->errors++;
 +            if (UTF_16_HIGH_SURROGATE_P (c))
 +              CODING_UTF_16_SURROGATE (coding) = surrogate = c;
 +            else
 +              *charbuf++ = c;
 +          }
 +        else
 +          {
 +            /* Combine the pair into a code point >= 0x10000.  */
 +            c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
 +            CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
 +            *charbuf++ = 0x10000 + c;
 +          }
 +      }
 +      else
 +      {
 +        /* Remember a high surrogate until the next code unit;
 +           store anything else as is.  */
 +        if (UTF_16_HIGH_SURROGATE_P (c))
 +          CODING_UTF_16_SURROGATE (coding) = surrogate = c;
 +        else
 +          *charbuf++ = c;
 +      }
 +    }
 +
 + no_more_source:
 +  /* Report only what was consumed up to the start of the last
 +     iteration.  */
 +  coding->consumed_char += consumed_chars_base;
 +  coding->consumed = src_base - coding->source;
 +  coding->charbuf_used = charbuf - coding->charbuf;
 +}
 +
 +/* Encode the characters in CODING->charbuf as UTF-16 and append
 +   the bytes to CODING->destination.  A byte order mark is emitted
 +   first unless CODING_UTF_16_BOM is utf_16_without_bom; byte order
 +   follows CODING_UTF_16_ENDIAN.  Code points of 0x10000 and above
 +   are written as a surrogate pair.  On completion record
 +   CODING_RESULT_SUCCESS, update CODING->produced and
 +   CODING->produced_char, and return 0.  */
 +static int
 +encode_coding_utf_16 (coding)
 +     struct coding_system *coding;
 +{
 +  int multibytep = coding->dst_multibyte;
 +  int *charbuf = coding->charbuf;
 +  int *charbuf_end = charbuf + coding->charbuf_used;
 +  unsigned char *dst = coding->destination + coding->produced;
 +  unsigned char *dst_end = coding->destination + coding->dst_bytes;
 +  int safe_room = 8;
 +  enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
 +  int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
 +  int produced_chars = 0;
 +  Lisp_Object attrs, charset_list;
 +  int c;
 +
 +  CODING_GET_INFO (coding, attrs, charset_list);
 +
 +  if (bom != utf_16_without_bom)
 +    {
 +      ASSURE_DESTINATION (safe_room);
 +      if (big_endian)
 +      EMIT_TWO_BYTES (0xFE, 0xFF);
 +      else
 +      EMIT_TWO_BYTES (0xFF, 0xFE);
 +      /* Emit the BOM only once per coding system.  */
 +      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
 +    }
 +
 +  while (charbuf < charbuf_end)
 +    {
 +      ASSURE_DESTINATION (safe_room);
 +      c = *charbuf++;
 +      /* NOTE(review): `>=' also replaces MAX_UNICODE_CHAR itself
 +         by the default character; confirm whether `>' was
 +         intended.  */
 +      if (c >= MAX_UNICODE_CHAR)
 +      c = coding->default_char;
 +
 +      if (c < 0x10000)
 +      {
 +        if (big_endian)
 +          EMIT_TWO_BYTES (c >> 8, c & 0xFF);
 +        else
 +          EMIT_TWO_BYTES (c & 0xFF, c >> 8);
 +      }
 +      else
 +      {
 +        int c1, c2;
 +
 +        /* Split into a high (C1) and low (C2) surrogate.  */
 +        c -= 0x10000;
 +        c1 = (c >> 10) + 0xD800;
 +        c2 = (c & 0x3FF) + 0xDC00;
 +        if (big_endian)
 +          EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
 +        else
 +          EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
 +      }
 +    }
 +  record_conversion_result (coding, CODING_RESULT_SUCCESS);
 +  coding->produced = dst - coding->destination;
 +  coding->produced_char += produced_chars;
 +  return 0;
 +}
 +
 +\f
 +/*** 6. Old Emacs' internal format (emacs-mule) ***/
 +
 +/* Emacs' internal format for representation of multiple character
 +   sets is a kind of multi-byte encoding, i.e. characters are
 +   represented by variable-length sequences of one-byte codes.
 +
 +   ASCII characters and control characters (e.g. `tab', `newline') are
 +   represented by one-byte sequences which are their ASCII codes, in
 +   the range 0x00 through 0x7F.
 +
 +   8-bit characters of the range 0x80..0x9F are represented by
 +   two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 +   code + 0x20).
 +
 +   8-bit characters of the range 0xA0..0xFF are represented by
 +   one-byte sequences which are their 8-bit code.
 +
 +   The other characters are represented by a sequence of `base
 +   leading-code', optional `extended leading-code', and one or two
 +   `position-code's.  The length of the sequence is determined by the
 +   base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 +   whereas extended leading-code and position-code take the range 0xA0
 +   through 0xFF.  See `charset.h' for more details about leading-code
 +   and position-code.
 +
 +   --- CODE RANGE of Emacs' internal format ---
 +   character set      range
 +   -------------      -----
 +   ascii              0x00..0x7F
 +   eight-bit-control  LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 +   eight-bit-graphic  0xA0..0xFF
 +   ELSE                       0x81..0x9D + [0xA0..0xFF]+
 +   ---------------------------------------------
 +
 +   As this is the internal character representation, the format is
 +   usually not used externally (i.e. in a file or in a data sent to a
 +   process).  But, it is possible to have a text externally in this
 +   format (i.e. by encoding by the coding system `emacs-mule').
 +
 +   In that case, a sequence of one-byte codes has a slightly different
 +   form.
 +
 +   At first, all characters in eight-bit-control are represented by
 +   one-byte sequences which are their 8-bit code.
 +
 +   Next, character composition data are represented by the byte
 +   sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 +   where,
 +      METHOD is 0xF2 plus one of composition method (enum
 +      composition_method),
 +
 +      BYTES is 0xA0 plus a byte length of this composition data,
 +
 +      CHARS is 0xA0 plus a number of characters composed by this
 +      data,
 +
 +      COMPONENTs are characters in multibyte form or composition
 +      rules encoded as two bytes of ASCII codes.
 +
 +   In addition, for backward compatibility, the following formats are
 +   also recognized as composition data on decoding.
 +
 +   0x80 MSEQ ...
 +   0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 +
 +   Here,
 +      MSEQ is a multibyte form but in these special format:
 +        ASCII: 0xA0 ASCII_CODE+0x80,
 +        other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 +      RULE is a one byte code of the range 0xA0..0xF0 that
 +      represents a composition rule.
 +  */
 +
 +char emacs_mule_bytes[256];
 +
 +/* Decode one character in emacs-mule form starting at SRC, which
 +   must lie within the source of CODING.  On success store the
 +   number of bytes read in *NBYTES, the consumed character count in
 +   *NCHARS and, if ID is non-null, the id of the character's charset
 +   in *ID, and return the character.  Return -1 if the byte sequence
 +   is invalid, or -2 if the source ends before the character is
 +   complete.  */
 +int
 +emacs_mule_char (coding, src, nbytes, nchars, id)
 +     struct coding_system *coding;
 +     const unsigned char *src;
 +     int *nbytes, *nchars, *id;
 +{
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
 +  const unsigned char *src_base = src;
 +  int multibytep = coding->src_multibyte;
 +  struct charset *charset;
 +  unsigned code;
 +  int c;
 +  int consumed_chars = 0;
 +
 +  ONE_MORE_BYTE (c);
 +  if (c < 0)
 +    {
 +      /* NOTE(review): a negative value presumably means
 +         ONE_MORE_BYTE read a non-byte character; its negation is
 +         returned as the character.  Confirm against the macro.  */
 +      c = -c;
 +      charset = emacs_mule_charset[0];
 +    }
 +  else
 +    {
 +      /* Dispatch on the total sequence length that the table gives
 +         for the first byte.  */
 +      switch (emacs_mule_bytes[c])
 +      {
 +      case 2:
 +        /* Leading code, one position code.  */
 +        if (! (charset = emacs_mule_charset[c]))
 +          goto invalid_code;
 +        ONE_MORE_BYTE (c);
 +        if (c < 0xA0)
 +          goto invalid_code;
 +        code = c & 0x7F;
 +        break;
 +
 +      case 3:
 +        if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
 +            || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
 +          {
 +            /* Private leading code, a second byte selecting the
 +               charset, one position code.  */
 +            ONE_MORE_BYTE (c);
 +            if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
 +              goto invalid_code;
 +            ONE_MORE_BYTE (c);
 +            if (c < 0xA0)
 +              goto invalid_code;
 +            code = c & 0x7F;
 +          }
 +        else
 +          {
 +            /* Leading code, two position codes.  */
 +            if (! (charset = emacs_mule_charset[c]))
 +              goto invalid_code;
 +            ONE_MORE_BYTE (c);
 +            if (c < 0xA0)
 +              goto invalid_code;
 +            code = (c & 0x7F) << 8;
 +            ONE_MORE_BYTE (c);
 +            if (c < 0xA0)
 +              goto invalid_code;
 +            code |= c & 0x7F;
 +          }
 +        break;
 +
 +      case 4:
 +        /* First byte, a second byte selecting the charset, two
 +           position codes.  */
 +        ONE_MORE_BYTE (c);
 +        if (c < 0 || ! (charset = emacs_mule_charset[c]))
 +          goto invalid_code;
 +        ONE_MORE_BYTE (c);
 +        if (c < 0xA0)
 +          goto invalid_code;
 +        code = (c & 0x7F) << 8;
 +        ONE_MORE_BYTE (c);
 +        if (c < 0xA0)
 +          goto invalid_code;
 +        code |= c & 0x7F;
 +        break;
 +
 +      case 1:
 +        /* A single byte: ASCII or eight-bit.  */
 +        code = c;
 +        charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
 +                                   ? charset_ascii : charset_eight_bit);
 +        break;
 +
 +      default:
 +        abort ();
 +      }
 +      c = DECODE_CHAR (charset, code);
 +      if (c < 0)
 +      goto invalid_code;
 +    }
 +  *nbytes = src - src_base;
 +  *nchars = consumed_chars;
 +  if (id)
 +    *id = charset->id;
 +  return c;
 +
 + no_more_source:
 +  return -2;
 +
 + invalid_code:
 +  return -1;
 +}
 +
 +
 +/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 +   Check if a text is encoded in `emacs-mule'.  If it is, return 1,
 +   else return 0.  */
 +
 +/* Scan the source of CODING, after its leading ASCII run, for byte
 +   sequences shaped like emacs-mule characters and record the
 +   verdict in DETECT_INFO.  Return 0, with the category rejected, on
 +   the first malformed sequence, on ESC/SI/SO, or when the last
 +   block ends inside a sequence; otherwise return 1 after adding any
 +   positive evidence to DETECT_INFO->found.  */
 +static int
 +detect_coding_emacs_mule (coding, detect_info)
 +     struct coding_system *coding;
 +     struct coding_detection_info *detect_info;
 +{
 +  const unsigned char *src = coding->source, *src_base;
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
 +  int multibytep = coding->src_multibyte;
 +  int consumed_chars = 0;
 +  int c;
 +  int found = 0;
 +
 +  detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
 +  /* A coding system of this category is always ASCII compatible.  */
 +  src += coding->head_ascii;
 +
 +  while (1)
 +    {
 +      src_base = src;
 +      ONE_MORE_BYTE (c);
 +      if (c < 0)
 +      continue;
 +      if (c == 0x80)
 +      {
 +        /* Perhaps the start of a composite character.  We simply
 +           skip it because analyzing it is too heavy for detecting.
 +           But, at least, we check that the composite character
 +           consists of more than 4 bytes.  */
 +        const unsigned char *src_base;
 +
 +      repeat:
 +        src_base = src;
 +        do
 +          {
 +            ONE_MORE_BYTE (c);
 +          }
 +        while (c >= 0xA0);
 +
 +        if (src - src_base <= 4)
 +          break;
 +        found = CATEGORY_MASK_EMACS_MULE;
 +        if (c == 0x80)
 +          goto repeat;
 +      }
 +
 +      if (c < 0x80)
 +      {
 +        if (c < 0x20
 +            && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
 +          break;
 +      }
 +      else
 +      {
 +        /* NOTE(review): this uses the outer SRC_BASE, set at the top
 +           of the iteration.  After a composition was skipped above
 +           it still points at the 0x80 byte, not at the leading
 +           byte now in C; confirm this is intended.  */
 +        int more_bytes = emacs_mule_bytes[*src_base] - 1;
 +
 +        while (more_bytes > 0)
 +          {
 +            ONE_MORE_BYTE (c);
 +            if (c < 0xA0)
 +              {
 +                src--;        /* Unread the last byte.  */
 +                break;
 +              }
 +            more_bytes--;
 +          }
 +        if (more_bytes != 0)
 +          break;
 +        found = CATEGORY_MASK_EMACS_MULE;
 +      }
 +    }
 +  /* Every `break' out of the loop above means a rejection.  */
 +  detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
 +  return 0;
 +
 + no_more_source:
 +  /* Running out of source in mid-sequence is a rejection only when
 +     no further block will follow.  */
 +  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
 +    {
 +      detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
 +      return 0;
 +    }
 +  detect_info->found |= found;
 +  return 1;
 +}
 +
 +
 +/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 +
 +/* Decode a character represented as a component of composition
 +   sequence of Emacs 20/21 style at SRC.  Store that character in
 +   *BUF, increment BUF, and advance SRC and CONSUMED_CHARS past it.
 +   If SRC is at SRC_END, or emacs_mule_char reports -2 (no more
 +   source), `break' out of the innermost enclosing loop.  If SRC
 +   points to an invalid byte sequence, jump to the label
 +   invalid_code.  The macro ends in a dangling `else', so it must be
 +   followed by a statement (normally just `;').  */
 +
 +#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                       \
 +  if (1)                                                      \
 +    {                                                         \
 +      int c;                                                  \
 +      int nbytes, nchars;                                     \
 +                                                              \
 +      if (src == src_end)                                     \
 +      break;                                                  \
 +      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
 +      if (c < 0)                                              \
 +      {                                                       \
 +        if (c == -2)                                          \
 +          break;                                              \
 +        goto invalid_code;                                    \
 +      }                                                       \
 +      *buf++ = c;                                             \
 +      src += nbytes;                                          \
 +      consumed_chars += nchars;                                       \
 +    }                                                         \
 +  else
 +
 +
 +/* Decode a composition rule represented as a component of composition
 +   sequence of Emacs 20 style at SRC.  The rule is one byte whose
 +   value minus 0x20 must be in 0..80; it is split into GREF (value
 +   / 9) and NREF (value % 9).  Store the decoded rule in *BUF, and
 +   increment BUF.  If SRC is at SRC_END or points to an invalid
 +   byte, jump to the label invalid_code.  */
 +
 +#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)    \
 +  do {                                                        \
 +    int c, gref, nref;                                        \
 +                                                      \
 +    if (src >= src_end)                                       \
 +      goto invalid_code;                              \
 +    ONE_MORE_BYTE_NO_CHECK (c);                               \
 +    c -= 0x20;                                                \
 +    if (c < 0 || c >= 81)                             \
 +      goto invalid_code;                              \
 +                                                      \
 +    gref = c / 9, nref = c % 9;                               \
 +    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);    \
 +  } while (0)
 +
 +
 +/* Decode a composition rule represented as a component of composition
 +   sequence of Emacs 21 style at SRC.  The rule is two bytes, GREF
 +   and NREF, each offset by 0x20.  Store the decoded rule in *BUF,
 +   and increment BUF.  If fewer than two bytes remain before
 +   SRC_END, or either value is outside 0..80, jump to the label
 +   invalid_code.  */
 +
 +#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)    \
 +  do {                                                        \
 +    int gref, nref;                                   \
 +                                                      \
 +    if (src + 1>= src_end)                            \
 +      goto invalid_code;                              \
 +    ONE_MORE_BYTE_NO_CHECK (gref);                    \
 +    gref -= 0x20;                                     \
 +    ONE_MORE_BYTE_NO_CHECK (nref);                    \
 +    nref -= 0x20;                                     \
 +    if (gref < 0 || gref >= 81                                \
 +      || nref < 0 || nref >= 81)                      \
 +      goto invalid_code;                              \
 +    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);    \
 +  } while (0)
 +
 +
 +/* Decode the header of an Emacs 21 style composition whose METHOD
 +   byte is already in C, and add the composition annotation to
 +   CHARBUF with ADD_COMPOSITION_DATA.  For methods other than
 +   COMPOSITION_RELATIVE, also decode the component characters and
 +   rules that follow into CHARBUF.  Jump to the label invalid_code
 +   on a malformed header or component.  Uses the locals SRC,
 +   CHARBUF, CONSUMED_CHARS and CONSUMED_CHARS_BASE of the calling
 +   function.  */
 +#define DECODE_EMACS_MULE_21_COMPOSITION(c)                           \
 +  do {                                                                        \
 +    /* Emacs 21 style format.  The first three bytes at SRC are               \
 +       (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is        \
 +       the byte length of this composition information, CHARS is the  \
 +       number of characters composed by this composition.  */         \
 +    enum composition_method method = c - 0xF2;                                \
 +    int *charbuf_base = charbuf;                                      \
 +    int consumed_chars_limit;                                         \
 +    int nbytes, nchars;                                                       \
 +                                                                      \
 +    ONE_MORE_BYTE (c);                                                        \
 +    if (c < 0)                                                                \
 +      goto invalid_code;                                              \
 +    nbytes = c - 0xA0;                                                        \
 +    if (nbytes < 3)                                                   \
 +      goto invalid_code;                                              \
 +    ONE_MORE_BYTE (c);                                                        \
 +    if (c < 0)                                                                \
 +      goto invalid_code;                                              \
 +    nchars = c - 0xA0;                                                        \
 +    ADD_COMPOSITION_DATA (charbuf, nchars, method);                   \
 +    consumed_chars_limit = consumed_chars_base + nbytes;              \
 +    if (method != COMPOSITION_RELATIVE)                                       \
 +      {                                                                       \
 +      int i = 0;                                                      \
 +      /* Components alternate character, rule, character, ...         \
 +         except for COMPOSITION_WITH_ALTCHARS, where every            \
 +         component is a character.  */                                \
 +      while (consumed_chars < consumed_chars_limit)                   \
 +        {                                                             \
 +          if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
 +            DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
 +          else                                                        \
 +            DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
 +          i++;                                                        \
 +        }                                                             \
 +      /* The loop above was left early by a `break' in                \
 +         DECODE_EMACS_MULE_COMPOSITION_CHAR.  */                      \
 +      if (consumed_chars < consumed_chars_limit)                      \
 +        goto invalid_code;                                            \
 +      charbuf_base[0] -= i;                                           \
 +      }                                                                       \
 +  } while (0)
 +
 +
 +/* Decode an Emacs 20 style relative composition.  Rewind SRC to
 +   SRC_BASE, skip the 0x80 byte, collect up to
 +   MAX_COMPOSITION_COMPONENTS characters, then add the composition
 +   annotation and the characters to CHARBUF.  Jump to the label
 +   invalid_code if fewer than two characters were collected.  */
 +#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)          \
 +  do {                                                                \
 +    /* Emacs 20 style format for relative composition.  */    \
 +    /* Store multibyte form of characters to be composed.  */ \
 +    enum composition_method method = COMPOSITION_RELATIVE;    \
 +    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];               \
 +    int *buf = components;                                    \
 +    int i, j;                                                 \
 +                                                              \
 +    src = src_base;                                           \
 +    ONE_MORE_BYTE (c);                /* skip 0x80 */                 \
 +    /* The `break' inside DECODE_EMACS_MULE_COMPOSITION_CHAR  \
 +       ends this loop, leaving I as the count collected.  */  \
 +    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)          \
 +      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                       \
 +    if (i < 2)                                                        \
 +      goto invalid_code;                                      \
 +    ADD_COMPOSITION_DATA (charbuf, i, method);                        \
 +    for (j = 0; j < i; j++)                                   \
 +      *charbuf++ = components[j];                             \
 +  } while (0)
 +
 +
 +#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)          \
 +  do {                                                                \
 +    /* Emacs 20 style format for rule-base composition.  */   \
 +    /* Store multibyte form of characters to be composed.  */ \
 +    enum composition_method method = COMPOSITION_WITH_RULE;   \
 +    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];               \
 +    int *buf = components;                                    \
 +    int i, j;                                                 \
 +                                                              \
 +    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
 +    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)          \
 +      {                                                               \
 +      DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
 +      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
 +      }                                                               \
 +    if (i < 1 || (buf - components) % 2 == 0)                 \
 +      goto invalid_code;                                      \
 +    if (charbuf + i + (i / 2) + 1 < charbuf_end)              \
 +      goto no_more_source;                                    \
 +    ADD_COMPOSITION_DATA (buf, i, method);                    \
 +    for (j = 0; j < i; j++)                                   \
 +      *charbuf++ = components[j];                             \
 +    for (j = 0; j < i; j += 2)                                        \
 +      *charbuf++ = components[j];                             \
 +  } while (0)
 +
 +
 +static void
 +decode_coding_emacs_mule (coding)
 +     struct coding_system *coding;
 +{
 +  const unsigned char *src = coding->source + coding->consumed;
 +  const unsigned char *src_end = coding->source + coding->src_bytes;
 +  const unsigned char *src_base;
 +  int *charbuf = coding->charbuf + coding->charbuf_used;
 +  int *charbuf_end
 +    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
 +  int consumed_chars = 0, consumed_chars_base;
 +  int multibytep = coding->src_multibyte;
 +  Lisp_Object attrs, charset_list;
 +  int char_offset = coding->produced_char;
 +  int last_offset = char_offset;
 +  int last_id = charset_ascii;
 +
 +  CODING_GET_INFO (coding, attrs, charset_list);
 +
 +  while (1)
 +    {
 +      int c;
 +
 +      src_base = src;
 +      consumed_chars_base = consumed_chars;
 +
 +      if (charbuf >= charbuf_end)
 +      break;
 +
 +      ONE_MORE_BYTE (c);
 +      if (c < 0)
 +      {
 +        *charbuf++ = -c;
 +        char_offset++;
 +      }
 +      else if (c < 0x80)
 +      {
 +        *charbuf++ = c;
 +        char_offset++;
 +      }
 +      else if (c == 0x80)
 +      {
 +        ONE_MORE_BYTE (c);
 +        if (c < 0)
 +          goto invalid_code;
 +        if (c - 0xF2 >= COMPOSITION_RELATIVE
 +            && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
 +          DECODE_EMACS_MULE_21_COMPOSITION (c);
 +        else if (c < 0xC0)
 +          DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
 +        else if (c == 0xFF)
 +          DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
 +        else
 +          goto invalid_code;
 +      }
 +      else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
 +      {
 +        int nbytes, nchars;
 +        int id;
 +
 +        src = src_base;
 +        consumed_chars = consumed_chars_base;
 +        c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
 +        if (c < 0)
 +          {
 +            if (c == -2)
 +              break;
 +            goto invalid_code;
 +          }
 +        if (last_id != id)
 +          {
 +            if (last_id != charset_ascii)
 +              ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 +            last_id = id;
 +            last_offset = char_offset;
 +          }
 +        *charbuf++ = c;
 +        src += nbytes;
 +        consumed_chars += nchars;
 +        char_offset++;
 +      }
 +      continue;
 +
 +    invalid_code:
 +      src = src_base;
 +      consumed_chars = consumed_chars_base;
 +      ONE_MORE_BYTE (c);
 +      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
 +      char_offset++;
 +      coding->errors++;
 +    }
 +
 + no_more_source:
 +  if (last_id != charset_ascii)
 +    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 +  coding->consumed_char += consumed_chars_base;
 +  coding->consumed = src_base - coding->source;
 +  coding->charbuf_used = charbuf - coding->charbuf;
 +}
 +
 +
 +#define EMACS_MULE_LEADING_CODES(id, codes)   \
 +  do {                                                \
 +    if (id < 0xA0)                            \
 +      codes[0] = id, codes[1] = 0;            \
 +    else if (id < 0xE0)                               \
 +      codes[0] = 0x9A, codes[1] = id;         \
 +    else if (id < 0xF0)                               \
 +      codes[0] = 0x9B, codes[1] = id;         \
 +    else if (id < 0xF5)                               \
 +      codes[0] = 0x9C, codes[1] = id;         \
 +    else                                      \
 +      codes[0] = 0x9D, codes[1] = id;         \
 +  } while (0);
 +
 +
 +static int
 +encode_coding_emacs_mule (coding)
 +     struct coding_system *coding;
 +{
 +  int multibytep = coding->dst_multibyte;
 +  int *charbuf = coding->charbuf;
 +  int *charbuf_end = charbuf + coding->charbuf_used;
 +  unsigned char *dst = coding->destination + coding->produced;
 +  unsigned char *dst_end = coding->destination + coding->dst_bytes;
 +  int safe_room = 8;
 +  int produced_chars = 0;
 +  Lisp_Object attrs, charset_list;
 +  int c;
 +  int preferred_charset_id = -1;
 +
 +  CODING_GET_INFO (coding, attrs, charset_list);
 +  if (! EQ (charset_list, Vemacs_mule_charset_list))
 +    {
 +      CODING_ATTR_CHARSET_LIST (attrs)
 +      = charset_list = Vemacs_mule_charset_list;
 +    }
 +
 +  while (charbuf < charbuf_end)
 +    {
 +      ASSURE_DESTINATION (safe_room);
 +      c = *charbuf++;
 +
 +      if (c < 0)
 +      {
 +        /* Handle an annotation.  */
 +        switch (*charbuf)
 +          {
 +          case CODING_ANNOTATE_COMPOSITION_MASK:
 +            /* Not yet implemented.  */
 +            break;
 +          case CODING_ANNOTATE_CHARSET_MASK:
 +            preferred_charset_id = charbuf[3];
 +            if (preferred_charset_id >= 0
 +                && NILP (Fmemq (make_number (preferred_charset_id),
 +                                charset_list)))
 +              preferred_charset_id = -1;
 +            break;
 +          default:
 +            abort ();
 +          }
 +        charbuf += -c - 1;
 +        continue;
 +      }
 +
 +      if (ASCII_CHAR_P (c))
 +      EMIT_ONE_ASCII_BYTE (c);
 +      else if (CHAR_BYTE8_P (c))
 +      {
 +        c = CHAR_TO_BYTE8 (c);
 +        EMIT_ONE_BYTE (c);
 +      }
 +      else
 +      {
 +        struct charset *charset;
 +        unsigned code;
 +        int dimension;
 +        int emacs_mule_id;
 +        unsigned char leading_codes[2];
 +
 +        if (preferred_charset_id >= 0)
 +          {
 +            charset = CHARSET_FROM_ID (preferred_charset_id);
 +            if (! CHAR_CHARSET_P (c, charset))
 +              charset = char_charset (c, charset_list, NULL);
 +          }
 +        else
 +          charset = char_charset (c, charset_list, &code);
 +        if (! charset)
 +          {
 +            c = coding->default_char;
 +            if (ASCII_CHAR_P (c))
 +              {
 +                EMIT_ONE_ASCII_BYTE (c);
 +                continue;
 +              }
 +            charset = char_charset (c, charset_list, &code);
 +          }
 +        dimension = CHARSET_DIMENSION (charset);
 +        emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
 +        EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
 +        EMIT_ONE_BYTE (leading_codes[0]);
 +        if (leading_codes[1])
 +          EMIT_ONE_BYTE (leading_codes[1]);
 +        if (dimension == 1)
 +          EMIT_ONE_BYTE (code | 0x80);
 +        else
 +          {
 +            code |= 0x8080;
 +            EMIT_ONE_BYTE (code >> 8);
 +            EMIT_ONE_BYTE (code & 0xFF);
 +          }
 +      }
 +    }
 +  record_conversion_result (coding, CODING_RESULT_SUCCESS);
 +  coding->produced_char += produced_chars;
 +  coding->produced = dst - coding->destination;
 +  return 0;
 +}
 +
 +\f
 +/*** 7. ISO2022 handlers ***/
 +
 +/* The following note describes the coding system ISO2022 briefly.
 +   Since the intention of this note is to help understand the
 +   functions in this file, some parts are NOT ACCURATE or are OVERLY
 +   SIMPLIFIED.  For thorough understanding, please refer to the
 +   original document of ISO2022.  This is equivalent to the standard
 +   ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
 +
 +   ISO2022 provides many mechanisms to encode several character sets
 +   in 7-bit and 8-bit environments.  For 7-bit environments, all text
 +   is encoded using bytes less than 128.  This may make the encoded
 +   text a little bit longer, but the text passes more easily through
 +   several types of gateway, some of which strip off the MSB (Most
 +   Significant Bit).
 +
 +   There are two kinds of character sets: control character sets and
 +   graphic character sets.  The former contain control characters such
 +   as `newline' and `escape' to provide control functions (control
 +   functions are also provided by escape sequences).  The latter
 +   contain graphic characters such as 'A' and '-'.  Emacs recognizes
 +   two control character sets and many graphic character sets.
 +
 +   Graphic character sets are classified into one of the following
 +   four classes, according to the number of bytes (DIMENSION) and
 +   number of characters in one dimension (CHARS) of the set:
 +   - DIMENSION1_CHARS94
 +   - DIMENSION1_CHARS96
 +   - DIMENSION2_CHARS94
 +   - DIMENSION2_CHARS96
 +
 +   In addition, each character set is assigned an identification tag,
 +   unique for each set, called the "final character" (denoted as <F>
 +   hereafter).  The <F> of each character set is decided by ECMA(*)
 +   when it is registered in ISO.  The code range of <F> is 0x30..0x7E
 +   (0x30..0x3F are for private use only).
 +
 +   Note (*): ECMA = European Computer Manufacturers Association
 +
 +   Here are examples of graphic character sets [NAME(<F>)]:
        o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
        o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
        o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
diff --cc src/xdisp.c
Simple merge