src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
   4      National Institute of Advanced Industrial Science and Technology (AIST)
   5      Registration Number H14PRO021
   6
   7 This file is part of GNU Emacs.
   8
   9 GNU Emacs is free software; you can redistribute it and/or modify
  10 it under the terms of the GNU General Public License as published by
  11 the Free Software Foundation; either version 2, or (at your option)
  12 any later version.
  13
  14 GNU Emacs is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GNU Emacs; see the file COPYING.  If not, write to
  21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  22 Boston, MA 02110-1301, USA.  */
  23
  24 /* At first, see the document in `charset.h' to understand the code in
  25    this file.  */
  26
  27 #ifdef emacs
  28 #include <config.h>
  29 #endif
  30
  31 #include <stdio.h>
  32
  33 #ifdef emacs
  34
  35 #include <sys/types.h>
  36 #include "lisp.h"
  37 #include "buffer.h"
  38 #include "charset.h"
  39 #include "composite.h"
  40 #include "coding.h"
  41 #include "disptab.h"
  42
  43 #else  /* not emacs */
  44
  45 #include "mulelib.h"
  46
  47 #endif /* emacs */
  48
  49 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  50 Lisp_Object Qunknown;
  51
  52 /* Declaration of special leading-codes.  */
  53 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
  54 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
  55 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
  56 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
  57
  58 /* Declaration of special charsets.  The values are set by
  59    Fsetup_special_charsets.  */
  60 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  61 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  62 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  63 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  64 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  65 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  66 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  67 int charset_mule_unicode_0100_24ff;
  68 int charset_mule_unicode_2500_33ff;
  69 int charset_mule_unicode_e000_ffff;
  70
  71 Lisp_Object Qcharset_table;
  72
  73 /* A char-table containing information of each character set.  */
  74 Lisp_Object Vcharset_table;
  75
/* A vector of charset symbols indexed by charset-id.  This is used
   only for returning a charset symbol from C functions.  */
  78 Lisp_Object Vcharset_symbol_table;
  79
  80 /* A list of charset symbols ever defined.  */
  81 Lisp_Object Vcharset_list;
  82
/* Vector of all translation tables ever defined.
   The ID of a translation table is its index in this vector.  */
  85 Lisp_Object Vtranslation_table_vector;
  86
  87 /* A char-table for characters which may invoke auto-filling.  */
  88 Lisp_Object Vauto_fill_chars;
  89
  90 Lisp_Object Qauto_fill_chars;
  91
  92 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  93 int bytes_by_char_head[256];
  94 int width_by_char_head[256];
  95
  96 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  97    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  98 int iso_charset_table[2][2][128];
  99
 100 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
 101 unsigned char *_fetch_multibyte_char_p;
 102 int _fetch_multibyte_char_len;
 103
 104 /* Offset to add to a non-ASCII value when inserting it.  */
 105 EMACS_INT nonascii_insert_offset;
 106
 107 /* Translation table for converting non-ASCII unibyte characters
 108    to multibyte codes, or nil.  */
 109 Lisp_Object Vnonascii_translation_table;
 110
 111 /* List of all possible generic characters.  */
 112 Lisp_Object Vgeneric_character_list;
 113
 114 \f
 115 void
 116 invalid_character (c)
 117      int c;
 118 {
 119   error ("Invalid character: %d, #o%o, #x%x", c, c, c);
 120 }
 121
 122 /* Parse string STR of length LENGTH and fetch information of a
 123    character at STR.  Set BYTES to the byte length the character
 124    occupies, CHARSET, C1, C2 to proper values of the character. */
 125
 126 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 127   do {                                                                       \
 128     (c1) = *(str);                                                           \
 129     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 130     if ((bytes) == 1)                                                        \
 131       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 132     else if ((bytes) == 2)                                                   \
 133       {                                                                      \
 134         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 135           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 136         else                                                                 \
 137           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 138       }                                                                      \
 139     else if ((bytes) == 3)                                                   \
 140       {                                                                      \
 141         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 142           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 143         else                                                                 \
 144           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 145       }                                                                      \
 146     else                                                                     \
 147       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 148   } while (0)
 149
 150 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 151    Note that this intentionally allows invalid components, such
 152    as 0xA0 0xA0, because there exist many files that contain
 153    such invalid byte sequences, especially in EUC-GB. */
 154 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 155   ((charset) == CHARSET_ASCII                           \
 156    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 157    : ((charset) == CHARSET_8_BIT_CONTROL                \
 158       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 159       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 160          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 161          : (CHARSET_DIMENSION (charset) == 1            \
 162             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 163             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 164                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 165
 166 /* Store multi-byte form of the character C in STR.  The caller should
 167    allocate at least 4-byte area at STR in advance.  Returns the
 168    length of the multi-byte form.  If C is an invalid character code,
 169    return -1.  */
 170
 171 int
 172 char_to_string_1 (c, str)
 173      int c;
 174      unsigned char *str;
 175 {
 176   unsigned char *p = str;
 177
 178   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 179     {
 180       /* Multibyte character can't have a modifier bit.  */
 181       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 182         return -1;
 183
 184       /* For Meta, Shift, and Control modifiers, we need special care.  */
 185       if (c & CHAR_META)
 186         {
 187           /* Move the meta bit to the right place for a string.  */
 188           c = (c & ~CHAR_META) | 0x80;
 189         }
 190       if (c & CHAR_SHIFT)
 191         {
 192           /* Shift modifier is valid only with [A-Za-z].  */
 193           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 194             c &= ~CHAR_SHIFT;
 195           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 196             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 197         }
 198       if (c & CHAR_CTL)
 199         {
 200           /* Simulate the code in lread.c.  */
 201           /* Allow `\C- ' and `\C-?'.  */
 202           if (c == (CHAR_CTL | ' '))
 203             c = 0;
 204           else if (c == (CHAR_CTL | '?'))
 205             c = 127;
 206           /* ASCII control chars are made from letters (both cases),
 207              as well as the non-letters within 0100...0137.  */
 208           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 209             c &= (037 | (~0177 & ~CHAR_CTL));
 210           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 211             c &= (037 | (~0177 & ~CHAR_CTL));
 212         }
 213
 214       /* If C still has any modifier bits, just ignore it.  */
 215       c &= ~CHAR_MODIFIER_MASK;
 216     }
 217
 218   if (SINGLE_BYTE_CHAR_P (c))
 219     {
 220       if (ASCII_BYTE_P (c) || c >= 0xA0)
 221         *p++ = c;
 222       else
 223         {
 224           *p++ = LEADING_CODE_8_BIT_CONTROL;
 225           *p++ = c + 0x20;
 226         }
 227     }
 228   else if (CHAR_VALID_P (c, 0))
 229     {
 230       int charset, c1, c2;
 231
 232       SPLIT_CHAR (c, charset, c1, c2);
 233
 234       if (charset >= LEADING_CODE_EXT_11)
 235         *p++ = (charset < LEADING_CODE_EXT_12
 236                 ? LEADING_CODE_PRIVATE_11
 237                 : (charset < LEADING_CODE_EXT_21
 238                    ? LEADING_CODE_PRIVATE_12
 239                    : (charset < LEADING_CODE_EXT_22
 240                       ? LEADING_CODE_PRIVATE_21
 241                       : LEADING_CODE_PRIVATE_22)));
 242       *p++ = charset;
 243       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 244         return -1;
 245       if (c1)
 246         {
 247           *p++ = c1 | 0x80;
 248           if (c2 > 0)
 249             *p++ = c2 | 0x80;
 250         }
 251     }
 252   else
 253     return -1;
 254
 255   return (p - str);
 256 }
 257
 258
 259 /* Store multi-byte form of the character C in STR.  The caller should
 260    allocate at least 4-byte area at STR in advance.  Returns the
 261    length of the multi-byte form.  If C is an invalid character code,
 262    signal an error.
 263
 264    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 265    directly if C can be an ASCII character.  */
 266
 267 int
 268 char_to_string (c, str)
 269      int c;
 270      unsigned char *str;
 271 {
 272   int len;
 273   len = char_to_string_1 (c, str);
 274   if (len == -1)
 275     invalid_character (c);
 276   return len;
 277 }
 278
 279
 280 /* Return the non-ASCII character corresponding to multi-byte form at
 281    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 282    length of the multibyte form in *ACTUAL_LEN.
 283
 284    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 285    this function directly if you want ot handle ASCII characters as
 286    well.  */
 287
 288 int
 289 string_to_char (str, len, actual_len)
 290      const unsigned char *str;
 291      int len, *actual_len;
 292 {
 293   int c, bytes, charset, c1, c2;
 294
 295   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 296   c = MAKE_CHAR (charset, c1, c2);
 297   if (actual_len)
 298     *actual_len = bytes;
 299   return c;
 300 }
 301
 302 /* Return the length of the multi-byte form at string STR of length LEN.
 303    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 304 int
 305 multibyte_form_length (str, len)
 306      const unsigned char *str;
 307      int len;
 308 {
 309   int bytes;
 310
 311   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 312   return bytes;
 313 }
 314
 315 /* Check multibyte form at string STR of length LEN and set variables
 316    pointed by CHARSET, C1, and C2 to charset and position codes of the
 317    character at STR, and return 0.  If there's no multibyte character,
 318    return -1.  This should be used only in the macro SPLIT_STRING
 319    which checks range of STR in advance.  */
 320
 321 int
 322 split_string (str, len, charset, c1, c2)
 323      const unsigned char *str;
 324      unsigned char *c1, *c2;
 325      int len, *charset;
 326 {
 327   register int bytes, cs, code1, code2 = -1;
 328
 329   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 330   if (cs == CHARSET_ASCII)
 331     return -1;
 332   *charset = cs;
 333   *c1 = code1;
 334   *c2 = code2;
 335   return 0;
 336 }
 337
 338 /* Return 1 iff character C has valid printable glyph.
 339    Use the macro CHAR_PRINTABLE_P instead.  */
 340 int
 341 char_printable_p (c)
 342      int c;
 343 {
 344   int charset, c1, c2;
 345
 346   if (ASCII_BYTE_P (c))
 347     return 1;
 348   else if (SINGLE_BYTE_CHAR_P (c))
 349     return 0;
 350   else if (c >= MAX_CHAR)
 351     return 0;
 352
 353   SPLIT_CHAR (c, charset, c1, c2);
 354   if (! CHARSET_DEFINED_P (charset))
 355     return 0;
 356   if (CHARSET_CHARS (charset) == 94
 357       ? c1 <= 32 || c1 >= 127
 358       : c1 < 32)
 359     return 0;
 360   if (CHARSET_DIMENSION (charset) == 2
 361       && (CHARSET_CHARS (charset) == 94
 362           ? c2 <= 32 || c2 >= 127
 363           : c2 < 32))
 364     return 0;
 365   return 1;
 366 }
 367
 368 /* Translate character C by translation table TABLE.  If C
 369    is negative, translate a character specified by CHARSET, C1, and C2
 370    (C1 and C2 are code points of the character).  If no translation is
 371    found in TABLE, return C.  */
 372 int
 373 translate_char (table, c, charset, c1, c2)
 374      Lisp_Object table;
 375      int c, charset, c1, c2;
 376 {
 377   Lisp_Object ch;
 378   int alt_charset, alt_c1, alt_c2, dimension;
 379
 380   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 381   if (!CHAR_TABLE_P (table)
 382       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 383     return c;
 384
 385   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 386   dimension = CHARSET_DIMENSION (alt_charset);
 387   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 388     /* CH is not a generic character, just return it.  */
 389     return XFASTINT (ch);
 390
 391   /* Since CH is a generic character, we must return a specific
 392      charater which has the same position codes as C from CH.  */
 393   if (charset < 0)
 394     SPLIT_CHAR (c, charset, c1, c2);
 395   if (dimension != CHARSET_DIMENSION (charset))
 396     /* We can't make such a character because of dimension mismatch.  */
 397     return c;
 398   return MAKE_CHAR (alt_charset, c1, c2);
 399 }
 400
 401 /* Convert the unibyte character C to multibyte based on
 402    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 403    convert C to a valid multibyte character, convert it based on
 404    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 405
 406 int
 407 unibyte_char_to_multibyte (c)
 408      int c;
 409 {
 410   if (c < 0400 && c >= 0200)
 411     {
 412       int c_save = c;
 413
 414       if (! NILP (Vnonascii_translation_table))
 415         {
 416           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 417           if (c >= 0400 && ! char_valid_p (c, 0))
 418             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 419         }
 420       else if (c >= 0240 && nonascii_insert_offset > 0)
 421         {
 422           c += nonascii_insert_offset;
 423           if (c < 0400 || ! char_valid_p (c, 0))
 424             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 425         }
 426       else if (c >= 0240)
 427         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 428     }
 429   return c;
 430 }
 431
 432
 433 /* Convert the multibyte character C to unibyte 8-bit character based
 434    on Vnonascii_translation_table or nonascii_insert_offset.  If
 435    REV_TBL is non-nil, it should be a reverse table of
 436    Vnonascii_translation_table, i.e. what given by:
 437      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 438
 439 int
 440 multibyte_char_to_unibyte (c, rev_tbl)
 441      int c;
 442      Lisp_Object rev_tbl;
 443 {
 444   if (!SINGLE_BYTE_CHAR_P (c))
 445     {
 446       int c_save = c;
 447
 448       if (! CHAR_TABLE_P (rev_tbl)
 449           && CHAR_TABLE_P (Vnonascii_translation_table))
 450         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 451                                           make_number (0));
 452       if (CHAR_TABLE_P (rev_tbl))
 453         {
 454           Lisp_Object temp;
 455           temp = Faref (rev_tbl, make_number (c));
 456           if (INTEGERP (temp))
 457             c = XINT (temp);
 458           if (c >= 256)
 459             c = (c_save & 0177) + 0200;
 460         }
 461       else
 462         {
 463           if (nonascii_insert_offset > 0)
 464             c -= nonascii_insert_offset;
 465           if (c < 128 || c >= 256)
 466             c = (c_save & 0177) + 0200;
 467         }
 468     }
 469
 470   return c;
 471 }
 472
 473 \f
 474 /* Update the table Vcharset_table with the given arguments (see the
 475    document of `define-charset' for the meaning of each argument).
 476    Several other table contents are also updated.  The caller should
 477    check the validity of CHARSET-ID and the remaining arguments in
 478    advance.  */
 479
 480 void
 481 update_charset_table (charset_id, dimension, chars, width, direction,
 482                       iso_final_char, iso_graphic_plane,
 483                       short_name, long_name, description)
 484      Lisp_Object charset_id, dimension, chars, width, direction;
 485      Lisp_Object iso_final_char, iso_graphic_plane;
 486      Lisp_Object short_name, long_name, description;
 487 {
 488   int charset = XINT (charset_id);
 489   int bytes;
 490   unsigned char leading_code_base, leading_code_ext;
 491
 492   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 493     CHARSET_TABLE_ENTRY (charset)
 494       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 495
 496   if (NILP (long_name))
 497     long_name = short_name;
 498   if (NILP (description))
 499     description = long_name;
 500
 501   /* Get byte length of multibyte form, base leading-code, and
 502      extended leading-code of the charset.  See the comment under the
 503      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 504   bytes = XINT (dimension);
 505   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 506     {
 507       /* Official charset, it doesn't have an extended leading-code.  */
 508       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 509         bytes += 1; /* For a base leading-code.  */
 510       leading_code_base = charset;
 511       leading_code_ext = 0;
 512     }
 513   else
 514     {
 515       /* Private charset.  */
 516       bytes += 2; /* For base and extended leading-codes.  */
 517       leading_code_base
 518         = (charset < LEADING_CODE_EXT_12
 519            ? LEADING_CODE_PRIVATE_11
 520            : (charset < LEADING_CODE_EXT_21
 521               ? LEADING_CODE_PRIVATE_12
 522               : (charset < LEADING_CODE_EXT_22
 523                  ? LEADING_CODE_PRIVATE_21
 524                  : LEADING_CODE_PRIVATE_22)));
 525       leading_code_ext = charset;
 526       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 527         error ("Invalid dimension for the charset-ID %d", charset);
 528     }
 529
 530   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 531   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 532   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 533   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 534   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 535   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 536   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 537     = make_number (leading_code_base);
 538   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 539     = make_number (leading_code_ext);
 540   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 541   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 542     = iso_graphic_plane;
 543   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 544   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 545   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 546   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 547
 548   {
 549     /* If we have already defined a charset which has the same
 550        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 551        DIRECTION, we must update the entry REVERSE-CHARSET of both
 552        charsets.  If there's no such charset, the value of the entry
 553        is set to nil.  */
 554     int i;
 555
 556     for (i = 0; i <= MAX_CHARSET; i++)
 557       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 558         {
 559           if (CHARSET_DIMENSION (i) == XINT (dimension)
 560               && CHARSET_CHARS (i) == XINT (chars)
 561               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 562               && CHARSET_DIRECTION (i) != XINT (direction))
 563             {
 564               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 565                 = make_number (i);
 566               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 567               break;
 568             }
 569         }
 570     if (i > MAX_CHARSET)
 571       /* No such a charset.  */
 572       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 573         = make_number (-1);
 574   }
 575
 576   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 577       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 578     {
 579       bytes_by_char_head[leading_code_base] = bytes;
 580       width_by_char_head[leading_code_base] = XINT (width);
 581
 582       /* Update table emacs_code_class.  */
 583       emacs_code_class[charset] = (bytes == 2
 584                                    ? EMACS_leading_code_2
 585                                    : (bytes == 3
 586                                       ? EMACS_leading_code_3
 587                                       : EMACS_leading_code_4));
 588     }
 589
 590   /* Update table iso_charset_table.  */
 591   if (XINT (iso_final_char) >= 0
 592       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 593     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 594 }
 595
 596 #ifdef emacs
 597
 598 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 599    is invalid.  */
 600 int
 601 get_charset_id (charset_symbol)
 602      Lisp_Object charset_symbol;
 603 {
 604   Lisp_Object val;
 605   int charset;
 606
 607   /* This originally used a ?: operator, but reportedly the HP-UX
 608      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 609   if (SYMBOLP (charset_symbol)
 610       && VECTORP (val = Fget (charset_symbol, Qcharset))
 611       && CHARSET_VALID_P (charset =
 612                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 613     return charset;
 614   else
 615     return -1;
 616 }
 617
 618 /* Return an identification number for a new private charset of
 619    DIMENSION and WIDTH.  If there's no more room for the new charset,
 620    return 0.  */
 621 Lisp_Object
 622 get_new_private_charset_id (dimension, width)
 623      int dimension, width;
 624 {
 625   int charset, from, to;
 626
 627   if (dimension == 1)
 628     {
 629       from = LEADING_CODE_EXT_11;
 630       to = LEADING_CODE_EXT_21;
 631     }
 632   else
 633     {
 634       from = LEADING_CODE_EXT_21;
 635       to = LEADING_CODE_EXT_MAX + 1;
 636     }
 637
 638   for (charset = from; charset < to; charset++)
 639     if (!CHARSET_DEFINED_P (charset)) break;
 640
 641   return make_number (charset < to ? charset : 0);
 642 }
 643
 644 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 645        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 646 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 647  treated as a private charset.
 648 INFO-VECTOR is a vector of the format:
 649    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 650     SHORT-NAME LONG-NAME DESCRIPTION]
 651 The meanings of each elements is as follows:
 652 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 653 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 654 WIDTH (integer) is the number of columns a character in the charset
 655 occupies on the screen: one of 0, 1, and 2.
 656
 657 DIRECTION (integer) is the rendering direction of characters in the
 658 charset when rendering.  If 0, render from left to right, else
 659 render from right to left.
 660
 661 ISO-FINAL-CHAR (character) is the final character of the
 662 corresponding ISO 2022 charset.
 663 It may be -1 if the charset is internal use only.
 664
 665 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 666 while encoding to variants of ISO 2022 coding system, one of the
 667 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 668 It may be -1 if the charset is internal use only.
 669
 670 SHORT-NAME (string) is the short name to refer to the charset.
 671
 672 LONG-NAME (string) is the long name to refer to the charset.
 673
 674 DESCRIPTION (string) is the description string of the charset.  */)
 675        (charset_id, charset_symbol, info_vector)
 676      Lisp_Object charset_id, charset_symbol, info_vector;
 677 {
 678   Lisp_Object *vec;
 679
 680   if (!NILP (charset_id))
 681     CHECK_NUMBER (charset_id);
 682   CHECK_SYMBOL (charset_symbol);
 683   CHECK_VECTOR (info_vector);
 684
 685   if (! NILP (charset_id))
 686     {
 687       if (! CHARSET_VALID_P (XINT (charset_id)))
 688         error ("Invalid CHARSET: %d", XINT (charset_id));
 689       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 690         error ("Already defined charset: %d", XINT (charset_id));
 691     }
 692
 693   vec = XVECTOR (info_vector)->contents;
 694   if (XVECTOR (info_vector)->size != 9
 695       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 696       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 697       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 698       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 699       || !INTEGERP (vec[4])
 700       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 701       || !INTEGERP (vec[5])
 702       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 703       || !STRINGP (vec[6])
 704       || !STRINGP (vec[7])
 705       || !STRINGP (vec[8]))
 706     error ("Invalid info-vector argument for defining charset %s",
 707            SDATA (SYMBOL_NAME (charset_symbol)));
 708
 709   if (NILP (charset_id))
 710     {
 711       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 712       if (XINT (charset_id) == 0)
 713         error ("There's no room for a new private charset %s",
 714                SDATA (SYMBOL_NAME (charset_symbol)));
 715     }
 716
 717   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 718                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 719   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 720   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 721   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 722   Fupdate_coding_systems_internal ();
 723   return Qnil;
 724 }
 725
 726 DEFUN ("generic-character-list", Fgeneric_character_list,
 727        Sgeneric_character_list, 0, 0, 0,
 728        doc: /* Return a list of all possible generic characters.
 729 It includes a generic character for a charset not yet defined.  */)
 730      ()
 731 {
 732   return Vgeneric_character_list;
 733 }
 734
 735 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 736        Sget_unused_iso_final_char, 2, 2, 0,
 737        doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
 738 DIMENSION is the number of bytes to represent a character: 1 or 2.
 739 CHARS is the number of characters in a dimension: 94 or 96.
 740
 741 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 742 If there's no unused final char for the specified kind of charset,
 743 return nil.  */)
 744      (dimension, chars)
 745      Lisp_Object dimension, chars;
 746 {
 747   int final_char;
 748
 749   CHECK_NUMBER (dimension);
 750   CHECK_NUMBER (chars);
 751   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 752     error ("Invalid charset dimension %d, it should be 1 or 2",
 753            XINT (dimension));
 754   if (XINT (chars) != 94 && XINT (chars) != 96)
 755     error ("Invalid charset chars %d, it should be 94 or 96",
 756            XINT (chars));
 757   for (final_char = '0'; final_char <= '?'; final_char++)
 758     {
 759       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 760         break;
 761     }
 762   return (final_char <= '?' ? make_number (final_char) : Qnil);
 763 }
 764
 765 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 766        4, 4, 0,
 767        doc: /* Declare an equivalent charset for ISO-2022 decoding.
 768
 769 On decoding by an ISO-2022 base coding system, when a charset
 770 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
 771 if CHARSET is designated instead.  */)
 772      (dimension, chars, final_char, charset)
 773      Lisp_Object dimension, chars, final_char, charset;
 774 {
 775   int charset_id;
 776
 777   CHECK_NUMBER (dimension);
 778   CHECK_NUMBER (chars);
 779   CHECK_NUMBER (final_char);
 780   CHECK_SYMBOL (charset);
 781
 782   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 783     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 784   if (XINT (chars) != 94 && XINT (chars) != 96)
 785     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 786   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 787     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 788   if ((charset_id = get_charset_id (charset)) < 0)
 789     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
 790
 791   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
 792   return Qnil;
 793 }
 794
 795 /* Return information about charsets in the text at PTR of NBYTES
 796    bytes, which are NCHARS characters.  The value is:
 797
 798         0: Each character is represented by one byte.  This is always
 799            true for unibyte text.
 800         1: No charsets other than ascii eight-bit-control,
 801            eight-bit-graphic, and latin-1 are found.
 802         2: Otherwise.
 803
 804    In addition, if CHARSETS is nonzero, for each found charset N, set
 805    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 806    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 807    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 808    1 (note that there's no charset whose ID is 1).  */
 809
 810 int
 811 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 812      const unsigned char *ptr;
 813      int nchars, nbytes, *charsets;
 814      Lisp_Object table;
 815 {
 816   if (nchars == nbytes)
 817     {
 818       if (charsets && nbytes > 0)
 819         {
 820           const unsigned char *endp = ptr + nbytes;
 821           int maskbits = 0;
 822
 823           while (ptr < endp && maskbits != 7)
 824             {
 825               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 826               ptr++;
 827             }
 828
 829           if (maskbits & 1)
 830             charsets[CHARSET_ASCII] = 1;
 831           if (maskbits & 2)
 832             charsets[CHARSET_8_BIT_CONTROL] = 1;
 833           if (maskbits & 4)
 834             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 835         }
 836       return 0;
 837     }
 838   else
 839     {
 840       int return_val = 1;
 841       int bytes, charset, c1, c2;
 842
 843       if (! CHAR_TABLE_P (table))
 844         table = Qnil;
 845
 846       while (nchars-- > 0)
 847         {
 848           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 849           ptr += bytes;
 850
 851           if (!CHARSET_DEFINED_P (charset))
 852             charset = 1;
 853           else if (! NILP (table))
 854             {
 855               int c = translate_char (table, -1, charset, c1, c2);
 856               if (c >= 0)
 857                 charset = CHAR_CHARSET (c);
 858             }
 859
 860           if (return_val == 1
 861               && charset != CHARSET_ASCII
 862               && charset != CHARSET_8_BIT_CONTROL
 863               && charset != CHARSET_8_BIT_GRAPHIC
 864               && charset != charset_latin_iso8859_1)
 865             return_val = 2;
 866
 867           if (charsets)
 868             charsets[charset] = 1;
 869           else if (return_val == 2)
 870             break;
 871         }
 872       return return_val;
 873     }
 874 }
 875
 876 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 877        2, 3, 0,
 878        doc: /* Return a list of charsets in the region between BEG and END.
 879 BEG and END are buffer positions.
 880 Optional arg TABLE if non-nil is a translation table to look up.
 881
 882 If the region contains invalid multibyte characters,
 883 `unknown' is included in the returned list.
 884
 885 If the current buffer is unibyte, the returned list may contain
 886 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 887      (beg, end, table)
 888      Lisp_Object beg, end, table;
 889 {
 890   int charsets[MAX_CHARSET + 1];
 891   int from, from_byte, to, stop, stop_byte, i;
 892   Lisp_Object val;
 893
 894   validate_region (&beg, &end);
 895   from = XFASTINT (beg);
 896   stop = to = XFASTINT (end);
 897
 898   if (from < GPT && GPT < to)
 899     {
 900       stop = GPT;
 901       stop_byte = GPT_BYTE;
 902     }
 903   else
 904     stop_byte = CHAR_TO_BYTE (stop);
 905
 906   from_byte = CHAR_TO_BYTE (from);
 907
 908   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 909   while (1)
 910     {
 911       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 912                             stop_byte - from_byte, charsets, table);
 913       if (stop < to)
 914         {
 915           from = stop, from_byte = stop_byte;
 916           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 917         }
 918       else
 919         break;
 920     }
 921
 922   val = Qnil;
 923   if (charsets[1])
 924     val = Fcons (Qunknown, val);
 925   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 926     if (charsets[i])
 927       val = Fcons (CHARSET_SYMBOL (i), val);
 928   if (charsets[0])
 929     val = Fcons (Qascii, val);
 930   return val;
 931 }
 932
 933 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 934        1, 2, 0,
 935        doc: /* Return a list of charsets in STR.
 936 Optional arg TABLE if non-nil is a translation table to look up.
 937
 938 If the string contains invalid multibyte characters,
 939 `unknown' is included in the returned list.
 940
 941 If STR is unibyte, the returned list may contain
 942 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 943      (str, table)
 944      Lisp_Object str, table;
 945 {
 946   int charsets[MAX_CHARSET + 1];
 947   int i;
 948   Lisp_Object val;
 949
 950   CHECK_STRING (str);
 951
 952   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 953   find_charset_in_text (SDATA (str), SCHARS (str),
 954                         SBYTES (str), charsets, table);
 955
 956   val = Qnil;
 957   if (charsets[1])
 958     val = Fcons (Qunknown, val);
 959   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 960     if (charsets[i])
 961       val = Fcons (CHARSET_SYMBOL (i), val);
 962   if (charsets[0])
 963     val = Fcons (Qascii, val);
 964   return val;
 965 }
 966
 967 \f
 968 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 969        doc: /* Return a character made from arguments.
 970 Internal use only.  */)
 971      (charset, code1, code2)
 972      Lisp_Object charset, code1, code2;
 973 {
 974   int charset_id, c1, c2;
 975
 976   CHECK_NUMBER (charset);
 977   charset_id = XINT (charset);
 978   if (!CHARSET_DEFINED_P (charset_id))
 979     error ("Invalid charset ID: %d", XINT (charset));
 980
 981   if (NILP (code1))
 982     c1 = 0;
 983   else
 984     {
 985       CHECK_NUMBER (code1);
 986       c1 = XINT (code1);
 987     }
 988   if (NILP (code2))
 989     c2 = 0;
 990   else
 991     {
 992       CHECK_NUMBER (code2);
 993       c2 = XINT (code2);
 994     }
 995
 996   if (charset_id == CHARSET_ASCII)
 997     {
 998       if (c1 < 0 || c1 > 0x7F)
 999         goto invalid_code_posints;
1000       return make_number (c1);
1001     }
1002   else if (charset_id == CHARSET_8_BIT_CONTROL)
1003     {
1004       if (NILP (code1))
1005         c1 = 0x80;
1006       else if (c1 < 0x80 || c1 > 0x9F)
1007         goto invalid_code_posints;
1008       return make_number (c1);
1009     }
1010   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1011     {
1012       if (NILP (code1))
1013         c1 = 0xA0;
1014       else if (c1 < 0xA0 || c1 > 0xFF)
1015         goto invalid_code_posints;
1016       return make_number (c1);
1017     }
1018   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1019     goto invalid_code_posints;
1020   c1 &= 0x7F;
1021   c2 &= 0x7F;
1022   if (c1 == 0
1023       ? c2 != 0
1024       : (c2 == 0
1025          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1026          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1027     goto invalid_code_posints;
1028   return make_number (MAKE_CHAR (charset_id, c1, c2));
1029
1030  invalid_code_posints:
1031   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1032 }
1033
1034 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1035        doc: /* Return list of charset and one or two position-codes of CH.
1036 If CH is invalid as a character code,
1037 return a list of symbol `unknown' and CH.  */)
1038      (ch)
1039      Lisp_Object ch;
1040 {
1041   int c, charset, c1, c2;
1042
1043   CHECK_NUMBER (ch);
1044   c = XFASTINT (ch);
1045   if (!CHAR_VALID_P (c, 1))
1046     return Fcons (Qunknown, Fcons (ch, Qnil));
1047   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1048   return (c2 >= 0
1049           ? Fcons (CHARSET_SYMBOL (charset),
1050                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1051           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1052 }
1053
1054 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1055        doc: /* Return charset of CH.  */)
1056      (ch)
1057      Lisp_Object ch;
1058 {
1059   CHECK_NUMBER (ch);
1060
1061   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1062 }
1063
1064 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1065        doc: /* Return charset of a character in the current buffer at position POS.
1066 If POS is nil, it defauls to the current point.
1067 If POS is out of range, the value is nil.  */)
1068      (pos)
1069      Lisp_Object pos;
1070 {
1071   Lisp_Object ch;
1072   int charset;
1073
1074   ch = Fchar_after (pos);
1075   if (! INTEGERP (ch))
1076     return ch;
1077   charset = CHAR_CHARSET (XINT (ch));
1078   return CHARSET_SYMBOL (charset);
1079 }
1080
1081 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1082        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1083
1084 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1085 by their DIMENSION, CHARS, and FINAL-CHAR,
1086 where as Emacs distinguishes them by charset symbol.
1087 See the documentation of the function `charset-info' for the meanings of
1088 DIMENSION, CHARS, and FINAL-CHAR.  */)
1089      (dimension, chars, final_char)
1090      Lisp_Object dimension, chars, final_char;
1091 {
1092   int charset;
1093
1094   CHECK_NUMBER (dimension);
1095   CHECK_NUMBER (chars);
1096   CHECK_NUMBER (final_char);
1097
1098   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1099     return Qnil;
1100   return CHARSET_SYMBOL (charset);
1101 }
1102
1103 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1104    generic character.  If GENERICP is zero, return nonzero iff C is a
1105    valid normal character.  Do not call this function directly,
1106    instead use macro CHAR_VALID_P.  */
1107 int
1108 char_valid_p (c, genericp)
1109      int c, genericp;
1110 {
1111   int charset, c1, c2;
1112
1113   if (c < 0 || c >= MAX_CHAR)
1114     return 0;
1115   if (SINGLE_BYTE_CHAR_P (c))
1116     return 1;
1117   SPLIT_CHAR (c, charset, c1, c2);
1118   if (genericp)
1119     {
1120       if (c1)
1121         {
1122           if (c2 <= 0) c2 = 0x20;
1123         }
1124       else
1125         {
1126           if (c2 <= 0) c1 = c2 = 0x20;
1127         }
1128     }
1129   return (CHARSET_DEFINED_P (charset)
1130           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1131 }
1132
1133 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1134        doc: /* Return t if OBJECT is a valid normal character.
1135 If optional arg GENERICP is non-nil, also return t if OBJECT is
1136 a valid generic character.  */)
1137      (object, genericp)
1138      Lisp_Object object, genericp;
1139 {
1140   if (! NATNUMP (object))
1141     return Qnil;
1142   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1143 }
1144
1145 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1146        Sunibyte_char_to_multibyte, 1, 1, 0,
1147        doc: /* Convert the unibyte character CH to multibyte character.
1148 The conversion is done based on `nonascii-translation-table' (which see)
1149  or `nonascii-insert-offset' (which see).  */)
1150      (ch)
1151      Lisp_Object ch;
1152 {
1153   int c;
1154
1155   CHECK_NUMBER (ch);
1156   c = XINT (ch);
1157   if (c < 0 || c >= 0400)
1158     error ("Invalid unibyte character: %d", c);
1159   c = unibyte_char_to_multibyte (c);
1160   if (c < 0)
1161     error ("Can't convert to multibyte character: %d", XINT (ch));
1162   return make_number (c);
1163 }
1164
1165 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1166        Smultibyte_char_to_unibyte, 1, 1, 0,
1167        doc: /* Convert the multibyte character CH to unibyte character.
1168 The conversion is done based on `nonascii-translation-table' (which see)
1169  or `nonascii-insert-offset' (which see).  */)
1170      (ch)
1171      Lisp_Object ch;
1172 {
1173   int c;
1174
1175   CHECK_NUMBER (ch);
1176   c = XINT (ch);
1177   if (! CHAR_VALID_P (c, 0))
1178     error ("Invalid multibyte character: %d", c);
1179   c = multibyte_char_to_unibyte (c, Qnil);
1180   if (c < 0)
1181     error ("Can't convert to unibyte character: %d", XINT (ch));
1182   return make_number (c);
1183 }
1184
1185 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1186        doc: /* Return 1 regardless of the argument CH.  */)
1187      (ch)
1188      Lisp_Object ch;
1189 {
1190   CHECK_NUMBER (ch);
1191   return make_number (1);
1192 }
1193
1194 /* Return how many bytes C will occupy in a multibyte buffer.
1195    Don't call this function directly, instead use macro CHAR_BYTES.  */
1196 int
1197 char_bytes (c)
1198      int c;
1199 {
1200   int charset;
1201
1202   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1203     return 1;
1204   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1205     return 1;
1206
1207   charset = CHAR_CHARSET (c);
1208   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1209 }
1210
/* Return the width of the character whose multibyte form starts with
   the byte C.  The width is the number of columns occupied on the
   screen when displayed in the current buffer:

     - tab: the buffer's `tab_width';
     - newline: 0;
     - other bytes below 0x20, and 0x7F: 2 if the buffer's `ctl_arrow'
       is non-nil, else 4;
     - 0x20..0x7E: 1;
     - bytes above 0x7F: WIDTH_BY_CHAR_HEAD (C) if the buffer is
       multibyte and C is a base leading code, else 4.

   C is evaluated several times, so it must not have side effects.
   Every use of C is parenthesized so that an expression argument is
   grouped as the caller wrote it.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1228
1229 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1230        doc: /* Return width of CH when displayed in the current buffer.
1231 The width is measured by how many columns it occupies on the screen.
1232 Tab is taken to occupy `tab-width' columns.  */)
1233      (ch)
1234      Lisp_Object ch;
1235 {
1236   Lisp_Object val, disp;
1237   int c;
1238   struct Lisp_Char_Table *dp = buffer_display_table ();
1239
1240   CHECK_NUMBER (ch);
1241
1242   c = XINT (ch);
1243
1244   /* Get the way the display table would display it.  */
1245   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1246
1247   if (VECTORP (disp))
1248     XSETINT (val, XVECTOR (disp)->size);
1249   else if (SINGLE_BYTE_CHAR_P (c))
1250     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1251   else
1252     {
1253       int charset = CHAR_CHARSET (c);
1254
1255       XSETFASTINT (val, CHARSET_WIDTH (charset));
1256     }
1257   return val;
1258 }
1259
1260 /* Return width of string STR of length LEN when displayed in the
1261    current buffer.  The width is measured by how many columns it
1262    occupies on the screen.  */
1263
1264 int
1265 strwidth (str, len)
1266      unsigned char *str;
1267      int len;
1268 {
1269   return c_string_width (str, len, -1, NULL, NULL);
1270 }
1271
1272 /* Return width of string STR of length LEN when displayed in the
1273    current buffer.  The width is measured by how many columns it
1274    occupies on the screen.  If PRECISION > 0, return the width of
1275    longest substring that doesn't exceed PRECISION, and set number of
1276    characters and bytes of the substring in *NCHARS and *NBYTES
1277    respectively.  */
1278
1279 int
1280 c_string_width (str, len, precision, nchars, nbytes)
1281      const unsigned char *str;
1282      int len, precision, *nchars, *nbytes;
1283 {
1284   int i = 0, i_byte = 0;
1285   int width = 0;
1286   int chars;
1287   struct Lisp_Char_Table *dp = buffer_display_table ();
1288
1289   while (i_byte < len)
1290     {
1291       int bytes, thiswidth;
1292       Lisp_Object val;
1293
1294       if (dp)
1295         {
1296           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1297
1298           chars = 1;
1299           val = DISP_CHAR_VECTOR (dp, c);
1300           if (VECTORP (val))
1301             thiswidth = XVECTOR (val)->size;
1302           else
1303             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1304         }
1305       else
1306         {
1307           chars = 1;
1308           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1309           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1310         }
1311
1312       if (precision > 0
1313           && (width + thiswidth > precision))
1314         {
1315           *nchars = i;
1316           *nbytes = i_byte;
1317           return width;
1318         }
1319       i++;
1320       i_byte += bytes;
1321       width += thiswidth;
1322   }
1323
1324   if (precision > 0)
1325     {
1326       *nchars = i;
1327       *nbytes = i_byte;
1328     }
1329
1330   return width;
1331 }
1332
1333 /* Return width of Lisp string STRING when displayed in the current
1334    buffer.  The width is measured by how many columns it occupies on
1335    the screen while paying attention to compositions.  If PRECISION >
1336    0, return the width of longest substring that doesn't exceed
1337    PRECISION, and set number of characters and bytes of the substring
1338    in *NCHARS and *NBYTES respectively.  */
1339
1340 int
1341 lisp_string_width (string, precision, nchars, nbytes)
1342      Lisp_Object string;
1343      int precision, *nchars, *nbytes;
1344 {
1345   int len = SCHARS (string);
1346   int len_byte = SBYTES (string);
1347   const unsigned char *str = SDATA (string);
1348   int i = 0, i_byte = 0;
1349   int width = 0;
1350   struct Lisp_Char_Table *dp = buffer_display_table ();
1351
1352   while (i < len)
1353     {
1354       int chars, bytes, thiswidth;
1355       Lisp_Object val;
1356       int cmp_id;
1357       int ignore, end;
1358
1359       if (find_composition (i, -1, &ignore, &end, &val, string)
1360           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1361               >= 0))
1362         {
1363           thiswidth = composition_table[cmp_id]->width;
1364           chars = end - i;
1365           bytes = string_char_to_byte (string, end) - i_byte;
1366         }
1367       else if (dp)
1368         {
1369           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1370
1371           chars = 1;
1372           val = DISP_CHAR_VECTOR (dp, c);
1373           if (VECTORP (val))
1374             thiswidth = XVECTOR (val)->size;
1375           else
1376             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1377         }
1378       else
1379         {
1380           chars = 1;
1381           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1382           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1383         }
1384
1385       if (precision > 0
1386           && (width + thiswidth > precision))
1387         {
1388           *nchars = i;
1389           *nbytes = i_byte;
1390           return width;
1391         }
1392       i += chars;
1393       i_byte += bytes;
1394       width += thiswidth;
1395   }
1396
1397   if (precision > 0)
1398     {
1399       *nchars = i;
1400       *nbytes = i_byte;
1401     }
1402
1403   return width;
1404 }
1405
1406 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1407        doc: /* Return width of STRING when displayed in the current buffer.
1408 Width is measured by how many columns it occupies on the screen.
1409 When calculating width of a multibyte character in STRING,
1410 only the base leading-code is considered; the validity of
1411 the following bytes is not checked.  Tabs in STRING are always
1412 taken to occupy `tab-width' columns.  */)
1413      (string)
1414      Lisp_Object string;
1415 {
1416   Lisp_Object val;
1417
1418   CHECK_STRING (string);
1419   XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1420   return val;
1421 }
1422
1423 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1424        doc: /* Return the direction of CH.
1425 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1426      (ch)
1427      Lisp_Object ch;
1428 {
1429   int charset;
1430
1431   CHECK_NUMBER (ch);
1432   charset = CHAR_CHARSET (XFASTINT (ch));
1433   if (!CHARSET_DEFINED_P (charset))
1434     invalid_character (XINT (ch));
1435   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1436 }
1437
1438 /* Return the number of characters in the NBYTES bytes at PTR.
1439    This works by looking at the contents and checking for multibyte sequences.
1440    However, if the current buffer has enable-multibyte-characters = nil,
1441    we treat each byte as a character.  */
1442
1443 int
1444 chars_in_text (ptr, nbytes)
1445      const unsigned char *ptr;
1446      int nbytes;
1447 {
1448   /* current_buffer is null at early stages of Emacs initialization.  */
1449   if (current_buffer == 0
1450       || NILP (current_buffer->enable_multibyte_characters))
1451     return nbytes;
1452
1453   return multibyte_chars_in_text (ptr, nbytes);
1454 }
1455
/* Return the number of characters in the NBYTES bytes at PTR, found
   by inspecting the contents for multibyte sequences.  The value of
   enable-multibyte-characters is not consulted.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *limit = ptr + nbytes;
  int count, bytes;

  /* Step over one multibyte sequence per iteration.  */
  for (count = 0; ptr < limit; count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, bytes);
      ptr += bytes;
    }

  return count;
}
1480
/* Parse the LEN bytes of unibyte text at STR as if they were multibyte
   text, and store the resulting numbers of characters and bytes in
   *NCHARS and *NBYTES.  A byte that does not start a multibyte
   sequence is counted as one character of two bytes.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *limit = str + len;
  int char_count = 0, byte_count = 0;
  int n;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, n))
        {
          /* A multibyte sequence of N bytes is kept as it is.  */
          str += n;
          byte_count += n;
        }
      else
        {
          /* A lone byte will take two bytes in multibyte form.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1505
1506 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1507    It actually converts only 8-bit characters in the range 0x80..0x9F
1508    that don't contruct multibyte characters to multibyte forms.  If
1509    NCHARS is nonzero, set *NCHARS to the number of characters in the
1510    text.  It is assured that we can use LEN bytes at STR as a work
1511    area and that is enough.  Return the number of bytes of the
1512    resulting text.  */
1513
1514 int
1515 str_as_multibyte (str, len, nbytes, nchars)
1516      unsigned char *str;
1517      int len, nbytes, *nchars;
1518 {
1519   unsigned char *p = str, *endp = str + nbytes;
1520   unsigned char *to;
1521   int chars = 0;
1522   int n;
1523
1524   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1525     p += n, chars++;
1526   if (nchars)
1527     *nchars = chars;
1528   if (p == endp)
1529     return nbytes;
1530
1531   to = p;
1532   nbytes = endp - p;
1533   endp = str + len;
1534   safe_bcopy (p, endp - nbytes, nbytes);
1535   p = endp - nbytes;
1536   while (p < endp)
1537     {
1538       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1539         {
1540           while (n--)
1541             *to++ = *p++;
1542         }
1543       else
1544         {
1545           *to++ = LEADING_CODE_8_BIT_CONTROL;
1546           *to++ = *p++ + 0x20;
1547         }
1548       chars++;
1549     }
1550   if (nchars)
1551     *nchars = chars;
1552   return (to - str);
1553 }
1554
/* Parse the unibyte string of LEN bytes at STR, and return the number
   of bytes it may occupy when converted to a multibyte string by
   `str_to_multibyte'.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      /* Bytes in the range 0x80..0x9F are counted as two bytes, all
         others as one.  */
      if (str[i] >= 0x80 && str[i] < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1571
1572 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1573    that contains the same single-byte characters.  It actually
1574    converts all 8-bit characters to multibyte forms.  It is assured
1575    that we can use LEN bytes at STR as a work area and that is
1576    enough.  */
1577
1578 int
1579 str_to_multibyte (str, len, bytes)
1580      unsigned char *str;
1581      int len, bytes;
1582 {
1583   unsigned char *p = str, *endp = str + bytes;
1584   unsigned char *to;
1585
1586   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1587   if (p == endp)
1588     return bytes;
1589   to = p;
1590   bytes = endp - p;
1591   endp = str + len;
1592   safe_bcopy (p, endp - bytes, bytes);
1593   p = endp - bytes;
1594   while (p < endp)
1595     {
1596       if (*p < 0x80 || *p >= 0xA0)
1597         *to++ = *p++;
1598       else
1599         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1600     }
1601   return (to - str);
1602 }
1603
1604 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1605    actually converts only 8-bit characters in the range 0x80..0x9F to
1606    unibyte forms.  */
1607
1608 int
1609 str_as_unibyte (str, bytes)
1610      unsigned char *str;
1611      int bytes;
1612 {
1613   unsigned char *p = str, *endp = str + bytes;
1614   unsigned char *to = str;
1615
1616   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1617   to = p;
1618   while (p < endp)
1619     {
1620       if (*p == LEADING_CODE_8_BIT_CONTROL)
1621         *to++ = *(p + 1) - 0x20, p += 2;
1622       else
1623         *to++ = *p++;
1624     }
1625   return (to - str);
1626 }
1627
1628 \f
1629 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1630   doc: /* Concatenate all the argument characters and make the result a string.
1631 usage: (string &rest CHARACTERS)  */)
1632      (n, args)
1633      int n;
1634      Lisp_Object *args;
1635 {
1636   int i, bufsize;
1637   unsigned char *buf, *p;
1638   int c;
1639   int multibyte = 0;
1640   Lisp_Object ret;
1641   USE_SAFE_ALLOCA;
1642
1643   bufsize = MAX_MULTIBYTE_LENGTH * n;
1644   SAFE_ALLOCA (buf, unsigned char *, bufsize);
1645   p = buf;
1646
1647   for (i = 0; i < n; i++)
1648     {
1649       CHECK_NUMBER (args[i]);
1650       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1651         multibyte = 1;
1652     }
1653
1654   for (i = 0; i < n; i++)
1655     {
1656       c = XINT (args[i]);
1657       if (multibyte)
1658         p += CHAR_STRING (c, p);
1659       else
1660         *p++ = c;
1661     }
1662
1663   ret = make_string_from_bytes (buf, n, p - buf);
1664   SAFE_FREE ();
1665
1666   return ret;
1667 }
1668
1669 #endif /* emacs */
1670 \f
1671 int
1672 charset_id_internal (charset_name)
1673      char *charset_name;
1674 {
1675   Lisp_Object val;
1676
1677   val= Fget (intern (charset_name), Qcharset);
1678   if (!VECTORP (val))
1679     error ("Charset %s is not defined", charset_name);
1680
1681   return (XINT (XVECTOR (val)->contents[0]));
1682 }
1683
1684 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1685        Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
1686      ()
1687 {
1688   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1689   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1690   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1691   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1692   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1693   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1694   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1695   charset_mule_unicode_0100_24ff
1696     = charset_id_internal ("mule-unicode-0100-24ff");
1697   charset_mule_unicode_2500_33ff
1698     = charset_id_internal ("mule-unicode-2500-33ff");
1699   charset_mule_unicode_e000_ffff
1700     = charset_id_internal ("mule-unicode-e000-ffff");
1701   return Qnil;
1702 }
1703
1704 void
1705 init_charset_once ()
1706 {
1707   int i, j, k;
1708
1709   staticpro (&Vcharset_table);
1710   staticpro (&Vcharset_symbol_table);
1711   staticpro (&Vgeneric_character_list);
1712
1713   /* This has to be done here, before we call Fmake_char_table.  */
1714   Qcharset_table = intern ("charset-table");
1715   staticpro (&Qcharset_table);
1716
1717   /* Intern this now in case it isn't already done.
1718      Setting this variable twice is harmless.
1719      But don't staticpro it here--that is done in alloc.c.  */
1720   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1721
1722   /* Now we are ready to set up this property, so we can
1723      create the charset table.  */
1724   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1725   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1726
1727   Qunknown = intern ("unknown");
1728   staticpro (&Qunknown);
1729   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1730                                         Qunknown);
1731
1732   /* Setup tables.  */
1733   for (i = 0; i < 2; i++)
1734     for (j = 0; j < 2; j++)
1735       for (k = 0; k < 128; k++)
1736         iso_charset_table [i][j][k] = -1;
1737
1738   for (i = 0; i < 256; i++)
1739     bytes_by_char_head[i] = 1;
1740   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1741   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1742   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1743   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1744
1745   for (i = 0; i < 128; i++)
1746     width_by_char_head[i] = 1;
1747   for (; i < 256; i++)
1748     width_by_char_head[i] = 4;
1749   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1750   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1751   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1752   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1753
1754   {
1755     Lisp_Object val;
1756
1757     val = Qnil;
1758     for (i = 0x81; i < 0x90; i++)
1759       val = Fcons (make_number ((i - 0x70) << 7), val);
1760     for (; i < 0x9A; i++)
1761       val = Fcons (make_number ((i - 0x8F) << 14), val);
1762     for (i = 0xA0; i < 0xF0; i++)
1763       val = Fcons (make_number ((i - 0x70) << 7), val);
1764     for (; i < 0xFF; i++)
1765       val = Fcons (make_number ((i - 0xE0) << 14), val);
1766     Vgeneric_character_list = Fnreverse (val);
1767   }
1768
1769   nonascii_insert_offset = 0;
1770   Vnonascii_translation_table = Qnil;
1771 }
1772
1773 #ifdef emacs
1774 /* Set up this file's symbols, special charsets, subrs and Lisp variables.  */
1775 void
1776 syms_of_charset ()
1777 {
1778   Qcharset = intern ("charset");
1779   staticpro (&Qcharset);
1780   /* Symbols naming the three special charsets defined below.  */
1781   Qascii = intern ("ascii");
1782   staticpro (&Qascii);
1783
1784   Qeight_bit_control = intern ("eight-bit-control");
1785   staticpro (&Qeight_bit_control);
1786
1787   Qeight_bit_graphic = intern ("eight-bit-graphic");
1788   staticpro (&Qeight_bit_graphic);
1789   /* Define special charsets ascii, eight-bit-control, and
1790      eight-bit-graphic.  Each has its CHARSET_SYMBOL set, and its
1791      CHARSET_TABLE_ENTRY stored as the symbol's `charset' property.  */
1792   update_charset_table (make_number (CHARSET_ASCII),
1793                         make_number (1), make_number (94),
1794                         make_number (1),
1795                         make_number (0),
1796                         make_number ('B'),
1797                         make_number (0),
1798                         build_string ("ASCII"),
1799                         Qnil,   /* same as above */
1800                         build_string ("ASCII (ISO646 IRV)"));
1801   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1802   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1803   /* NOTE(review): -1 replaces ascii's 'B' and 0 here; presumably "no ISO designation" -- confirm.  */
1804   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1805                         make_number (1), make_number (96),
1806                         make_number (4),
1807                         make_number (0),
1808                         make_number (-1),
1809                         make_number (-1),
1810                         build_string ("8-bit control code (0x80..0x9F)"),
1811                         Qnil,   /* same as above */
1812                         Qnil);  /* same as above */
1813   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1814   Fput (Qeight_bit_control, Qcharset,
1815         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1816
1817   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1818                         make_number (1), make_number (96),
1819                         make_number (4),
1820                         make_number (0),
1821                         make_number (-1),
1822                         make_number (-1),
1823                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1824                         Qnil,   /* same as above */
1825                         Qnil);  /* same as above */
1826   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1827   Fput (Qeight_bit_graphic, Qcharset,
1828         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1829   /* Purpose symbol for the char-table made at the end; its Qchar_table_extra_slots property is set to 0.  */
1830   Qauto_fill_chars = intern ("auto-fill-chars");
1831   staticpro (&Qauto_fill_chars);
1832   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1833   /* Register each subr with defsubr.  */
1834   defsubr (&Sdefine_charset);
1835   defsubr (&Sgeneric_character_list);
1836   defsubr (&Sget_unused_iso_final_char);
1837   defsubr (&Sdeclare_equiv_charset);
1838   defsubr (&Sfind_charset_region);
1839   defsubr (&Sfind_charset_string);
1840   defsubr (&Smake_char_internal);
1841   defsubr (&Ssplit_char);
1842   defsubr (&Schar_charset);
1843   defsubr (&Scharset_after);
1844   defsubr (&Siso_charset);
1845   defsubr (&Schar_valid_p);
1846   defsubr (&Sunibyte_char_to_multibyte);
1847   defsubr (&Smultibyte_char_to_unibyte);
1848   defsubr (&Schar_bytes);
1849   defsubr (&Schar_width);
1850   defsubr (&Sstring_width);
1851   defsubr (&Schar_direction);
1852   defsubr (&Sstring);
1853   defsubr (&Ssetup_special_charsets);
1854   /* Lisp variables.  charset-list starts out holding the three special charset symbols.  */
1855   DEFVAR_LISP ("charset-list", &Vcharset_list,
1856                doc: /* List of charsets ever defined.  */);
1857   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1858                                         Fcons (Qeight_bit_graphic, Qnil)));
1859   /* Initial value is a 16-element vector filled with nil.  */
1860   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1861                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1862 An ID of a translation table is an index of this vector.  */);
1863   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1864   /* The four private leading codes, each initialized from its LEADING_CODE_PRIVATE_* value.  */
1865   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1866               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1867   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1868
1869   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1870               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1871   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1872
1873   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1874               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1875   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1876
1877   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1878               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1879   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1880   /* Initial values for unibyte->multibyte conversion: offset 0 here, and a nil translation table below.  */
1881   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1882               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1883 This is used for converting unibyte text to multibyte,
1884 and for inserting character codes specified by number.
1885
1886 This serves to convert a Latin-1 or similar 8-bit character code
1887 to the corresponding Emacs multibyte character code.
1888 Typically the value should be (- (make-char CHARSET 0) 128),
1889 for your choice of character set.
1890 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1891   nonascii_insert_offset = 0;
1892
1893   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1894                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1895 This is used for converting unibyte text to multibyte,
1896 and for inserting character codes specified by number.
1897
1898 Conversion is performed only when multibyte characters are enabled,
1899 and it serves to convert a Latin-1 or similar 8-bit character code
1900 to the corresponding Emacs character code.
1901
1902 If this is nil, `nonascii-insert-offset' is used instead.
1903 See also the docstring of `make-translation-table'.  */);
1904   Vnonascii_translation_table = Qnil;
1905   /* The new char-table has space and newline set to t.  */
1906   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1907                doc: /* A char-table for characters which invoke auto-filling.
1908 Such characters have value t in this table.  */);
1909   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1910   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1911   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1912 }
1913
1914 #endif /* emacs */
1915
1916 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1917    (do not change this comment) */