src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  53 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  54 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  55 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
  79 /* Vector of all translation tables ever defined.
  80    The ID of a translation table is its index into this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 int nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 116 }
 117
 118 /* Parse string STR of length LENGTH and fetch information of a
 119    character at STR.  Set BYTES to the byte length the character
 120    occupies, CHARSET, C1, C2 to proper values of the character. */
 121
 122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 123   do {                                                                       \
 124     (c1) = *(str);                                                           \
 125     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 126     if ((bytes) == 1)                                                        \
 127       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 128     else if ((bytes) == 2)                                                   \
 129       {                                                                      \
 130         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 131           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 132         else                                                                 \
 133           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 134       }                                                                      \
 135     else if ((bytes) == 3)                                                   \
 136       {                                                                      \
 137         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 138           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 139         else                                                                 \
 140           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 141       }                                                                      \
 142     else                                                                     \
 143       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 144   } while (0)
 145
 146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.  */
 147 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 148   ((charset) == CHARSET_ASCII                           \
 149    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 150    : ((charset) == CHARSET_8_BIT_CONTROL                \
 151       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 152       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 153          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 154          : (CHARSET_DIMENSION (charset) == 1            \
 155             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 156             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 157                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 158
 159 /* Store multi-byte form of the character C in STR.  The caller should
 160    allocate at least 4-byte area at STR in advance.  Returns the
 161    length of the multi-byte form.  If C is an invalid character code,
 162    return -1.  */
 163
 164 int
 165 char_to_string_1 (c, str)
 166      int c;
 167      unsigned char *str;
 168 {
 169   unsigned char *p = str;
 170
 171   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 172     {
 173       /* Multibyte character can't have a modifier bit.  */
 174       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 175         return -1;
 176
 177       /* For Meta, Shift, and Control modifiers, we need special care.  */
 178       if (c & CHAR_META)
 179         {
 180           /* Move the meta bit to the right place for a string.  */
 181           c = (c & ~CHAR_META) | 0x80;
 182         }
 183       if (c & CHAR_SHIFT)
 184         {
 185           /* Shift modifier is valid only with [A-Za-z].  */
 186           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 187             c &= ~CHAR_SHIFT;
 188           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 189             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 190         }
 191       if (c & CHAR_CTL)
 192         {
 193           /* Simulate the code in lread.c.  */
 194           /* Allow `\C- ' and `\C-?'.  */
 195           if (c == (CHAR_CTL | ' '))
 196             c = 0;
 197           else if (c == (CHAR_CTL | '?'))
 198             c = 127;
 199           /* ASCII control chars are made from letters (both cases),
 200              as well as the non-letters within 0100...0137.  */
 201           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 202             c &= (037 | (~0177 & ~CHAR_CTL));
 203           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 204             c &= (037 | (~0177 & ~CHAR_CTL));
 205         }
 206
 207       /* If C still has any modifier bits, just ignore it.  */
 208       c &= ~CHAR_MODIFIER_MASK;
 209     }
 210
 211   if (SINGLE_BYTE_CHAR_P (c))
 212     {
 213       if (ASCII_BYTE_P (c) || c >= 0xA0)
 214         *p++ = c;
 215       else
 216         {
 217           *p++ = LEADING_CODE_8_BIT_CONTROL;
 218           *p++ = c + 0x20;
 219         }
 220     }
 221   else if (CHAR_VALID_P (c, 0))
 222     {
 223       int charset, c1, c2;
 224
 225       SPLIT_CHAR (c, charset, c1, c2);
 226
 227       if (charset >= LEADING_CODE_EXT_11)
 228         *p++ = (charset < LEADING_CODE_EXT_12
 229                 ? LEADING_CODE_PRIVATE_11
 230                 : (charset < LEADING_CODE_EXT_21
 231                    ? LEADING_CODE_PRIVATE_12
 232                    : (charset < LEADING_CODE_EXT_22
 233                       ? LEADING_CODE_PRIVATE_21
 234                       : LEADING_CODE_PRIVATE_22)));
 235       *p++ = charset;
 236       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 237         return -1;
 238       if (c1)
 239         {
 240           *p++ = c1 | 0x80;
 241           if (c2 > 0)
 242             *p++ = c2 | 0x80;
 243         }
 244     }
 245   else
 246     return -1;
 247
 248   return (p - str);
 249 }
 250
 251
 252 /* Store multi-byte form of the character C in STR.  The caller should
 253    allocate at least 4-byte area at STR in advance.  Returns the
 254    length of the multi-byte form.  If C is an invalid character code,
 255    signal an error.
 256
 257    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 258    directly if C can be an ASCII character.  */
 259
 260 int
 261 char_to_string (c, str)
 262      int c;
 263      unsigned char *str;
 264 {
 265   int len;
 266   len = char_to_string_1 (c, str);
 267   if (len == -1)
 268     invalid_character (c);
 269   return len;
 270 }
 271
 272
 273 /* Return the non-ASCII character corresponding to multi-byte form at
 274    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 275    length of the multibyte form in *ACTUAL_LEN.
 276
 277    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 278    this function directly if you want ot handle ASCII characters as
 279    well.  */
 280
 281 int
 282 string_to_char (str, len, actual_len)
 283      const unsigned char *str;
 284      int len, *actual_len;
 285 {
 286   int c, bytes, charset, c1, c2;
 287
 288   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 289   c = MAKE_CHAR (charset, c1, c2);
 290   if (actual_len)
 291     *actual_len = bytes;
 292   return c;
 293 }
 294
 295 /* Return the length of the multi-byte form at string STR of length LEN.
 296    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 297 int
 298 multibyte_form_length (str, len)
 299      const unsigned char *str;
 300      int len;
 301 {
 302   int bytes;
 303
 304   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 305   return bytes;
 306 }
 307
 308 /* Check multibyte form at string STR of length LEN and set variables
 309    pointed by CHARSET, C1, and C2 to charset and position codes of the
 310    character at STR, and return 0.  If there's no multibyte character,
 311    return -1.  This should be used only in the macro SPLIT_STRING
 312    which checks range of STR in advance.  */
 313
 314 int
 315 split_string (str, len, charset, c1, c2)
 316      const unsigned char *str;
 317      unsigned char *c1, *c2;
 318      int len, *charset;
 319 {
 320   register int bytes, cs, code1, code2 = -1;
 321
 322   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 323   if (cs == CHARSET_ASCII)
 324     return -1;
 325   *charset = cs;
 326   *c1 = code1;
 327   *c2 = code2;
 328   return 0;
 329 }
 330
 331 /* Return 1 iff character C has valid printable glyph.
 332    Use the macro CHAR_PRINTABLE_P instead.  */
 333 int
 334 char_printable_p (c)
 335      int c;
 336 {
 337   int charset, c1, c2;
 338
 339   if (ASCII_BYTE_P (c))
 340     return 1;
 341   else if (SINGLE_BYTE_CHAR_P (c))
 342     return 0;
 343   else if (c >= MAX_CHAR)
 344     return 0;
 345
 346   SPLIT_CHAR (c, charset, c1, c2);
 347   if (! CHARSET_DEFINED_P (charset))
 348     return 0;
 349   if (CHARSET_CHARS (charset) == 94
 350       ? c1 <= 32 || c1 >= 127
 351       : c1 < 32)
 352     return 0;
 353   if (CHARSET_DIMENSION (charset) == 2
 354       && (CHARSET_CHARS (charset) == 94
 355           ? c2 <= 32 || c2 >= 127
 356           : c2 < 32))
 357     return 0;
 358   return 1;
 359 }
 360
 361 /* Translate character C by translation table TABLE.  If C
 362    is negative, translate a character specified by CHARSET, C1, and C2
 363    (C1 and C2 are code points of the character).  If no translation is
 364    found in TABLE, return C.  */
 365 int
 366 translate_char (table, c, charset, c1, c2)
 367      Lisp_Object table;
 368      int c, charset, c1, c2;
 369 {
 370   Lisp_Object ch;
 371   int alt_charset, alt_c1, alt_c2, dimension;
 372
 373   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 374   if (!CHAR_TABLE_P (table)
 375       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 376     return c;
 377
 378   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 379   dimension = CHARSET_DIMENSION (alt_charset);
 380   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 381     /* CH is not a generic character, just return it.  */
 382     return XFASTINT (ch);
 383
 384   /* Since CH is a generic character, we must return a specific
 385      charater which has the same position codes as C from CH.  */
 386   if (charset < 0)
 387     SPLIT_CHAR (c, charset, c1, c2);
 388   if (dimension != CHARSET_DIMENSION (charset))
 389     /* We can't make such a character because of dimension mismatch.  */
 390     return c;
 391   return MAKE_CHAR (alt_charset, c1, c2);
 392 }
 393
 394 /* Convert the unibyte character C to multibyte based on
 395    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 396    convert C to a valid multibyte character, convert it based on
 397    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 398
 399 int
 400 unibyte_char_to_multibyte (c)
 401      int c;
 402 {
 403   if (c < 0400 && c >= 0200)
 404     {
 405       int c_save = c;
 406
 407       if (! NILP (Vnonascii_translation_table))
 408         {
 409           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 410           if (c >= 0400 && ! char_valid_p (c, 0))
 411             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 412         }
 413       else if (c >= 0240 && nonascii_insert_offset > 0)
 414         {
 415           c += nonascii_insert_offset;
 416           if (c < 0400 || ! char_valid_p (c, 0))
 417             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 418         }
 419       else if (c >= 0240)
 420         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421     }
 422   return c;
 423 }
 424
 425
 426 /* Convert the multibyte character C to unibyte 8-bit character based
 427    on Vnonascii_translation_table or nonascii_insert_offset.  If
 428    REV_TBL is non-nil, it should be a reverse table of
 429    Vnonascii_translation_table, i.e. what given by:
 430      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 431
 432 int
 433 multibyte_char_to_unibyte (c, rev_tbl)
 434      int c;
 435      Lisp_Object rev_tbl;
 436 {
 437   if (!SINGLE_BYTE_CHAR_P (c))
 438     {
 439       int c_save = c;
 440
 441       if (! CHAR_TABLE_P (rev_tbl)
 442           && CHAR_TABLE_P (Vnonascii_translation_table))
 443         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 444                                           make_number (0));
 445       if (CHAR_TABLE_P (rev_tbl))
 446         {
 447           Lisp_Object temp;
 448           temp = Faref (rev_tbl, make_number (c));
 449           if (INTEGERP (temp))
 450             c = XINT (temp);
 451           if (c >= 256)
 452             c = (c_save & 0177) + 0200;
 453         }
 454       else
 455         {
 456           if (nonascii_insert_offset > 0)
 457             c -= nonascii_insert_offset;
 458           if (c < 128 || c >= 256)
 459             c = (c_save & 0177) + 0200;
 460         }
 461     }
 462
 463   return c;
 464 }
 465
 466 \f
 467 /* Update the table Vcharset_table with the given arguments (see the
 468    document of `define-charset' for the meaning of each argument).
 469    Several other table contents are also updated.  The caller should
 470    check the validity of CHARSET-ID and the remaining arguments in
 471    advance.  */
 472
 473 void
 474 update_charset_table (charset_id, dimension, chars, width, direction,
 475                       iso_final_char, iso_graphic_plane,
 476                       short_name, long_name, description)
 477      Lisp_Object charset_id, dimension, chars, width, direction;
 478      Lisp_Object iso_final_char, iso_graphic_plane;
 479      Lisp_Object short_name, long_name, description;
 480 {
 481   int charset = XINT (charset_id);
 482   int bytes;
 483   unsigned char leading_code_base, leading_code_ext;
 484
 485   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 486     CHARSET_TABLE_ENTRY (charset)
 487       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 488
 489   if (NILP (long_name))
 490     long_name = short_name;
 491   if (NILP (description))
 492     description = long_name;
 493
 494   /* Get byte length of multibyte form, base leading-code, and
 495      extended leading-code of the charset.  See the comment under the
 496      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 497   bytes = XINT (dimension);
 498   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 499     {
 500       /* Official charset, it doesn't have an extended leading-code.  */
 501       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 502         bytes += 1; /* For a base leading-code.  */
 503       leading_code_base = charset;
 504       leading_code_ext = 0;
 505     }
 506   else
 507     {
 508       /* Private charset.  */
 509       bytes += 2; /* For base and extended leading-codes.  */
 510       leading_code_base
 511         = (charset < LEADING_CODE_EXT_12
 512            ? LEADING_CODE_PRIVATE_11
 513            : (charset < LEADING_CODE_EXT_21
 514               ? LEADING_CODE_PRIVATE_12
 515               : (charset < LEADING_CODE_EXT_22
 516                  ? LEADING_CODE_PRIVATE_21
 517                  : LEADING_CODE_PRIVATE_22)));
 518       leading_code_ext = charset;
 519       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 520         error ("Invalid dimension for the charset-ID %d", charset);
 521     }
 522
 523   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 524   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 525   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 526   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 527   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 528   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 529   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 530     = make_number (leading_code_base);
 531   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 532     = make_number (leading_code_ext);
 533   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 534   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 535     = iso_graphic_plane;
 536   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 537   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 538   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 539   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 540
 541   {
 542     /* If we have already defined a charset which has the same
 543        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 544        DIRECTION, we must update the entry REVERSE-CHARSET of both
 545        charsets.  If there's no such charset, the value of the entry
 546        is set to nil.  */
 547     int i;
 548
 549     for (i = 0; i <= MAX_CHARSET; i++)
 550       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 551         {
 552           if (CHARSET_DIMENSION (i) == XINT (dimension)
 553               && CHARSET_CHARS (i) == XINT (chars)
 554               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 555               && CHARSET_DIRECTION (i) != XINT (direction))
 556             {
 557               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 558                 = make_number (i);
 559               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 560               break;
 561             }
 562         }
 563     if (i > MAX_CHARSET)
 564       /* No such a charset.  */
 565       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 566         = make_number (-1);
 567   }
 568
 569   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 570       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 571     {
 572       bytes_by_char_head[leading_code_base] = bytes;
 573       width_by_char_head[leading_code_base] = XINT (width);
 574
 575       /* Update table emacs_code_class.  */
 576       emacs_code_class[charset] = (bytes == 2
 577                                    ? EMACS_leading_code_2
 578                                    : (bytes == 3
 579                                       ? EMACS_leading_code_3
 580                                       : EMACS_leading_code_4));
 581     }
 582
 583   /* Update table iso_charset_table.  */
 584   if (XINT (iso_final_char) >= 0
 585       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 586     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 587 }
 588
 589 #ifdef emacs
 590
 591 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 592    is invalid.  */
 593 int
 594 get_charset_id (charset_symbol)
 595      Lisp_Object charset_symbol;
 596 {
 597   Lisp_Object val;
 598   int charset;
 599
 600   return ((SYMBOLP (charset_symbol)
 601            && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
 602            && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
 603                CHARSET_VALID_P (charset)))
 604           ? charset : -1);
 605 }
 606
 607 /* Return an identification number for a new private charset of
 608    DIMENSION and WIDTH.  If there's no more room for the new charset,
 609    return 0.  */
 610 Lisp_Object
 611 get_new_private_charset_id (dimension, width)
 612      int dimension, width;
 613 {
 614   int charset, from, to;
 615
 616   if (dimension == 1)
 617     {
 618       from = LEADING_CODE_EXT_11;
 619       to = LEADING_CODE_EXT_21;
 620     }
 621   else
 622     {
 623       from = LEADING_CODE_EXT_21;
 624       to = LEADING_CODE_EXT_MAX + 1;
 625     }
 626
 627   for (charset = from; charset < to; charset++)
 628     if (!CHARSET_DEFINED_P (charset)) break;
 629
 630   return make_number (charset < to ? charset : 0);
 631 }
 632
 633 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 634        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 635 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 636  treated as a private charset.
 637 INFO-VECTOR is a vector of the format:
 638    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 639     SHORT-NAME LONG-NAME DESCRIPTION]
 640 The meanings of each elements is as follows:
 641 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 642 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 643 WIDTH (integer) is the number of columns a character in the charset
 644 occupies on the screen: one of 0, 1, and 2.
 645
 646 DIRECTION (integer) is the rendering direction of characters in the
 647 charset when rendering.  If 0, render from left to right, else
 648 render from right to left.
 649
 650 ISO-FINAL-CHAR (character) is the final character of the
 651 corresponding ISO 2022 charset.
 652 It may be -1 if the charset is internal use only.
 653
 654 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 655 while encoding to variants of ISO 2022 coding system, one of the
 656 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 657 It may be -1 if the charset is internal use only.
 658
 659 SHORT-NAME (string) is the short name to refer to the charset.
 660
 661 LONG-NAME (string) is the long name to refer to the charset.
 662
 663 DESCRIPTION (string) is the description string of the charset.  */)
 664        (charset_id, charset_symbol, info_vector)
 665      Lisp_Object charset_id, charset_symbol, info_vector;
 666 {
 667   Lisp_Object *vec;
 668
 669   if (!NILP (charset_id))
 670     CHECK_NUMBER (charset_id);
 671   CHECK_SYMBOL (charset_symbol);
 672   CHECK_VECTOR (info_vector);
 673
 674   if (! NILP (charset_id))
 675     {
 676       if (! CHARSET_VALID_P (XINT (charset_id)))
 677         error ("Invalid CHARSET: %d", XINT (charset_id));
 678       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 679         error ("Already defined charset: %d", XINT (charset_id));
 680     }
 681
 682   vec = XVECTOR (info_vector)->contents;
 683   if (XVECTOR (info_vector)->size != 9
 684       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 685       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 686       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 687       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 688       || !INTEGERP (vec[4])
 689       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 690       || !INTEGERP (vec[5])
 691       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 692       || !STRINGP (vec[6])
 693       || !STRINGP (vec[7])
 694       || !STRINGP (vec[8]))
 695     error ("Invalid info-vector argument for defining charset %s",
 696            XSYMBOL (charset_symbol)->name->data);
 697
 698   if (NILP (charset_id))
 699     {
 700       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 701       if (XINT (charset_id) == 0)
 702         error ("There's no room for a new private charset %s",
 703                XSYMBOL (charset_symbol)->name->data);
 704     }
 705
 706   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 707                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 708   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 709   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 710   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 711   Fupdate_coding_systems_internal ();
 712   return Qnil;
 713 }
 714
 715 DEFUN ("generic-character-list", Fgeneric_character_list,
 716        Sgeneric_character_list, 0, 0, 0,
 717        doc: /* Return a list of all possible generic characters.
 718 It includes a generic character for a charset not yet defined.  */)
 719      ()
 720 {
 721   return Vgeneric_character_list;
 722 }
 723
 724 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 725        Sget_unused_iso_final_char, 2, 2, 0,
 726        doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
 727 DIMENSION is the number of bytes to represent a character: 1 or 2.
 728 CHARS is the number of characters in a dimension: 94 or 96.
 729
 730 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 731 If there's no unused final char for the specified kind of charset,
 732 return nil.  */)
 733      (dimension, chars)
 734      Lisp_Object dimension, chars;
 735 {
 736   int final_char;
 737
 738   CHECK_NUMBER (dimension);
 739   CHECK_NUMBER (chars);
 740   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 741     error ("Invalid charset dimension %d, it should be 1 or 2",
 742            XINT (dimension));
 743   if (XINT (chars) != 94 && XINT (chars) != 96)
 744     error ("Invalid charset chars %d, it should be 94 or 96",
 745            XINT (chars));
 746   for (final_char = '0'; final_char <= '?'; final_char++)
 747     {
 748       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 749         break;
 750     }
 751   return (final_char <= '?' ? make_number (final_char) : Qnil);
 752 }
 753
 754 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 755        4, 4, 0,
 756        doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
 757 CHARSET should be defined by `defined-charset' in advance.  */)
 758      (dimension, chars, final_char, charset_symbol)
 759      Lisp_Object dimension, chars, final_char, charset_symbol;
 760 {
 761   int charset;
 762
 763   CHECK_NUMBER (dimension);
 764   CHECK_NUMBER (chars);
 765   CHECK_NUMBER (final_char);
 766   CHECK_SYMBOL (charset_symbol);
 767
 768   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 769     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 770   if (XINT (chars) != 94 && XINT (chars) != 96)
 771     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 772   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 773     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 774   if ((charset = get_charset_id (charset_symbol)) < 0)
 775     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 776
 777   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 778   return Qnil;
 779 }
 780
 781 /* Return information about charsets in the text at PTR of NBYTES
 782    bytes, which are NCHARS characters.  The value is:
 783
 784         0: Each character is represented by one byte.  This is always
 785            true for unibyte text.
 786         1: No charsets other than ascii eight-bit-control,
 787            eight-bit-graphic, and latin-1 are found.
 788         2: Otherwise.
 789
 790    In addition, if CHARSETS is nonzero, for each found charset N, set
 791    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 792    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 793    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 794    1 (note that there's no charset whose ID is 1).  */
 795
 796 int
 797 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 798      unsigned char *ptr;
 799      int nchars, nbytes, *charsets;
 800      Lisp_Object table;
 801 {
 802   if (nchars == nbytes)
 803     {
 804       if (charsets && nbytes > 0)
 805         {
 806           unsigned char *endp = ptr + nbytes;
 807           int maskbits = 0;
 808
 809           while (ptr < endp && maskbits != 7)
 810             {
 811               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 812               ptr++;
 813             }
 814
 815           if (maskbits & 1)
 816             charsets[CHARSET_ASCII] = 1;
 817           if (maskbits & 2)
 818             charsets[CHARSET_8_BIT_CONTROL] = 1;
 819           if (maskbits & 4)
 820             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 821         }
 822       return 0;
 823     }
 824   else
 825     {
 826       int return_val = 1;
 827       int bytes, charset, c1, c2;
 828
 829       if (! CHAR_TABLE_P (table))
 830         table = Qnil;
 831
 832       while (nchars-- > 0)
 833         {
 834           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 835           ptr += bytes;
 836
 837           if (!CHARSET_DEFINED_P (charset))
 838             charset = 1;
 839           else if (! NILP (table))
 840             {
 841               int c = translate_char (table, -1, charset, c1, c2);
 842               if (c >= 0)
 843                 charset = CHAR_CHARSET (c);
 844             }
 845
 846           if (return_val == 1
 847               && charset != CHARSET_ASCII
 848               && charset != CHARSET_8_BIT_CONTROL
 849               && charset != CHARSET_8_BIT_GRAPHIC
 850               && charset != charset_latin_iso8859_1)
 851             return_val = 2;
 852
 853           if (charsets)
 854             charsets[charset] = 1;
 855           else if (return_val == 2)
 856             break;
 857         }
 858       return return_val;
 859     }
 860 }
 861
 862 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 863        2, 3, 0,
 864        doc: /* Return a list of charsets in the region between BEG and END.
 865 BEG and END are buffer positions.
 866 Optional arg TABLE if non-nil is a translation table to look up.
 867
 868 If the region contains invalid multibyte characters,
 869 `unknown' is included in the returned list.
 870
 871 If the current buffer is unibyte, the returned list may contain
 872 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 873      (beg, end, table)
 874      Lisp_Object beg, end, table;
 875 {
 876   int charsets[MAX_CHARSET + 1];
 877   int from, from_byte, to, stop, stop_byte, i;
 878   Lisp_Object val;
 879
 880   validate_region (&beg, &end);
 881   from = XFASTINT (beg);
 882   stop = to = XFASTINT (end);
 883
 884   if (from < GPT && GPT < to)
 885     {
 886       stop = GPT;
 887       stop_byte = GPT_BYTE;
 888     }
 889   else
 890     stop_byte = CHAR_TO_BYTE (stop);
 891
 892   from_byte = CHAR_TO_BYTE (from);
 893
 894   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 895   while (1)
 896     {
 897       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 898                             stop_byte - from_byte, charsets, table);
 899       if (stop < to)
 900         {
 901           from = stop, from_byte = stop_byte;
 902           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 903         }
 904       else
 905         break;
 906     }
 907
 908   val = Qnil;
 909   if (charsets[1])
 910     val = Fcons (Qunknown, val);
 911   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 912     if (charsets[i])
 913       val = Fcons (CHARSET_SYMBOL (i), val);
 914   if (charsets[0])
 915     val = Fcons (Qascii, val);
 916   return val;
 917 }
 918
 919 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 920        1, 2, 0,
 921        doc: /* Return a list of charsets in STR.
 922 Optional arg TABLE if non-nil is a translation table to look up.
 923
 924 If the string contains invalid multibyte characters,
 925 `unknown' is included in the returned list.
 926
 927 If STR is unibyte, the returned list may contain
 928 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 929      (str, table)
 930      Lisp_Object str, table;
 931 {
 932   int charsets[MAX_CHARSET + 1];
 933   int i;
 934   Lisp_Object val;
 935
 936   CHECK_STRING (str);
 937
 938   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 939   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 940                         STRING_BYTES (XSTRING (str)), charsets, table);
 941
 942   val = Qnil;
 943   if (charsets[1])
 944     val = Fcons (Qunknown, val);
 945   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 946     if (charsets[i])
 947       val = Fcons (CHARSET_SYMBOL (i), val);
 948   if (charsets[0])
 949     val = Fcons (Qascii, val);
 950   return val;
 951 }
 952
 953 \f
 954 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 955        doc: /* Return a character made from arguments.
 956 Internal use only.  */)
 957      (charset, code1, code2)
 958      Lisp_Object charset, code1, code2;
 959 {
 960   int charset_id, c1, c2;
 961
 962   CHECK_NUMBER (charset);
 963   charset_id = XINT (charset);
 964   if (!CHARSET_DEFINED_P (charset_id))
 965     error ("Invalid charset ID: %d", XINT (charset));
 966
 967   if (NILP (code1))
 968     c1 = 0;
 969   else
 970     {
 971       CHECK_NUMBER (code1);
 972       c1 = XINT (code1);
 973     }
 974   if (NILP (code2))
 975     c2 = 0;
 976   else
 977     {
 978       CHECK_NUMBER (code2);
 979       c2 = XINT (code2);
 980     }
 981
 982   if (charset_id == CHARSET_ASCII)
 983     {
 984       if (c1 < 0 || c1 > 0x7F)
 985         goto invalid_code_posints;
 986       return make_number (c1);
 987     }
 988   else if (charset_id == CHARSET_8_BIT_CONTROL)
 989     {
 990       if (NILP (code1))
 991         c1 = 0x80;
 992       else if (c1 < 0x80 || c1 > 0x9F)
 993         goto invalid_code_posints;
 994       return make_number (c1);
 995     }
 996   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
 997     {
 998       if (NILP (code1))
 999         c1 = 0xA0;
1000       else if (c1 < 0xA0 || c1 > 0xFF)
1001         goto invalid_code_posints;
1002       return make_number (c1);
1003     }
1004   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1005     goto invalid_code_posints;
1006   c1 &= 0x7F;
1007   c2 &= 0x7F;
1008   if (c1 == 0
1009       ? c2 != 0
1010       : (c2 == 0
1011          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1012          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1013     goto invalid_code_posints;
1014   return make_number (MAKE_CHAR (charset_id, c1, c2));
1015
1016  invalid_code_posints:
1017   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1018 }
1019
1020 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1021        doc: /* Return list of charset and one or two position-codes of CHAR.
1022 If CHAR is invalid as a character code,
1023 return a list of symbol `unknown' and CHAR.  */)
1024      (ch)
1025      Lisp_Object ch;
1026 {
1027   int c, charset, c1, c2;
1028
1029   CHECK_NUMBER (ch);
1030   c = XFASTINT (ch);
1031   if (!CHAR_VALID_P (c, 1))
1032     return Fcons (Qunknown, Fcons (ch, Qnil));
1033   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1034   return (c2 >= 0
1035           ? Fcons (CHARSET_SYMBOL (charset),
1036                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1037           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1038 }
1039
1040 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1041        doc: /* Return charset of CHAR.  */)
1042      (ch)
1043      Lisp_Object ch;
1044 {
1045   CHECK_NUMBER (ch);
1046
1047   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1048 }
1049
1050 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1051        doc: /* Return charset of a character in the current buffer at position POS.
1052 If POS is nil, it defauls to the current point.
1053 If POS is out of range, the value is nil.  */)
1054      (pos)
1055      Lisp_Object pos;
1056 {
1057   Lisp_Object ch;
1058   int charset;
1059
1060   ch = Fchar_after (pos);
1061   if (! INTEGERP (ch))
1062     return ch;
1063   charset = CHAR_CHARSET (XINT (ch));
1064   return CHARSET_SYMBOL (charset);
1065 }
1066
1067 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1068        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1069
1070 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1071 by their DIMENSION, CHARS, and FINAL-CHAR,
1072 where as Emacs distinguishes them by charset symbol.
1073 See the documentation of the function `charset-info' for the meanings of
1074 DIMENSION, CHARS, and FINAL-CHAR.  */)
1075      (dimension, chars, final_char)
1076      Lisp_Object dimension, chars, final_char;
1077 {
1078   int charset;
1079
1080   CHECK_NUMBER (dimension);
1081   CHECK_NUMBER (chars);
1082   CHECK_NUMBER (final_char);
1083
1084   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1085     return Qnil;
1086   return CHARSET_SYMBOL (charset);
1087 }
1088
1089 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1090    generic character.  If GENERICP is zero, return nonzero iff C is a
1091    valid normal character.  Do not call this function directly,
1092    instead use macro CHAR_VALID_P.  */
1093 int
1094 char_valid_p (c, genericp)
1095      int c, genericp;
1096 {
1097   int charset, c1, c2;
1098
1099   if (c < 0 || c >= MAX_CHAR)
1100     return 0;
1101   if (SINGLE_BYTE_CHAR_P (c))
1102     return 1;
1103   SPLIT_CHAR (c, charset, c1, c2);
1104   if (genericp)
1105     {
1106       if (c1)
1107         {
1108           if (c2 <= 0) c2 = 0x20;
1109         }
1110       else
1111         {
1112           if (c2 <= 0) c1 = c2 = 0x20;
1113         }
1114     }
1115   return (CHARSET_DEFINED_P (charset)
1116           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1117 }
1118
1119 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1120        doc: /* Return t if OBJECT is a valid normal character.
1121 If optional arg GENERICP is non-nil, also return t if OBJECT is
1122 a valid generic character.  */)
1123      (object, genericp)
1124      Lisp_Object object, genericp;
1125 {
1126   if (! NATNUMP (object))
1127     return Qnil;
1128   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1129 }
1130
1131 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1132        Sunibyte_char_to_multibyte, 1, 1, 0,
1133        doc: /* Convert the unibyte character CH to multibyte character.
1134 The conversion is done based on `nonascii-translation-table' (which see)
1135  or `nonascii-insert-offset' (which see).  */)
1136      (ch)
1137      Lisp_Object ch;
1138 {
1139   int c;
1140
1141   CHECK_NUMBER (ch);
1142   c = XINT (ch);
1143   if (c < 0 || c >= 0400)
1144     error ("Invalid unibyte character: %d", c);
1145   c = unibyte_char_to_multibyte (c);
1146   if (c < 0)
1147     error ("Can't convert to multibyte character: %d", XINT (ch));
1148   return make_number (c);
1149 }
1150
1151 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1152        Smultibyte_char_to_unibyte, 1, 1, 0,
1153        doc: /* Convert the multibyte character CH to unibyte character.
1154 The conversion is done based on `nonascii-translation-table' (which see)
1155  or `nonascii-insert-offset' (which see).  */)
1156      (ch)
1157      Lisp_Object ch;
1158 {
1159   int c;
1160
1161   CHECK_NUMBER (ch);
1162   c = XINT (ch);
1163   if (! CHAR_VALID_P (c, 0))
1164     error ("Invalid multibyte character: %d", c);
1165   c = multibyte_char_to_unibyte (c, Qnil);
1166   if (c < 0)
1167     error ("Can't convert to unibyte character: %d", XINT (ch));
1168   return make_number (c);
1169 }
1170
1171 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1172        doc: /* Return 1 regardless of the argument CHAR.
1173 This is now an obsolete function.  We keep it just for backward compatibility.  */)
1174      (ch)
1175      Lisp_Object ch;
1176 {
1177   CHECK_NUMBER (ch);
1178   return make_number (1);
1179 }
1180
1181 /* Return how many bytes C will occupy in a multibyte buffer.
1182    Don't call this function directly, instead use macro CHAR_BYTES.  */
1183 int
1184 char_bytes (c)
1185      int c;
1186 {
1187   int charset;
1188
1189   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1190     return 1;
1191   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1192     return 1;
1193
1194   charset = CHAR_CHARSET (c);
1195   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1196 }
1197
/* Return the width of the character whose multibyte form starts with
   the byte C.  The width is measured by how many columns are occupied
   on the screen when displayed in the current buffer:

     - a tab takes `tab_width' columns and a newline takes none;
     - other control characters and 0x7F take 2 columns when the
       buffer's `ctl_arrow' is non-nil, and 4 otherwise;
     - 0x20..0x7E take 1 column;
     - a byte above 0x7F takes WIDTH_BY_CHAR_HEAD (C) columns if the
       buffer is multibyte and C is a base leading code, and 4
       columns otherwise.

   The argument is evaluated several times, so it must not have side
   effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1215
1216 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1217        doc: /* Return width of CHAR when displayed in the current buffer.
1218 The width is measured by how many columns it occupies on the screen.
1219 Tab is taken to occupy `tab-width' columns.  */)
1220      (ch)
1221      Lisp_Object ch;
1222 {
1223   Lisp_Object val, disp;
1224   int c;
1225   struct Lisp_Char_Table *dp = buffer_display_table ();
1226
1227   CHECK_NUMBER (ch);
1228
1229   c = XINT (ch);
1230
1231   /* Get the way the display table would display it.  */
1232   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1233
1234   if (VECTORP (disp))
1235     XSETINT (val, XVECTOR (disp)->size);
1236   else if (SINGLE_BYTE_CHAR_P (c))
1237     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1238   else
1239     {
1240       int charset = CHAR_CHARSET (c);
1241
1242       XSETFASTINT (val, CHARSET_WIDTH (charset));
1243     }
1244   return val;
1245 }
1246
/* Return the number of screen columns that the LEN bytes at STR
   occupy when displayed in the current buffer.  This is
   c_string_width without any limit on the width.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int width;

  /* A negative precision means "no limit"; then the two output
     pointers are not used and may be null.  */
  width = c_string_width (str, len, -1, NULL, NULL);
  return width;
}
1258
1259 /* Return width of string STR of length LEN when displayed in the
1260    current buffer.  The width is measured by how many columns it
1261    occupies on the screen.  If PRECISION > 0, return the width of
1262    longest substring that doesn't exceed PRECISION, and set number of
1263    characters and bytes of the substring in *NCHARS and *NBYTES
1264    respectively.  */
1265
1266 int
1267 c_string_width (str, len, precision, nchars, nbytes)
1268      unsigned char *str;
1269      int precision, *nchars, *nbytes;
1270 {
1271   int i = 0, i_byte = 0;
1272   int width = 0;
1273   int chars;
1274   struct Lisp_Char_Table *dp = buffer_display_table ();
1275
1276   while (i_byte < len)
1277     {
1278       int bytes, thiswidth;
1279       Lisp_Object val;
1280
1281       if (dp)
1282         {
1283           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1284
1285           chars = 1;
1286           val = DISP_CHAR_VECTOR (dp, c);
1287           if (VECTORP (val))
1288             thiswidth = XVECTOR (val)->size;
1289           else
1290             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1291         }
1292       else
1293         {
1294           chars = 1;
1295           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1296           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1297         }
1298
1299       if (precision > 0
1300           && (width + thiswidth > precision))
1301         {
1302           *nchars = i;
1303           *nbytes = i_byte;
1304           return width;
1305         }
1306       i++;
1307       i_byte += bytes;
1308       width += thiswidth;
1309   }
1310
1311   if (precision > 0)
1312     {
1313       *nchars = i;
1314       *nbytes = i_byte;
1315     }
1316
1317   return width;
1318 }
1319
1320 /* Return width of Lisp string STRING when displayed in the current
1321    buffer.  The width is measured by how many columns it occupies on
1322    the screen while paying attention to compositions.  If PRECISION >
1323    0, return the width of longest substring that doesn't exceed
1324    PRECISION, and set number of characters and bytes of the substring
1325    in *NCHARS and *NBYTES respectively.  */
1326
1327 int
1328 lisp_string_width (string, precision, nchars, nbytes)
1329      Lisp_Object string;
1330      int precision, *nchars, *nbytes;
1331 {
1332   int len = XSTRING (string)->size;
1333   int len_byte = STRING_BYTES (XSTRING (string));
1334   unsigned char *str = XSTRING (string)->data;
1335   int i = 0, i_byte = 0;
1336   int width = 0;
1337   struct Lisp_Char_Table *dp = buffer_display_table ();
1338
1339   while (i < len)
1340     {
1341       int chars, bytes, thiswidth;
1342       Lisp_Object val;
1343       int cmp_id;
1344       int ignore, end;
1345
1346       if (find_composition (i, -1, &ignore, &end, &val, string)
1347           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1348               >= 0))
1349         {
1350           thiswidth = composition_table[cmp_id]->width;
1351           chars = end - i;
1352           bytes = string_char_to_byte (string, end) - i_byte;
1353         }
1354       else if (dp)
1355         {
1356           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1357
1358           chars = 1;
1359           val = DISP_CHAR_VECTOR (dp, c);
1360           if (VECTORP (val))
1361             thiswidth = XVECTOR (val)->size;
1362           else
1363             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1364         }
1365       else
1366         {
1367           chars = 1;
1368           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1369           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1370         }
1371
1372       if (precision > 0
1373           && (width + thiswidth > precision))
1374         {
1375           *nchars = i;
1376           *nbytes = i_byte;
1377           return width;
1378         }
1379       i += chars;
1380       i_byte += bytes;
1381       width += thiswidth;
1382   }
1383
1384   if (precision > 0)
1385     {
1386       *nchars = i;
1387       *nbytes = i_byte;
1388     }
1389
1390   return width;
1391 }
1392
1393 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1394        doc: /* Return width of STRING when displayed in the current buffer.
1395 Width is measured by how many columns it occupies on the screen.
1396 When calculating width of a multibyte character in STRING,
1397 only the base leading-code is considered; the validity of
1398 the following bytes is not checked.  Tabs in STRING are always
1399 taken to occupy `tab-width' columns.  */)
1400      (str)
1401      Lisp_Object str;
1402 {
1403   Lisp_Object val;
1404
1405   CHECK_STRING (str);
1406   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1407   return val;
1408 }
1409
1410 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1411        doc: /* Return the direction of CHAR.
1412 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1413      (ch)
1414      Lisp_Object ch;
1415 {
1416   int charset;
1417
1418   CHECK_NUMBER (ch);
1419   charset = CHAR_CHARSET (XFASTINT (ch));
1420   if (!CHARSET_DEFINED_P (charset))
1421     invalid_character (XINT (ch));
1422   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1423 }
1424
1425 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1426        doc: /* Return number of characters between BEG and END.  */)
1427      (beg, end)
1428      Lisp_Object beg, end;
1429 {
1430   int from, to;
1431
1432   CHECK_NUMBER_COERCE_MARKER (beg);
1433   CHECK_NUMBER_COERCE_MARKER (end);
1434
1435   from = min (XFASTINT (beg), XFASTINT (end));
1436   to = max (XFASTINT (beg), XFASTINT (end));
1437
1438   return make_number (to - from);
1439 }
1440
1441 /* Return the number of characters in the NBYTES bytes at PTR.
1442    This works by looking at the contents and checking for multibyte sequences.
1443    However, if the current buffer has enable-multibyte-characters = nil,
1444    we treat each byte as a character.  */
1445
1446 int
1447 chars_in_text (ptr, nbytes)
1448      unsigned char *ptr;
1449      int nbytes;
1450 {
1451   /* current_buffer is null at early stages of Emacs initialization.  */
1452   if (current_buffer == 0
1453       || NILP (current_buffer->enable_multibyte_characters))
1454     return nbytes;
1455
1456   return multibyte_chars_in_text (ptr, nbytes);
1457 }
1458
/* Return the number of characters in the NBYTES bytes at PTR, found
   by parsing the contents as multibyte sequences.  The value of
   enable-multibyte-characters is not consulted.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *endp = ptr + nbytes;
  int count, bytes;

  /* Each turn of the loop steps over one whole sequence.  */
  for (count = 0; ptr < endp; count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
      ptr += bytes;
    }

  return count;
}
1483
/* Parse the LEN bytes of unibyte text at STR as if they were
   multibyte text, and store the number of characters in *NCHARS and
   the number of bytes the multibyte text needs in *NBYTES.  A byte
   that doesn't start a sequence accepted by
   UNIBYTE_STR_AS_MULTIBYTE_P is counted as one character of 2 bytes,
   which is how 8-bit characters in the range 0x80..0x9F are
   represented in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *endp = str + len;
  int n;
  int char_count, byte_count = 0;

  for (char_count = 0; str < endp; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
        {
          /* A sequence of N bytes that is kept as it is.  */
          str += n;
          byte_count += n;
        }
      else
        {
          /* A lone byte that will take two bytes.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1508
1509 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1510    It actually converts only 8-bit characters in the range 0x80..0x9F
1511    that don't contruct multibyte characters to multibyte forms.  If
1512    NCHARS is nonzero, set *NCHARS to the number of characters in the
1513    text.  It is assured that we can use LEN bytes at STR as a work
1514    area and that is enough.  Return the number of bytes of the
1515    resulting text.  */
1516
1517 int
1518 str_as_multibyte (str, len, nbytes, nchars)
1519      unsigned char *str;
1520      int len, nbytes, *nchars;
1521 {
1522   unsigned char *p = str, *endp = str + nbytes;
1523   unsigned char *to;
1524   int chars = 0;
1525   int n;
1526
1527   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1528     p += n, chars++;
1529   if (nchars)
1530     *nchars = chars;
1531   if (p == endp)
1532     return nbytes;
1533
1534   to = p;
1535   nbytes = endp - p;
1536   endp = str + len;
1537   safe_bcopy (p, endp - nbytes, nbytes);
1538   p = endp - nbytes;
1539   while (p < endp)
1540     {
1541       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1542         {
1543           while (n--)
1544             *to++ = *p++;
1545         }
1546       else
1547         {
1548           *to++ = LEADING_CODE_8_BIT_CONTROL;
1549           *to++ = *p++ + 0x20;
1550         }
1551       chars++;
1552     }
1553   if (nchars)
1554     *nchars = chars;
1555   return (to - str);
1556 }
1557
/* Parse the unibyte string at STR of LEN bytes, and return the number
   of bytes it may occupy when converted to a multibyte string by
   `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as two
   bytes, all others as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    total += (str[i] >= 0x80 && str[i] < 0xA0) ? 2 : 1;
  return total;
}
1574
1575 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1576    that contains the same single-byte characters.  It actually
1577    converts all 8-bit characters to multibyte forms.  It is assured
1578    that we can use LEN bytes at STR as a work area and that is
1579    enough.  */
1580
1581 int
1582 str_to_multibyte (str, len, bytes)
1583      unsigned char *str;
1584      int len, bytes;
1585 {
1586   unsigned char *p = str, *endp = str + bytes;
1587   unsigned char *to;
1588
1589   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1590   if (p == endp)
1591     return bytes;
1592   to = p;
1593   bytes = endp - p;
1594   endp = str + len;
1595   safe_bcopy (p, endp - bytes, bytes);
1596   p = endp - bytes;
1597   while (p < endp)
1598     {
1599       if (*p < 0x80 || *p >= 0xA0)
1600         *to++ = *p++;
1601       else
1602         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1603     }
1604   return (to - str);
1605 }
1606
1607 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1608    actually converts only 8-bit characters in the range 0x80..0x9F to
1609    unibyte forms.  */
1610
1611 int
1612 str_as_unibyte (str, bytes)
1613      unsigned char *str;
1614      int bytes;
1615 {
1616   unsigned char *p = str, *endp = str + bytes;
1617   unsigned char *to = str;
1618
1619   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1620   to = p;
1621   while (p < endp)
1622     {
1623       if (*p == LEADING_CODE_8_BIT_CONTROL)
1624         *to++ = *(p + 1) - 0x20, p += 2;
1625       else
1626         *to++ = *p++;
1627     }
1628   return (to - str);
1629 }
1630
1631 \f
1632 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1633   doc: /* Concatenate all the argument characters and make the result a string.
1634 usage: (string &rest CHARACTERS)  */)
1635      (n, args)
1636      int n;
1637      Lisp_Object *args;
1638 {
1639   int i;
1640   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1641   unsigned char *p = buf;
1642   int c;
1643   int multibyte = 0;
1644
1645   for (i = 0; i < n; i++)
1646     {
1647       CHECK_NUMBER (args[i]);
1648       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1649         multibyte = 1;
1650     }
1651
1652   for (i = 0; i < n; i++)
1653     {
1654       c = XINT (args[i]);
1655       if (multibyte)
1656         p += CHAR_STRING (c, p);
1657       else
1658         *p++ = c;
1659     }
1660
1661   return make_string_from_bytes (buf, n, p - buf);
1662 }
1663
1664 #endif /* emacs */
1665 \f
1666 int
1667 charset_id_internal (charset_name)
1668      char *charset_name;
1669 {
1670   Lisp_Object val;
1671
1672   val= Fget (intern (charset_name), Qcharset);
1673   if (!VECTORP (val))
1674     error ("Charset %s is not defined", charset_name);
1675
1676   return (XINT (XVECTOR (val)->contents[0]));
1677 }
1678
1679 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1680        Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
1681      ()
1682 {
1683   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1684   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1685   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1686   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1687   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1688   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1689   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1690   return Qnil;
1691 }
1692
1693 void
1694 init_charset_once ()
1695 {
1696   int i, j, k;
1697
1698   staticpro (&Vcharset_table);
1699   staticpro (&Vcharset_symbol_table);
1700   staticpro (&Vgeneric_character_list);
1701
1702   /* This has to be done here, before we call Fmake_char_table.  */
1703   Qcharset_table = intern ("charset-table");
1704   staticpro (&Qcharset_table);
1705
1706   /* Intern this now in case it isn't already done.
1707      Setting this variable twice is harmless.
1708      But don't staticpro it here--that is done in alloc.c.  */
1709   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1710
1711   /* Now we are ready to set up this property, so we can
1712      create the charset table.  */
1713   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1714   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1715
1716   Qunknown = intern ("unknown");
1717   staticpro (&Qunknown);
1718   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1719                                         Qunknown);
1720
1721   /* Setup tables.  */
1722   for (i = 0; i < 2; i++)
1723     for (j = 0; j < 2; j++)
1724       for (k = 0; k < 128; k++)
1725         iso_charset_table [i][j][k] = -1;
1726
1727   for (i = 0; i < 256; i++)
1728     bytes_by_char_head[i] = 1;
1729   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1730   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1731   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1732   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1733
1734   for (i = 0; i < 128; i++)
1735     width_by_char_head[i] = 1;
1736   for (; i < 256; i++)
1737     width_by_char_head[i] = 4;
1738   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1739   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1740   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1741   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1742
1743   {
1744     Lisp_Object val;
1745
1746     val = Qnil;
1747     for (i = 0x81; i < 0x90; i++)
1748       val = Fcons (make_number ((i - 0x70) << 7), val);
1749     for (; i < 0x9A; i++)
1750       val = Fcons (make_number ((i - 0x8F) << 14), val);
1751     for (i = 0xA0; i < 0xF0; i++)
1752       val = Fcons (make_number ((i - 0x70) << 7), val);
1753     for (; i < 0xFF; i++)
1754       val = Fcons (make_number ((i - 0xE0) << 14), val);
1755     Vgeneric_character_list = Fnreverse (val);
1756   }
1757
1758   nonascii_insert_offset = 0;
1759   Vnonascii_translation_table = Qnil;
1760 }
1761
1762 #ifdef emacs
1763
1764 void
1765 syms_of_charset ()
1766 {
       /* Set up the Lisp-visible side of this file: intern and staticpro the
          charset-related symbols, define the three special charsets (ascii,
          eight-bit-control, eight-bit-graphic) via update_charset_table,
          register this file's primitives with defsubr, and define the Lisp
          variables together with their initial values.  Takes no arguments
          and returns nothing.  */

       /* Each symbol below is created with intern, and the address of the C
          variable holding it is then passed to staticpro (presumably so the
          garbage collector keeps the symbol alive -- staticpro is defined
          outside this section; confirm there).  */
1767   Qcharset = intern ("charset");
1768   staticpro (&Qcharset);
1769
1770   Qascii = intern ("ascii");
1771   staticpro (&Qascii);
1772
1773   Qeight_bit_control = intern ("eight-bit-control");
1774   staticpro (&Qeight_bit_control);
1775
1776   Qeight_bit_graphic = intern ("eight-bit-graphic");
1777   staticpro (&Qeight_bit_graphic);
1778
1779   /* Define special charsets ascii, eight-bit-control, and
1780      eight-bit-graphic.  */
       /* NOTE(review): the ten positional arguments appear to be charset id,
          dimension, chars per dimension, column width, direction, ISO final
          character, ISO graphic plane, short name, long name and
          description; this is not visible here -- verify against
          update_charset_table.  The existing "same as above" remarks suggest
          that a Qnil name argument stands for the preceding string.  */
1781   update_charset_table (make_number (CHARSET_ASCII),
1782                         make_number (1), make_number (94),
1783                         make_number (1),
1784                         make_number (0),
1785                         make_number ('B'),
1786                         make_number (0),
1787                         build_string ("ASCII"),
1788                         Qnil,   /* same as above */
1789                         build_string ("ASCII (ISO646 IRV)"));
       /* Tie the charset id to its symbol, then record the charset's table
          entry on that symbol with Fput, using Qcharset as the second
          argument.  The same two steps follow each definition below.  */
1790   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1791   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1792
       /* Unlike ASCII, the two eight-bit charsets are given 96 and 4 as the
          third and fourth arguments and -1 as the sixth and seventh.  */
1793   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1794                         make_number (1), make_number (96),
1795                         make_number (4),
1796                         make_number (0),
1797                         make_number (-1),
1798                         make_number (-1),
1799                         build_string ("8-bit control code (0x80..0x9F)"),
1800                         Qnil,   /* same as above */
1801                         Qnil);  /* same as above */
1802   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1803   Fput (Qeight_bit_control, Qcharset,
1804         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1805
1806   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1807                         make_number (1), make_number (96),
1808                         make_number (4),
1809                         make_number (0),
1810                         make_number (-1),
1811                         make_number (-1),
1812                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1813                         Qnil,   /* same as above */
1814                         Qnil);  /* same as above */
1815   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1816   Fput (Qeight_bit_graphic, Qcharset,
1817         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1818
       /* This symbol is used further down as the first argument to
          Fmake_char_table when Vauto_fill_chars is created.  Its
          Qchar_table_extra_slots entry is set to 0 here, ahead of that call
          (presumably Fmake_char_table consults it -- confirm).  */
1819   Qauto_fill_chars = intern ("auto-fill-chars");
1820   staticpro (&Qauto_fill_chars);
1821   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1822
       /* Register the subr objects (the S... variables, which are defined
          outside this section) with defsubr.  */
1823   defsubr (&Sdefine_charset);
1824   defsubr (&Sgeneric_character_list);
1825   defsubr (&Sget_unused_iso_final_char);
1826   defsubr (&Sdeclare_equiv_charset);
1827   defsubr (&Sfind_charset_region);
1828   defsubr (&Sfind_charset_string);
1829   defsubr (&Smake_char_internal);
1830   defsubr (&Ssplit_char);
1831   defsubr (&Schar_charset);
1832   defsubr (&Scharset_after);
1833   defsubr (&Siso_charset);
1834   defsubr (&Schar_valid_p);
1835   defsubr (&Sunibyte_char_to_multibyte);
1836   defsubr (&Smultibyte_char_to_unibyte);
1837   defsubr (&Schar_bytes);
1838   defsubr (&Schar_width);
1839   defsubr (&Sstring_width);
1840   defsubr (&Schar_direction);
1841   defsubr (&Schars_in_region);
1842   defsubr (&Sstring);
1843   defsubr (&Ssetup_special_charsets);
1844
       /* From here on, every variable definition is immediately followed by
          the assignment of that variable's initial value.  */

       /* The initial charset list holds just the three special charsets
          defined above, in the order ascii, eight-bit-control,
          eight-bit-graphic.  */
1845   DEFVAR_LISP ("charset-list", &Vcharset_list,
1846                doc: /* List of charsets ever defined.  */);
1847   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1848                                         Fcons (Qeight_bit_graphic, Qnil)));
1849
       /* The vector is created by Fmake_vector with length 16 and Qnil as
          the initial element.  */
1850   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1851                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1852 An ID of a translation table is an index of this vector.  */);
1853   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1854
       /* The four leading-code variables are initialized from the
          corresponding LEADING_CODE_PRIVATE_* constants.  */
1855   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1856               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1857   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1858
1859   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1860               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1861   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1862
1863   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1864               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1865   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1866
1867   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1868               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1869   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1870
       /* Unibyte-to-multibyte conversion defaults: an offset of 0 and no
          translation table (Qnil).  */
1871   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1872               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1873 This is used for converting unibyte text to multibyte,
1874 and for inserting character codes specified by number.
1875
1876 This serves to convert a Latin-1 or similar 8-bit character code
1877 to the corresponding Emacs multibyte character code.
1878 Typically the value should be (- (make-char CHARSET 0) 128),
1879 for your choice of character set.
1880 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1881   nonascii_insert_offset = 0;
1882
1883   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1884                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1885 This is used for converting unibyte text to multibyte,
1886 and for inserting character codes specified by number.
1887
1888 Conversion is performed only when multibyte characters are enabled,
1889 and it serves to convert a Latin-1 or similar 8-bit character code
1890 to the corresponding Emacs character code.
1891
1892 If this is nil, `nonascii-insert-offset' is used instead.
1893 See also the docstring of `make-translation-table'.  */);
1894   Vnonascii_translation_table = Qnil;
1895
       /* The char-table is created with Qnil as the second argument to
          Fmake_char_table; afterwards only the space and newline entries
          are set to Qt.  */
1896   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1897                doc: /* A char-table for characters which invoke auto-filling.
1898 Such characters have value t in this table.  */);
1899   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1900   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1901   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1902 }
1903
1904 #endif /* emacs */