src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* At first, see the document in `charset.h' to understand the code in
  23    this file.  */
  24
  25 #ifdef emacs
  26 #include <config.h>
  27 #endif
  28
  29 #include <stdio.h>
  30
  31 #ifdef emacs
  32
  33 #include <sys/types.h>
  34 #include "lisp.h"
  35 #include "buffer.h"
  36 #include "charset.h"
  37 #include "composite.h"
  38 #include "coding.h"
  39 #include "disptab.h"
  40
  41 #else  /* not emacs */
  42
  43 #include "mulelib.h"
  44
  45 #endif /* emacs */
  46
  47 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  48 Lisp_Object Qunknown;
  49
  50 /* Declaration of special leading-codes.  */
  51 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  52 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  53 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  54 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  55
  56 /* Declaration of special charsets.  The values are set by
  57    Fsetup_special_charsets.  */
  58 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  59 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  60 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  61 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  62 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  63 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  64 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  65
  66 Lisp_Object Qcharset_table;
  67
  68 /* A char-table containing information of each character set.  */
  69 Lisp_Object Vcharset_table;
  70
  71 /* A vector of charset symbol indexed by charset-id.  This is used
  72    only for returning charset symbol from C functions.  */
  73 Lisp_Object Vcharset_symbol_table;
  74
  75 /* A list of charset symbols ever defined.  */
  76 Lisp_Object Vcharset_list;
  77
  78 /* Vector of translation table ever defined.
  79    ID of a translation table is used to index this vector.  */
  80 Lisp_Object Vtranslation_table_vector;
  81
  82 /* A char-table for characters which may invoke auto-filling.  */
  83 Lisp_Object Vauto_fill_chars;
  84
  85 Lisp_Object Qauto_fill_chars;
  86
  87 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  88 int bytes_by_char_head[256];
  89 int width_by_char_head[256];
  90
  91 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  92    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  93 int iso_charset_table[2][2][128];
  94
  95 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  96 unsigned char *_fetch_multibyte_char_p;
  97 int _fetch_multibyte_char_len;
  98
  99 /* Offset to add to a non-ASCII value when inserting it.  */
 100 int nonascii_insert_offset;
 101
 102 /* Translation table for converting non-ASCII unibyte characters
 103    to multibyte codes, or nil.  */
 104 Lisp_Object Vnonascii_translation_table;
 105
 106 /* List of all possible generic characters.  */
 107 Lisp_Object Vgeneric_character_list;
 108
 109 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
 110 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
 111 \f
 112 void
 113 invalid_character (c)
 114      int c;
 115 {
 116   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 117 }
 118
 119 /* Parse string STR of length LENGTH and fetch information of a
 120    character at STR.  Set BYTES to the byte length the character
 121    occupies, CHARSET, C1, C2 to proper values of the character. */
 122
 123 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 124   do {                                                                       \
 125     (c1) = *(str);                                                           \
 126     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 127     if ((bytes) == 1)                                                        \
 128       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 129     else if ((bytes) == 2)                                                   \
 130       {                                                                      \
 131         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 132           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 133         else                                                                 \
 134           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 135       }                                                                      \
 136     else if ((bytes) == 3)                                                   \
 137       {                                                                      \
 138         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 139           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 140         else                                                                 \
 141           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 142       }                                                                      \
 143     else                                                                     \
 144       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 145   } while (0)
 146
 147 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.  */
 148 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 149   ((charset) == CHARSET_ASCII                           \
 150    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 151    : ((charset) == CHARSET_8_BIT_CONTROL                \
 152       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 153       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 154          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 155          : (CHARSET_DIMENSION (charset) == 1            \
 156             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 157             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 158                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 159
 160 /* Store multi-byte form of the character C in STR.  The caller should
 161    allocate at least 4-byte area at STR in advance.  Returns the
 162    length of the multi-byte form.  If C is an invalid character code,
 163    signal an error.
 164
 165    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 166    directly if C can be an ASCII character.  */
 167
 168 int
 169 char_to_string (c, str)
 170      int c;
 171      unsigned char *str;
 172 {
 173   unsigned char *p = str;
 174
 175   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 176     {
 177       /* Multibyte character can't have a modifier bit.  */
 178       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 179         invalid_character (c);
 180
 181       /* For Meta, Shift, and Control modifiers, we need special care.  */
 182       if (c & CHAR_META)
 183         {
 184           /* Move the meta bit to the right place for a string.  */
 185           c = (c & ~CHAR_META) | 0x80;
 186         }
 187       if (c & CHAR_SHIFT)
 188         {
 189           /* Shift modifier is valid only with [A-Za-z].  */
 190           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 191             c &= ~CHAR_SHIFT;
 192           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 193             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 194         }
 195       if (c & CHAR_CTL)
 196         {
 197           /* Simulate the code in lread.c.  */
 198           /* Allow `\C- ' and `\C-?'.  */
 199           if (c == (CHAR_CTL | ' '))
 200             c = 0;
 201           else if (c == (CHAR_CTL | '?'))
 202             c = 127;
 203           /* ASCII control chars are made from letters (both cases),
 204              as well as the non-letters within 0100...0137.  */
 205           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 206             c &= (037 | (~0177 & ~CHAR_CTL));
 207           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 208             c &= (037 | (~0177 & ~CHAR_CTL));
 209         }
 210
 211       /* If C still has any modifier bits, it is an invalid character.  */
 212       if (c & CHAR_MODIFIER_MASK)
 213         invalid_character (c);
 214     }
 215   if (SINGLE_BYTE_CHAR_P (c))
 216     {
 217       if (ASCII_BYTE_P (c) || c >= 0xA0)
 218         *p++ = c;
 219       else
 220         {
 221           *p++ = LEADING_CODE_8_BIT_CONTROL;
 222           *p++ = c + 0x20;
 223         }
 224     }
 225   else if (CHAR_VALID_P (c, 0))
 226     {
 227       int charset, c1, c2;
 228
 229       SPLIT_CHAR (c, charset, c1, c2);
 230
 231       if (charset >= LEADING_CODE_EXT_11)
 232         *p++ = (charset < LEADING_CODE_EXT_12
 233                 ? LEADING_CODE_PRIVATE_11
 234                 : (charset < LEADING_CODE_EXT_21
 235                    ? LEADING_CODE_PRIVATE_12
 236                    : (charset < LEADING_CODE_EXT_22
 237                       ? LEADING_CODE_PRIVATE_21
 238                       : LEADING_CODE_PRIVATE_22)));
 239       *p++ = charset;
 240       if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
 241         invalid_character (c);
 242       if (c1)
 243         {
 244           *p++ = c1 | 0x80;
 245           if (c2 > 0)
 246             *p++ = c2 | 0x80;
 247         }
 248     }
 249   else
 250     invalid_character (c);
 251
 252   return (p - str);
 253 }
 254
 255 /* Return the non-ASCII character corresponding to multi-byte form at
 256    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 257    length of the multibyte form in *ACTUAL_LEN.
 258
 259    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 260    this function directly if you want ot handle ASCII characters as
 261    well.  */
 262
 263 int
 264 string_to_char (str, len, actual_len)
 265      const unsigned char *str;
 266      int len, *actual_len;
 267 {
 268   int c, bytes, charset, c1, c2;
 269
 270   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 271   c = MAKE_CHAR (charset, c1, c2);
 272   if (actual_len)
 273     *actual_len = bytes;
 274   return c;
 275 }
 276
 277 /* Return the length of the multi-byte form at string STR of length LEN.
 278    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 279 int
 280 multibyte_form_length (str, len)
 281      const unsigned char *str;
 282      int len;
 283 {
 284   int bytes;
 285
 286   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 287   return bytes;
 288 }
 289
 290 /* Check multibyte form at string STR of length LEN and set variables
 291    pointed by CHARSET, C1, and C2 to charset and position codes of the
 292    character at STR, and return 0.  If there's no multibyte character,
 293    return -1.  This should be used only in the macro SPLIT_STRING
 294    which checks range of STR in advance.  */
 295
 296 int
 297 split_string (str, len, charset, c1, c2)
 298      const unsigned char *str;
 299      unsigned char *c1, *c2;
 300      int len, *charset;
 301 {
 302   register int bytes, cs, code1, code2 = -1;
 303
 304   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 305   if (cs == CHARSET_ASCII)
 306     return -1;
 307   *charset = cs;
 308   *c1 = code1;
 309   *c2 = code2;
 310   return 0;
 311 }
 312
 313 /* Return 1 iff character C has valid printable glyph.
 314    Use the macro CHAR_PRINTABLE_P instead.  */
 315 int
 316 char_printable_p (c)
 317      int c;
 318 {
 319   int charset, c1, c2;
 320
 321   if (ASCII_BYTE_P (c))
 322     return 1;
 323   else if (SINGLE_BYTE_CHAR_P (c))
 324     return 0;
 325   else if (c >= MAX_CHAR)
 326     return 0;
 327
 328   SPLIT_CHAR (c, charset, c1, c2);
 329   if (! CHARSET_DEFINED_P (charset))
 330     return 0;
 331   if (CHARSET_CHARS (charset) == 94
 332       ? c1 <= 32 || c1 >= 127
 333       : c1 < 32)
 334     return 0;
 335   if (CHARSET_DIMENSION (charset) == 2
 336       && (CHARSET_CHARS (charset) == 94
 337           ? c2 <= 32 || c2 >= 127
 338           : c2 < 32))
 339     return 0;
 340   return 1;
 341 }
 342
 343 /* Translate character C by translation table TABLE.  If C
 344    is negative, translate a character specified by CHARSET, C1, and C2
 345    (C1 and C2 are code points of the character).  If no translation is
 346    found in TABLE, return C.  */
 347 int
 348 translate_char (table, c, charset, c1, c2)
 349      Lisp_Object table;
 350      int c, charset, c1, c2;
 351 {
 352   Lisp_Object ch;
 353   int alt_charset, alt_c1, alt_c2, dimension;
 354
 355   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 356   if (!CHAR_TABLE_P (table)
 357       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 358     return c;
 359
 360   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 361   dimension = CHARSET_DIMENSION (alt_charset);
 362   if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
 363     /* CH is not a generic character, just return it.  */
 364     return XFASTINT (ch);
 365
 366   /* Since CH is a generic character, we must return a specific
 367      charater which has the same position codes as C from CH.  */
 368   if (charset < 0)
 369     SPLIT_CHAR (c, charset, c1, c2);
 370   if (dimension != CHARSET_DIMENSION (charset))
 371     /* We can't make such a character because of dimension mismatch.  */
 372     return c;
 373   return MAKE_CHAR (alt_charset, c1, c2);
 374 }
 375
 376 /* Convert the unibyte character C to multibyte based on
 377    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 378    convert C to a valid multibyte character, convert it based on
 379    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 380
 381 int
 382 unibyte_char_to_multibyte (c)
 383      int c;
 384 {
 385   if (c < 0400 && c >= 0200)
 386     {
 387       int c_save = c;
 388
 389       if (! NILP (Vnonascii_translation_table))
 390         {
 391           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 392           if (c >= 0400 && ! char_valid_p (c, 0))
 393             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 394         }
 395       else if (c >= 0240 && nonascii_insert_offset > 0)
 396         {
 397           c += nonascii_insert_offset;
 398           if (c < 0400 || ! char_valid_p (c, 0))
 399             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 400         }
 401       else if (c >= 0240)
 402         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 403     }
 404   return c;
 405 }
 406
 407
 408 /* Convert the multibyte character C to unibyte 8-bit character based
 409    on Vnonascii_translation_table or nonascii_insert_offset.  If
 410    REV_TBL is non-nil, it should be a reverse table of
 411    Vnonascii_translation_table, i.e. what given by:
 412      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 413
 414 int
 415 multibyte_char_to_unibyte (c, rev_tbl)
 416      int c;
 417      Lisp_Object rev_tbl;
 418 {
 419   if (!SINGLE_BYTE_CHAR_P (c))
 420     {
 421       int c_save = c;
 422
 423       if (! CHAR_TABLE_P (rev_tbl)
 424           && CHAR_TABLE_P (Vnonascii_translation_table))
 425         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 426                                           make_number (0));
 427       if (CHAR_TABLE_P (rev_tbl))
 428         {
 429           Lisp_Object temp;
 430           temp = Faref (rev_tbl, make_number (c));
 431           if (INTEGERP (temp))
 432             c = XINT (temp);
 433           if (c >= 256)
 434             c = (c_save & 0177) + 0200;
 435         }
 436       else
 437         {
 438           if (nonascii_insert_offset > 0)
 439             c -= nonascii_insert_offset;
 440           if (c < 128 || c >= 256)
 441             c = (c_save & 0177) + 0200;
 442         }
 443     }
 444
 445   return c;
 446 }
 447
 448 \f
 449 /* Update the table Vcharset_table with the given arguments (see the
 450    document of `define-charset' for the meaning of each argument).
 451    Several other table contents are also updated.  The caller should
 452    check the validity of CHARSET-ID and the remaining arguments in
 453    advance.  */
 454
 455 void
 456 update_charset_table (charset_id, dimension, chars, width, direction,
 457                       iso_final_char, iso_graphic_plane,
 458                       short_name, long_name, description)
 459      Lisp_Object charset_id, dimension, chars, width, direction;
 460      Lisp_Object iso_final_char, iso_graphic_plane;
 461      Lisp_Object short_name, long_name, description;
 462 {
 463   int charset = XINT (charset_id);
 464   int bytes;
 465   unsigned char leading_code_base, leading_code_ext;
 466
 467   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 468     CHARSET_TABLE_ENTRY (charset)
 469       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 470
 471   if (NILP (long_name))
 472     long_name = short_name;
 473   if (NILP (description))
 474     description = long_name;
 475
 476   /* Get byte length of multibyte form, base leading-code, and
 477      extended leading-code of the charset.  See the comment under the
 478      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 479   bytes = XINT (dimension);
 480   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 481     {
 482       /* Official charset, it doesn't have an extended leading-code.  */
 483       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 484         bytes += 1; /* For a base leading-code.  */
 485       leading_code_base = charset;
 486       leading_code_ext = 0;
 487     }
 488   else
 489     {
 490       /* Private charset.  */
 491       bytes += 2; /* For base and extended leading-codes.  */
 492       leading_code_base
 493         = (charset < LEADING_CODE_EXT_12
 494            ? LEADING_CODE_PRIVATE_11
 495            : (charset < LEADING_CODE_EXT_21
 496               ? LEADING_CODE_PRIVATE_12
 497               : (charset < LEADING_CODE_EXT_22
 498                  ? LEADING_CODE_PRIVATE_21
 499                  : LEADING_CODE_PRIVATE_22)));
 500       leading_code_ext = charset;
 501       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 502         error ("Invalid dimension for the charset-ID %d", charset);
 503     }
 504
 505   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 506   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 507   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 508   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 509   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 510   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 511   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 512     = make_number (leading_code_base);
 513   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 514     = make_number (leading_code_ext);
 515   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 516   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 517     = iso_graphic_plane;
 518   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 519   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 520   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 521   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 522
 523   {
 524     /* If we have already defined a charset which has the same
 525        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 526        DIRECTION, we must update the entry REVERSE-CHARSET of both
 527        charsets.  If there's no such charset, the value of the entry
 528        is set to nil.  */
 529     int i;
 530
 531     for (i = 0; i <= MAX_CHARSET; i++)
 532       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 533         {
 534           if (CHARSET_DIMENSION (i) == XINT (dimension)
 535               && CHARSET_CHARS (i) == XINT (chars)
 536               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 537               && CHARSET_DIRECTION (i) != XINT (direction))
 538             {
 539               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 540                 = make_number (i);
 541               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 542               break;
 543             }
 544         }
 545     if (i > MAX_CHARSET)
 546       /* No such a charset.  */
 547       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 548         = make_number (-1);
 549   }
 550
 551   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 552       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 553     {
 554       bytes_by_char_head[leading_code_base] = bytes;
 555       width_by_char_head[leading_code_base] = XINT (width);
 556
 557       /* Update table emacs_code_class.  */
 558       emacs_code_class[charset] = (bytes == 2
 559                                    ? EMACS_leading_code_2
 560                                    : (bytes == 3
 561                                       ? EMACS_leading_code_3
 562                                       : EMACS_leading_code_4));
 563     }
 564
 565   /* Update table iso_charset_table.  */
 566   if (XINT (iso_final_char) >= 0
 567       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 568     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 569 }
 570
 571 #ifdef emacs
 572
 573 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 574    is invalid.  */
 575 int
 576 get_charset_id (charset_symbol)
 577      Lisp_Object charset_symbol;
 578 {
 579   Lisp_Object val;
 580   int charset;
 581
 582   return ((SYMBOLP (charset_symbol)
 583            && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
 584            && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
 585                CHARSET_VALID_P (charset)))
 586           ? charset : -1);
 587 }
 588
 589 /* Return an identification number for a new private charset of
 590    DIMENSION and WIDTH.  If there's no more room for the new charset,
 591    return 0.  */
 592 Lisp_Object
 593 get_new_private_charset_id (dimension, width)
 594      int dimension, width;
 595 {
 596   int charset, from, to;
 597
 598   if (dimension == 1)
 599     {
 600       from = LEADING_CODE_EXT_11;
 601       to = LEADING_CODE_EXT_21;
 602     }
 603   else
 604     {
 605       from = LEADING_CODE_EXT_21;
 606       to = LEADING_CODE_EXT_MAX + 1;
 607     }
 608
 609   for (charset = from; charset < to; charset++)
 610     if (!CHARSET_DEFINED_P (charset)) break;
 611
 612   return make_number (charset < to ? charset : 0);
 613 }
 614
 615 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 616   "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
 617 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
 618  treated as a private charset.\n\
 619 INFO-VECTOR is a vector of the format:\n\
 620    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
 621     SHORT-NAME LONG-NAME DESCRIPTION]\n\
 622 The meanings of each elements is as follows:\n\
 623 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
 624 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
 625 WIDTH (integer) is the number of columns a character in the charset\n\
 626 occupies on the screen: one of 0, 1, and 2.\n\
 627 \n\
 628 DIRECTION (integer) is the rendering direction of characters in the\n\
 629 charset when rendering.  If 0, render from left to right, else\n\
 630 render from right to left.\n\
 631 \n\
 632 ISO-FINAL-CHAR (character) is the final character of the\n\
 633 corresponding ISO 2022 charset.\n\
 634 It may be -1 if the charset is internal use only.\n\
 635 \n\
 636 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
 637 while encoding to variants of ISO 2022 coding system, one of the\n\
 638 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
 639 It may be -1 if the charset is internal use only.\n\
 640 \n\
 641 SHORT-NAME (string) is the short name to refer to the charset.\n\
 642 \n\
 643 LONG-NAME (string) is the long name to refer to the charset.\n\
 644 \n\
 645 DESCRIPTION (string) is the description string of the charset.")
 646   (charset_id, charset_symbol, info_vector)
 647      Lisp_Object charset_id, charset_symbol, info_vector;
 648 {
 649   Lisp_Object *vec;
 650
 651   if (!NILP (charset_id))
 652     CHECK_NUMBER (charset_id, 0);
 653   CHECK_SYMBOL (charset_symbol, 1);
 654   CHECK_VECTOR (info_vector, 2);
 655
 656   if (! NILP (charset_id))
 657     {
 658       if (! CHARSET_VALID_P (XINT (charset_id)))
 659         error ("Invalid CHARSET: %d", XINT (charset_id));
 660       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 661         error ("Already defined charset: %d", XINT (charset_id));
 662     }
 663
 664   vec = XVECTOR (info_vector)->contents;
 665   if (XVECTOR (info_vector)->size != 9
 666       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 667       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 668       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 669       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 670       || !INTEGERP (vec[4])
 671       || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
 672       || !INTEGERP (vec[5])
 673       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 674       || !STRINGP (vec[6])
 675       || !STRINGP (vec[7])
 676       || !STRINGP (vec[8]))
 677     error ("Invalid info-vector argument for defining charset %s",
 678            XSYMBOL (charset_symbol)->name->data);
 679
 680   if (NILP (charset_id))
 681     {
 682       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 683       if (XINT (charset_id) == 0)
 684         error ("There's no room for a new private charset %s",
 685                XSYMBOL (charset_symbol)->name->data);
 686     }
 687
 688   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 689                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 690   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 691   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 692   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 693   return Qnil;
 694 }
 695
 696 DEFUN ("generic-character-list", Fgeneric_character_list,
 697        Sgeneric_character_list, 0, 0, 0,
 698   "Return a list of all possible generic characters.\n\
 699 It includes a generic character for a charset not yet defined.")
 700   ()
 701 {
 702   return Vgeneric_character_list;
 703 }
 704
 705 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 706        Sget_unused_iso_final_char, 2, 2, 0,
 707   "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
 708 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
 709 CHARS is the number of characters in a dimension: 94 or 96.\n\
 710 \n\
 711 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
 712 If there's no unused final char for the specified kind of charset,\n\
 713 return nil.")
 714   (dimension, chars)
 715      Lisp_Object dimension, chars;
 716 {
 717   int final_char;
 718
 719   CHECK_NUMBER (dimension, 0);
 720   CHECK_NUMBER (chars, 1);
 721   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 722     error ("Invalid charset dimension %d, it should be 1 or 2",
 723            XINT (dimension));
 724   if (XINT (chars) != 94 && XINT (chars) != 96)
 725     error ("Invalid charset chars %d, it should be 94 or 96",
 726            XINT (chars));
 727   for (final_char = '0'; final_char <= '?'; final_char++)
 728     {
 729       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 730         break;
 731     }
 732   return (final_char <= '?' ? make_number (final_char) : Qnil);
 733 }
 734
 735 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 736        4, 4, 0,
 737   "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
 738 CHARSET should be defined by `defined-charset' in advance.")
 739   (dimension, chars, final_char, charset_symbol)
 740      Lisp_Object dimension, chars, final_char, charset_symbol;
 741 {
 742   int charset;
 743
 744   CHECK_NUMBER (dimension, 0);
 745   CHECK_NUMBER (chars, 1);
 746   CHECK_NUMBER (final_char, 2);
 747   CHECK_SYMBOL (charset_symbol, 3);
 748
 749   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 750     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 751   if (XINT (chars) != 94 && XINT (chars) != 96)
 752     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 753   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 754     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 755   if ((charset = get_charset_id (charset_symbol)) < 0)
 756     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 757
 758   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 759   return Qnil;
 760 }
 761
 762 /* Return information about charsets in the text at PTR of NBYTES
 763    bytes, which are NCHARS characters.  The value is:
 764
 765         0: Each character is represented by one byte.  This is always
 766            true for unibyte text.
 767         1: No charsets other than ascii eight-bit-control,
 768            eight-bit-graphic, and latin-1 are found.
 769         2: Otherwise.
 770
 771    In addition, if CHARSETS is nonzero, for each found charset N, set
 772    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 773    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 774    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 775    1 (note that there's no charset whose ID is 1).  */
 776
 777 int
 778 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 779      unsigned char *ptr;
 780      int nchars, nbytes, *charsets;
 781      Lisp_Object table;
 782 {
 783   if (nchars == nbytes)
 784     {
 785       if (charsets && nbytes > 0)
 786         {
 787           unsigned char *endp = ptr + nbytes;
 788           int maskbits = 0;
 789
 790           while (ptr < endp && maskbits != 7)
 791             {
 792               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 793               ptr++;
 794             }
 795
 796           if (maskbits & 1)
 797             charsets[CHARSET_ASCII] = 1;
 798           if (maskbits & 2)
 799             charsets[CHARSET_8_BIT_CONTROL] = 1;
 800           if (maskbits & 4)
 801             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 802         }
 803       return 0;
 804     }
 805   else
 806     {
 807       int return_val = 1;
 808       int bytes, charset, c1, c2;
 809
 810       if (! CHAR_TABLE_P (table))
 811         table = Qnil;
 812
 813       while (nchars-- > 0)
 814         {
 815           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 816           ptr += bytes;
 817
 818           if (!CHARSET_DEFINED_P (charset))
 819             charset = 1;
 820           else if (! NILP (table))
 821             {
 822               int c = translate_char (table, -1, charset, c1, c2);
 823               if (c >= 0)
 824                 charset = CHAR_CHARSET (c);
 825             }
 826
 827           if (return_val == 1
 828               && charset != CHARSET_ASCII
 829               && charset != CHARSET_8_BIT_CONTROL
 830               && charset != CHARSET_8_BIT_GRAPHIC
 831               && charset != charset_latin_iso8859_1)
 832             return_val = 2;
 833
 834           if (charsets)
 835             charsets[charset] = 1;
 836           else if (return_val == 2)
 837             break;
 838         }
 839       return return_val;
 840     }
 841 }
 842
 843 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 844        2, 3, 0,
 845   "Return a list of charsets in the region between BEG and END.\n\
 846 BEG and END are buffer positions.\n\
 847 Optional arg TABLE if non-nil is a translation table to look up.\n\
 848 \n\
 849 If the region contains invalid multibyte characters,\n\
 850 `unknown' is included in the returned list.\n\
 851 \n\
 852 If the current buffer is unibyte, the returned list may contain\n\
 853 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 854   (beg, end, table)
 855      Lisp_Object beg, end, table;
 856 {
 857   int charsets[MAX_CHARSET + 1];
 858   int from, from_byte, to, stop, stop_byte, i;
 859   Lisp_Object val;
 860
 861   validate_region (&beg, &end);
 862   from = XFASTINT (beg);
 863   stop = to = XFASTINT (end);
 864
 865   if (from < GPT && GPT < to)
 866     {
 867       stop = GPT;
 868       stop_byte = GPT_BYTE;
 869     }
 870   else
 871     stop_byte = CHAR_TO_BYTE (stop);
 872
 873   from_byte = CHAR_TO_BYTE (from);
 874
 875   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 876   while (1)
 877     {
 878       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 879                             stop_byte - from_byte, charsets, table);
 880       if (stop < to)
 881         {
 882           from = stop, from_byte = stop_byte;
 883           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 884         }
 885       else
 886         break;
 887     }
 888
 889   val = Qnil;
 890   if (charsets[1])
 891     val = Fcons (Qunknown, val);
 892   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 893     if (charsets[i])
 894       val = Fcons (CHARSET_SYMBOL (i), val);
 895   if (charsets[0])
 896     val = Fcons (Qascii, val);
 897   return val;
 898 }
 899
 900 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 901        1, 2, 0,
 902   "Return a list of charsets in STR.\n\
 903 Optional arg TABLE if non-nil is a translation table to look up.\n\
 904 \n\
 905 If the string contains invalid multibyte characters,\n\
 906 `unknown' is included in the returned list.\n\
 907 \n\
 908 If STR is unibyte, the returned list may contain\n\
 909 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 910   (str, table)
 911      Lisp_Object str, table;
 912 {
 913   int charsets[MAX_CHARSET + 1];
 914   int i;
 915   Lisp_Object val;
 916
 917   CHECK_STRING (str, 0);
 918
 919   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 920   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 921                         STRING_BYTES (XSTRING (str)), charsets, table);
 922
 923   val = Qnil;
 924   if (charsets[1])
 925     val = Fcons (Qunknown, val);
 926   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 927     if (charsets[i])
 928       val = Fcons (CHARSET_SYMBOL (i), val);
 929   if (charsets[0])
 930     val = Fcons (Qascii, val);
 931   return val;
 932 }
 933
 934 \f
 935 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 936   "")
 937   (charset, code1, code2)
 938      Lisp_Object charset, code1, code2;
 939 {
 940   int charset_id, c1, c2;
 941
 942   CHECK_NUMBER (charset, 0);
 943   charset_id = XINT (charset);
 944   if (!CHARSET_DEFINED_P (charset_id))
 945     error ("Invalid charset ID: %d", XINT (charset));
 946
 947   if (NILP (code1))
 948     c1 = 0;
 949   else
 950     {
 951       CHECK_NUMBER (code1, 1);
 952       c1 = XINT (code1);
 953     }
 954   if (NILP (code2))
 955     c2 = 0;
 956   else
 957     {
 958       CHECK_NUMBER (code2, 2);
 959       c2 = XINT (code2);
 960     }
 961
 962   if (charset_id == CHARSET_ASCII)
 963     {
 964       if (c1 < 0 || c1 > 0x7F)
 965         goto invalid_code_posints;
 966       return make_number (c1);
 967     }
 968   else if (charset_id == CHARSET_8_BIT_CONTROL)
 969     {
 970       if (NILP (code1))
 971         c1 = 0x80;
 972       else if (c1 < 0x80 || c1 > 0x9F)
 973         goto invalid_code_posints;
 974       return make_number (c1);
 975     }
 976   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
 977     {
 978       if (NILP (code1))
 979         c1 = 0xA0;
 980       else if (c1 < 0xA0 || c1 > 0xFF)
 981         goto invalid_code_posints;
 982       return make_number (c1);
 983     }
 984   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
 985     goto invalid_code_posints;
 986   c1 &= 0x7F;
 987   c2 &= 0x7F;
 988   if (c1 == 0
 989       ? c2 != 0
 990       : (c2 == 0
 991          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
 992          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
 993     goto invalid_code_posints;
 994   return make_number (MAKE_CHAR (charset_id, c1, c2));
 995
 996  invalid_code_posints:
 997   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
 998 }
 999
1000 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1001   "Return list of charset and one or two position-codes of CHAR.\n\
1002 If CHAR is invalid as a character code,\n\
1003 return a list of symbol `unknown' and CHAR.")
1004   (ch)
1005      Lisp_Object ch;
1006 {
1007   int c, charset, c1, c2;
1008
1009   CHECK_NUMBER (ch, 0);
1010   c = XFASTINT (ch);
1011   if (!CHAR_VALID_P (c, 1))
1012     return Fcons (Qunknown, Fcons (ch, Qnil));
1013   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1014   return (c2 >= 0
1015           ? Fcons (CHARSET_SYMBOL (charset),
1016                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1017           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1018 }
1019
1020 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1021   "Return charset of CHAR.")
1022   (ch)
1023      Lisp_Object ch;
1024 {
1025   CHECK_NUMBER (ch, 0);
1026
1027   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1028 }
1029
1030 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1031   "Return charset of a character in the current buffer at position POS.\n\
1032 If POS is nil, it defauls to the current point.\n\
1033 If POS is out of range, the value is nil.")
1034   (pos)
1035      Lisp_Object pos;
1036 {
1037   Lisp_Object ch;
1038   int charset;
1039
1040   ch = Fchar_after (pos);
1041   if (! INTEGERP (ch))
1042     return ch;
1043   charset = CHAR_CHARSET (XINT (ch));
1044   return CHARSET_SYMBOL (charset);
1045 }
1046
1047 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1048   "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1049 \n\
1050 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1051 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1052 where as Emacs distinguishes them by charset symbol.\n\
1053 See the documentation of the function `charset-info' for the meanings of\n\
1054 DIMENSION, CHARS, and FINAL-CHAR.")
1055   (dimension, chars, final_char)
1056      Lisp_Object dimension, chars, final_char;
1057 {
1058   int charset;
1059
1060   CHECK_NUMBER (dimension, 0);
1061   CHECK_NUMBER (chars, 1);
1062   CHECK_NUMBER (final_char, 2);
1063
1064   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1065     return Qnil;
1066   return CHARSET_SYMBOL (charset);
1067 }
1068
1069 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1070    generic character.  If GENERICP is zero, return nonzero iff C is a
1071    valid normal character.  Do not call this function directly,
1072    instead use macro CHAR_VALID_P.  */
1073 int
1074 char_valid_p (c, genericp)
1075      int c, genericp;
1076 {
1077   int charset, c1, c2;
1078
1079   if (c < 0 || c >= MAX_CHAR)
1080     return 0;
1081   if (SINGLE_BYTE_CHAR_P (c))
1082     return 1;
1083   SPLIT_CHAR (c, charset, c1, c2);
1084   if (genericp)
1085     {
1086       if (c1)
1087         {
1088           if (c2 <= 0) c2 = 0x20;
1089         }
1090       else
1091         {
1092           if (c2 <= 0) c1 = c2 = 0x20;
1093         }
1094     }
1095   return (CHARSET_DEFINED_P (charset)
1096           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1097 }
1098
1099 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1100   "Return t if OBJECT is a valid normal character.\n\
1101 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1102 a valid generic character.")
1103   (object, genericp)
1104      Lisp_Object object, genericp;
1105 {
1106   if (! NATNUMP (object))
1107     return Qnil;
1108   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1109 }
1110
1111 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1112        Sunibyte_char_to_multibyte, 1, 1, 0,
1113   "Convert the unibyte character CH to multibyte character.\n\
1114 The conversion is done based on `nonascii-translation-table' (which see)\n\
1115  or `nonascii-insert-offset' (which see).")
1116   (ch)
1117      Lisp_Object ch;
1118 {
1119   int c;
1120
1121   CHECK_NUMBER (ch, 0);
1122   c = XINT (ch);
1123   if (c < 0 || c >= 0400)
1124     error ("Invalid unibyte character: %d", c);
1125   c = unibyte_char_to_multibyte (c);
1126   if (c < 0)
1127     error ("Can't convert to multibyte character: %d", XINT (ch));
1128   return make_number (c);
1129 }
1130
1131 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1132        Smultibyte_char_to_unibyte, 1, 1, 0,
1133   "Convert the multibyte character CH to unibyte character.\n\
1134 The conversion is done based on `nonascii-translation-table' (which see)\n\
1135  or `nonascii-insert-offset' (which see).")
1136   (ch)
1137      Lisp_Object ch;
1138 {
1139   int c;
1140
1141   CHECK_NUMBER (ch, 0);
1142   c = XINT (ch);
1143   if (! CHAR_VALID_P (c, 0))
1144     error ("Invalid multibyte character: %d", c);
1145   c = multibyte_char_to_unibyte (c, Qnil);
1146   if (c < 0)
1147     error ("Can't convert to unibyte character: %d", XINT (ch));
1148   return make_number (c);
1149 }
1150
1151 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1152   "Return 1 regardless of the argument CHAR.\n\
1153 This is now an obsolete function.  We keep it just for backward compatibility.")
1154   (ch)
1155      Lisp_Object ch;
1156 {
1157   CHECK_NUMBER (ch, 0);
1158   return make_number (1);
1159 }
1160
1161 /* Return how many bytes C will occupy in a multibyte buffer.
1162    Don't call this function directly, instead use macro CHAR_BYTES.  */
1163 int
1164 char_bytes (c)
1165      int c;
1166 {
1167   int charset;
1168
1169   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1170     return 1;
1171   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1172     return 1;
1173
1174   charset = CHAR_CHARSET (c);
1175   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1176 }
1177
1178 /* Return the width of character of which multi-byte form starts with
1179    C.  The width is measured by how many columns occupied on the
1180    screen when displayed in the current buffer.  */
1181
1182 #define ONE_BYTE_CHAR_WIDTH(c)                                          \
1183   (c < 0x20                                                             \
1184    ? (c == '\t'                                                         \
1185       ? XFASTINT (current_buffer->tab_width)                            \
1186       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
1187    : (c < 0x7f                                                          \
1188       ? 1                                                               \
1189       : (c == 0x7F                                                      \
1190          ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
1191          : ((! NILP (current_buffer->enable_multibyte_characters)       \
1192              && BASE_LEADING_CODE_P (c))                                \
1193             ? WIDTH_BY_CHAR_HEAD (c)                                    \
1194             : 4))))
1195
1196 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1197   "Return width of CHAR when displayed in the current buffer.\n\
1198 The width is measured by how many columns it occupies on the screen.\n\
1199 Tab is taken to occupy `tab-width' columns.")
1200   (ch)
1201        Lisp_Object ch;
1202 {
1203   Lisp_Object val, disp;
1204   int c;
1205   struct Lisp_Char_Table *dp = buffer_display_table ();
1206
1207   CHECK_NUMBER (ch, 0);
1208
1209   c = XINT (ch);
1210
1211   /* Get the way the display table would display it.  */
1212   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1213
1214   if (VECTORP (disp))
1215     XSETINT (val, XVECTOR (disp)->size);
1216   else if (SINGLE_BYTE_CHAR_P (c))
1217     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1218   else
1219     {
1220       int charset = CHAR_CHARSET (c);
1221
1222       XSETFASTINT (val, CHARSET_WIDTH (charset));
1223     }
1224   return val;
1225 }
1226
1227 /* Return width of string STR of length LEN when displayed in the
1228    current buffer.  The width is measured by how many columns it
1229    occupies on the screen.  */
1230
1231 int
1232 strwidth (str, len)
1233      unsigned char *str;
1234      int len;
1235 {
1236   return c_string_width (str, len, -1, NULL, NULL);
1237 }
1238
1239 /* Return width of string STR of length LEN when displayed in the
1240    current buffer.  The width is measured by how many columns it
1241    occupies on the screen.  If PRECISION > 0, return the width of
1242    longest substring that doesn't exceed PRECISION, and set number of
1243    characters and bytes of the substring in *NCHARS and *NBYTES
1244    respectively.  */
1245
1246 int
1247 c_string_width (str, len, precision, nchars, nbytes)
1248      unsigned char *str;
1249      int precision, *nchars, *nbytes;
1250 {
1251   int i = 0, i_byte = 0;
1252   int width = 0;
1253   int chars;
1254   struct Lisp_Char_Table *dp = buffer_display_table ();
1255
1256   while (i_byte < len)
1257     {
1258       int bytes, thiswidth;
1259       Lisp_Object val;
1260
1261       if (dp)
1262         {
1263           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1264
1265           chars = 1;
1266           val = DISP_CHAR_VECTOR (dp, c);
1267           if (VECTORP (val))
1268             thiswidth = XVECTOR (val)->size;
1269           else
1270             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1271         }
1272       else
1273         {
1274           chars = 1;
1275           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1276           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1277         }
1278
1279       if (precision > 0
1280           && (width + thiswidth > precision))
1281         {
1282           *nchars = i;
1283           *nbytes = i_byte;
1284           return width;
1285         }
1286       i++;
1287       i_byte += bytes;
1288       width += thiswidth;
1289   }
1290
1291   if (precision > 0)
1292     {
1293       *nchars = i;
1294       *nbytes = i_byte;
1295     }
1296
1297   return width;
1298 }
1299
1300 /* Return width of Lisp string STRING when displayed in the current
1301    buffer.  The width is measured by how many columns it occupies on
1302    the screen while paying attention to compositions.  If PRECISION >
1303    0, return the width of longest substring that doesn't exceed
1304    PRECISION, and set number of characters and bytes of the substring
1305    in *NCHARS and *NBYTES respectively.  */
1306
1307 int
1308 lisp_string_width (string, precision, nchars, nbytes)
1309      Lisp_Object string;
1310      int precision, *nchars, *nbytes;
1311 {
1312   int len = XSTRING (string)->size;
1313   int len_byte = STRING_BYTES (XSTRING (string));
1314   unsigned char *str = XSTRING (string)->data;
1315   int i = 0, i_byte = 0;
1316   int width = 0;
1317   struct Lisp_Char_Table *dp = buffer_display_table ();
1318
1319   while (i < len)
1320     {
1321       int chars, bytes, thiswidth;
1322       Lisp_Object val;
1323       int cmp_id;
1324       int ignore, end;
1325
1326       if (find_composition (i, -1, &ignore, &end, &val, string)
1327           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1328               >= 0))
1329         {
1330           thiswidth = composition_table[cmp_id]->width;
1331           chars = end - i;
1332           bytes = string_char_to_byte (string, end) - i_byte;
1333         }
1334       else if (dp)
1335         {
1336           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1337
1338           chars = 1;
1339           val = DISP_CHAR_VECTOR (dp, c);
1340           if (VECTORP (val))
1341             thiswidth = XVECTOR (val)->size;
1342           else
1343             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1344         }
1345       else
1346         {
1347           chars = 1;
1348           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1349           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1350         }
1351
1352       if (precision > 0
1353           && (width + thiswidth > precision))
1354         {
1355           *nchars = i;
1356           *nbytes = i_byte;
1357           return width;
1358         }
1359       i += chars;
1360       i_byte += bytes;
1361       width += thiswidth;
1362   }
1363
1364   if (precision > 0)
1365     {
1366       *nchars = i;
1367       *nbytes = i_byte;
1368     }
1369
1370   return width;
1371 }
1372
1373 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1374   "Return width of STRING when displayed in the current buffer.\n\
1375 Width is measured by how many columns it occupies on the screen.\n\
1376 When calculating width of a multibyte character in STRING,\n\
1377 only the base leading-code is considered; the validity of\n\
1378 the following bytes is not checked.  Tabs in STRING are always\n\
1379 taken to occupy `tab-width' columns.")
1380   (str)
1381      Lisp_Object str;
1382 {
1383   Lisp_Object val;
1384
1385   CHECK_STRING (str, 0);
1386   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1387   return val;
1388 }
1389
1390 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1391   "Return the direction of CHAR.\n\
1392 The returned value is 0 for left-to-right and 1 for right-to-left.")
1393   (ch)
1394      Lisp_Object ch;
1395 {
1396   int charset;
1397
1398   CHECK_NUMBER (ch, 0);
1399   charset = CHAR_CHARSET (XFASTINT (ch));
1400   if (!CHARSET_DEFINED_P (charset))
1401     invalid_character (XINT (ch));
1402   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1403 }
1404
1405 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1406   "Return number of characters between BEG and END.")
1407   (beg, end)
1408      Lisp_Object beg, end;
1409 {
1410   int from, to;
1411
1412   CHECK_NUMBER_COERCE_MARKER (beg, 0);
1413   CHECK_NUMBER_COERCE_MARKER (end, 1);
1414
1415   from = min (XFASTINT (beg), XFASTINT (end));
1416   to = max (XFASTINT (beg), XFASTINT (end));
1417
1418   return make_number (to - from);
1419 }
1420
1421 /* Return the number of characters in the NBYTES bytes at PTR.
1422    This works by looking at the contents and checking for multibyte sequences.
1423    However, if the current buffer has enable-multibyte-characters = nil,
1424    we treat each byte as a character.  */
1425
1426 int
1427 chars_in_text (ptr, nbytes)
1428      unsigned char *ptr;
1429      int nbytes;
1430 {
1431   /* current_buffer is null at early stages of Emacs initialization.  */
1432   if (current_buffer == 0
1433       || NILP (current_buffer->enable_multibyte_characters))
1434     return nbytes;
1435
1436   return multibyte_chars_in_text (ptr, nbytes);
1437 }
1438
1439 /* Return the number of characters in the NBYTES bytes at PTR.
1440    This works by looking at the contents and checking for multibyte sequences.
1441    It ignores enable-multibyte-characters.  */
1442
1443 int
1444 multibyte_chars_in_text (ptr, nbytes)
1445      unsigned char *ptr;
1446      int nbytes;
1447 {
1448   unsigned char *endp;
1449   int chars, bytes;
1450
1451   endp = ptr + nbytes;
1452   chars = 0;
1453
1454   while (ptr < endp)
1455     {
1456       PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1457       ptr += bytes;
1458       chars++;
1459     }
1460
1461   return chars;
1462 }
1463
1464 /* Parse unibyte text at STR of LEN bytes as multibyte text, and
1465    count the numbers of characters and bytes in it.  On counting
1466    bytes, pay attention to the fact that 8-bit characters in the range
1467    0x80..0x9F are represented by 2 bytes in multibyte text.  */
1468 void
1469 parse_str_as_multibyte (str, len, nchars, nbytes)
1470      unsigned char *str;
1471      int len, *nchars, *nbytes;
1472 {
1473   unsigned char *endp = str + len;
1474   int n, chars = 0, bytes = 0;
1475
1476   while (str < endp)
1477     {
1478       if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1479         str += n, bytes += n;
1480       else
1481         str++, bytes += 2;
1482       chars++;
1483     }
1484   *nchars = chars;
1485   *nbytes = bytes;
1486   return;
1487 }
1488
1489 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1490    It actually converts only 8-bit characters in the range 0x80..0x9F
1491    that don't contruct multibyte characters to multibyte forms.  If
1492    NCHARS is nonzero, set *NCHARS to the number of characters in the
1493    text.  It is assured that we can use LEN bytes at STR as a work
1494    area and that is enough.  Return the number of bytes of the
1495    resulting text.  */
1496
1497 int
1498 str_as_multibyte (str, len, nbytes, nchars)
1499      unsigned char *str;
1500      int len, nbytes, *nchars;
1501 {
1502   unsigned char *p = str, *endp = str + nbytes;
1503   unsigned char *to;
1504   int chars = 0;
1505   int n;
1506
1507   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1508     p += n, chars++;
1509   if (nchars)
1510     *nchars = chars;
1511   if (p == endp)
1512     return nbytes;
1513
1514   to = p;
1515   nbytes = endp - p;
1516   endp = str + len;
1517   safe_bcopy (p, endp - nbytes, nbytes);
1518   p = endp - nbytes;
1519   while (p < endp)
1520     {
1521       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1522         {
1523           while (n--)
1524             *to++ = *p++;
1525         }
1526       else
1527         {
1528           *to++ = LEADING_CODE_8_BIT_CONTROL;
1529           *to++ = *p++ + 0x20;
1530         }
1531       chars++;
1532     }
1533   if (nchars)
1534     *nchars = chars;
1535   return (to - str);
1536 }
1537
1538 /* Parse unibyte string at STR of LEN bytes, and return the number of
1539    bytes it may ocupy when converted to multibyte string by
1540    `str_to_multibyte'.  */
1541
1542 int
1543 parse_str_to_multibyte (str, len)
1544      unsigned char *str;
1545      int len;
1546 {
1547   unsigned char *endp = str + len;
1548   int bytes;
1549
1550   for (bytes = 0; str < endp; str++)
1551     bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1552   return bytes;
1553 }
1554
1555 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1556    that contains the same single-byte characters.  It actually
1557    converts all 8-bit characters to multibyte forms.  It is assured
1558    that we can use LEN bytes at STR as a work area and that is
1559    enough.  */
1560
1561 int
1562 str_to_multibyte (str, len, bytes)
1563      unsigned char *str;
1564      int len, bytes;
1565 {
1566   unsigned char *p = str, *endp = str + bytes;
1567   unsigned char *to;
1568
1569   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1570   if (p == endp)
1571     return bytes;
1572   to = p;
1573   bytes = endp - p;
1574   endp = str + len;
1575   safe_bcopy (p, endp - bytes, bytes);
1576   p = endp - bytes;
1577   while (p < endp)
1578     {
1579       if (*p < 0x80 || *p >= 0xA0)
1580         *to++ = *p++;
1581       else
1582         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1583     }
1584   return (to - str);
1585 }
1586
1587 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1588    actually converts only 8-bit characters in the range 0x80..0x9F to
1589    unibyte forms.  */
1590
1591 int
1592 str_as_unibyte (str, bytes)
1593      unsigned char *str;
1594      int bytes;
1595 {
1596   unsigned char *p = str, *endp = str + bytes;
1597   unsigned char *to = str;
1598
1599   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1600   to = p;
1601   while (p < endp)
1602     {
1603       if (*p == LEADING_CODE_8_BIT_CONTROL)
1604         *to++ = *(p + 1) - 0x20, p += 2;
1605       else
1606         *to++ = *p++;
1607     }
1608   return (to - str);
1609 }
1610
1611 \f
1612 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1613   "Concatenate all the argument characters and make the result a string.")
1614   (n, args)
1615      int n;
1616      Lisp_Object *args;
1617 {
1618   int i;
1619   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1620   unsigned char *p = buf;
1621   int c;
1622   int multibyte = 0;
1623
1624   for (i = 0; i < n; i++)
1625     {
1626       CHECK_NUMBER (args[i], 0);
1627       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1628         multibyte = 1;
1629     }
1630
1631   for (i = 0; i < n; i++)
1632     {
1633       c = XINT (args[i]);
1634       if (multibyte)
1635         p += CHAR_STRING (c, p);
1636       else
1637         *p++ += c;
1638     }
1639
1640   return make_string_from_bytes (buf, n, p - buf);
1641 }
1642
1643 #endif /* emacs */
1644 \f
1645 int
1646 charset_id_internal (charset_name)
1647      char *charset_name;
1648 {
1649   Lisp_Object val;
1650
1651   val= Fget (intern (charset_name), Qcharset);
1652   if (!VECTORP (val))
1653     error ("Charset %s is not defined", charset_name);
1654
1655   return (XINT (XVECTOR (val)->contents[0]));
1656 }
1657
1658 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1659        Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1660    ()
1661 {
1662   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1663   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1664   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1665   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1666   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1667   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1668   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1669   return Qnil;
1670 }
1671
1672 void
1673 init_charset_once ()
1674 {
1675   int i, j, k;
1676
1677   staticpro (&Vcharset_table);
1678   staticpro (&Vcharset_symbol_table);
1679   staticpro (&Vgeneric_character_list);
1680
1681   /* This has to be done here, before we call Fmake_char_table.  */
1682   Qcharset_table = intern ("charset-table");
1683   staticpro (&Qcharset_table);
1684
1685   /* Intern this now in case it isn't already done.
1686      Setting this variable twice is harmless.
1687      But don't staticpro it here--that is done in alloc.c.  */
1688   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1689
1690   /* Now we are ready to set up this property, so we can
1691      create the charset table.  */
1692   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1693   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1694
1695   Qunknown = intern ("unknown");
1696   staticpro (&Qunknown);
1697   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1698                                         Qunknown);
1699
1700   /* Setup tables.  */
1701   for (i = 0; i < 2; i++)
1702     for (j = 0; j < 2; j++)
1703       for (k = 0; k < 128; k++)
1704         iso_charset_table [i][j][k] = -1;
1705
1706   for (i = 0; i < 256; i++)
1707     bytes_by_char_head[i] = 1;
1708   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1709   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1710   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1711   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1712
1713   for (i = 0; i < 128; i++)
1714     width_by_char_head[i] = 1;
1715   for (; i < 256; i++)
1716     width_by_char_head[i] = 4;
1717   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1718   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1719   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1720   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1721
1722   {
1723     Lisp_Object val;
1724
1725     val = Qnil;
1726     for (i = 0x81; i < 0x90; i++)
1727       val = Fcons (make_number ((i - 0x70) << 7), val);
1728     for (; i < 0x9A; i++)
1729       val = Fcons (make_number ((i - 0x8F) << 14), val);
1730     for (i = 0xA0; i < 0xF0; i++)
1731       val = Fcons (make_number ((i - 0x70) << 7), val);
1732     for (; i < 0xFF; i++)
1733       val = Fcons (make_number ((i - 0xE0) << 14), val);
1734     Vgeneric_character_list = Fnreverse (val);
1735   }
1736
1737   nonascii_insert_offset = 0;
1738   Vnonascii_translation_table = Qnil;
1739 }
1740
1741 #ifdef emacs
1742
1743 void
1744 syms_of_charset ()
1745 {
1746   Qcharset = intern ("charset");
1747   staticpro (&Qcharset);
1748
1749   Qascii = intern ("ascii");
1750   staticpro (&Qascii);
1751
1752   Qeight_bit_control = intern ("eight-bit-control");
1753   staticpro (&Qeight_bit_control);
1754
1755   Qeight_bit_graphic = intern ("eight-bit-graphic");
1756   staticpro (&Qeight_bit_graphic);
1757
1758   /* Define special charsets ascii, eight-bit-control, and
1759      eight-bit-graphic.  */
1760   update_charset_table (make_number (CHARSET_ASCII),
1761                         make_number (1), make_number (94),
1762                         make_number (1),
1763                         make_number (0),
1764                         make_number ('B'),
1765                         make_number (0),
1766                         build_string ("ASCII"),
1767                         Qnil,   /* same as above */
1768                         build_string ("ASCII (ISO646 IRV)"));
1769   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1770   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1771
1772   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1773                         make_number (1), make_number (96),
1774                         make_number (4),
1775                         make_number (0),
1776                         make_number (-1),
1777                         make_number (-1),
1778                         build_string ("8-bit control code (0x80..0x9F)"),
1779                         Qnil,   /* same as above */
1780                         Qnil);  /* same as above */
1781   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1782   Fput (Qeight_bit_control, Qcharset,
1783         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1784
1785   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1786                         make_number (1), make_number (96),
1787                         make_number (4),
1788                         make_number (0),
1789                         make_number (-1),
1790                         make_number (-1),
1791                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1792                         Qnil,   /* same as above */
1793                         Qnil);  /* same as above */
1794   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1795   Fput (Qeight_bit_graphic, Qcharset,
1796         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1797
1798   Qauto_fill_chars = intern ("auto-fill-chars");
1799   staticpro (&Qauto_fill_chars);
1800   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1801
1802   defsubr (&Sdefine_charset);
1803   defsubr (&Sgeneric_character_list);
1804   defsubr (&Sget_unused_iso_final_char);
1805   defsubr (&Sdeclare_equiv_charset);
1806   defsubr (&Sfind_charset_region);
1807   defsubr (&Sfind_charset_string);
1808   defsubr (&Smake_char_internal);
1809   defsubr (&Ssplit_char);
1810   defsubr (&Schar_charset);
1811   defsubr (&Scharset_after);
1812   defsubr (&Siso_charset);
1813   defsubr (&Schar_valid_p);
1814   defsubr (&Sunibyte_char_to_multibyte);
1815   defsubr (&Smultibyte_char_to_unibyte);
1816   defsubr (&Schar_bytes);
1817   defsubr (&Schar_width);
1818   defsubr (&Sstring_width);
1819   defsubr (&Schar_direction);
1820   defsubr (&Schars_in_region);
1821   defsubr (&Sstring);
1822   defsubr (&Ssetup_special_charsets);
1823
1824   DEFVAR_LISP ("charset-list", &Vcharset_list,
1825     "List of charsets ever defined.");
1826   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1827                                         Fcons (Qeight_bit_graphic, Qnil)));
1828
1829   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1830     "Vector of cons cell of a symbol and translation table ever defined.\n\
1831 An ID of a translation table is an index of this vector.");
1832   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1833
1834   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1835     "Leading-code of private TYPE9N charset of column-width 1.");
1836   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1837
1838   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1839     "Leading-code of private TYPE9N charset of column-width 2.");
1840   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1841
1842   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1843     "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1844   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1845
1846   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1847     "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1848   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1849
1850   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1851     "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1852 This is used for converting unibyte text to multibyte,\n\
1853 and for inserting character codes specified by number.\n\n\
1854 This serves to convert a Latin-1 or similar 8-bit character code\n\
1855 to the corresponding Emacs multibyte character code.\n\
1856 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1857 for your choice of character set.\n\
1858 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1859   nonascii_insert_offset = 0;
1860
1861   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1862     "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1863 This is used for converting unibyte text to multibyte,\n\
1864 and for inserting character codes specified by number.\n\n\
1865 Conversion is performed only when multibyte characters are enabled,\n\
1866 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1867 to the corresponding Emacs character code.\n\n\
1868 If this is nil, `nonascii-insert-offset' is used instead.\n\
1869 See also the docstring of `make-translation-table'.");
1870   Vnonascii_translation_table = Qnil;
1871
1872   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1873     "A char-table for characters which invoke auto-filling.\n\
1874 Such characters have value t in this table.");
1875   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1876   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1877   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1878 }
1879
1880 #endif /* emacs */