src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  53 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  54 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  55 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
  79 /* Vector of translation table ever defined.
  80    ID of a translation table is used to index this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 int nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
 111 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
 112 \f
 113 void
 114 invalid_character (c)
 115      int c;
 116 {
 117   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 118 }
 119
 120 /* Parse string STR of length LENGTH and fetch information of a
 121    character at STR.  Set BYTES to the byte length the character
 122    occupies, CHARSET, C1, C2 to proper values of the character. */
 123
 124 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 125   do {                                                                       \
 126     (c1) = *(str);                                                           \
 127     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 128     if ((bytes) == 1)                                                        \
 129       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 130     else if ((bytes) == 2)                                                   \
 131       {                                                                      \
 132         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 133           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 134         else                                                                 \
 135           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 136       }                                                                      \
 137     else if ((bytes) == 3)                                                   \
 138       {                                                                      \
 139         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 140           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 141         else                                                                 \
 142           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 143       }                                                                      \
 144     else                                                                     \
 145       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 146   } while (0)
 147
 148 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.  */
 149 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 150   ((charset) == CHARSET_ASCII                           \
 151    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 152    : ((charset) == CHARSET_8_BIT_CONTROL                \
 153       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 154       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 155          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 156          : (CHARSET_DIMENSION (charset) == 1            \
 157             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 158             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 159                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 160
 161 /* Store multi-byte form of the character C in STR.  The caller should
 162    allocate at least 4-byte area at STR in advance.  Returns the
 163    length of the multi-byte form.  If C is an invalid character code,
 164    return -1.  */
 165
 166 int
 167 char_to_string_1 (c, str)
 168      int c;
 169      unsigned char *str;
 170 {
 171   unsigned char *p = str;
 172
 173   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 174     {
 175       /* Multibyte character can't have a modifier bit.  */
 176       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 177         return -1;
 178
 179       /* For Meta, Shift, and Control modifiers, we need special care.  */
 180       if (c & CHAR_META)
 181         {
 182           /* Move the meta bit to the right place for a string.  */
 183           c = (c & ~CHAR_META) | 0x80;
 184         }
 185       if (c & CHAR_SHIFT)
 186         {
 187           /* Shift modifier is valid only with [A-Za-z].  */
 188           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 189             c &= ~CHAR_SHIFT;
 190           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 191             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 192         }
 193       if (c & CHAR_CTL)
 194         {
 195           /* Simulate the code in lread.c.  */
 196           /* Allow `\C- ' and `\C-?'.  */
 197           if (c == (CHAR_CTL | ' '))
 198             c = 0;
 199           else if (c == (CHAR_CTL | '?'))
 200             c = 127;
 201           /* ASCII control chars are made from letters (both cases),
 202              as well as the non-letters within 0100...0137.  */
 203           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 204             c &= (037 | (~0177 & ~CHAR_CTL));
 205           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 206             c &= (037 | (~0177 & ~CHAR_CTL));
 207         }
 208
 209       /* If C still has any modifier bits, just ignore it.  */
 210       c &= ~CHAR_MODIFIER_MASK;
 211     }
 212
 213   if (SINGLE_BYTE_CHAR_P (c))
 214     {
 215       if (ASCII_BYTE_P (c) || c >= 0xA0)
 216         *p++ = c;
 217       else
 218         {
 219           *p++ = LEADING_CODE_8_BIT_CONTROL;
 220           *p++ = c + 0x20;
 221         }
 222     }
 223   else if (CHAR_VALID_P (c, 0))
 224     {
 225       int charset, c1, c2;
 226
 227       SPLIT_CHAR (c, charset, c1, c2);
 228
 229       if (charset >= LEADING_CODE_EXT_11)
 230         *p++ = (charset < LEADING_CODE_EXT_12
 231                 ? LEADING_CODE_PRIVATE_11
 232                 : (charset < LEADING_CODE_EXT_21
 233                    ? LEADING_CODE_PRIVATE_12
 234                    : (charset < LEADING_CODE_EXT_22
 235                       ? LEADING_CODE_PRIVATE_21
 236                       : LEADING_CODE_PRIVATE_22)));
 237       *p++ = charset;
 238       if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
 239         return -1;
 240       if (c1)
 241         {
 242           *p++ = c1 | 0x80;
 243           if (c2 > 0)
 244             *p++ = c2 | 0x80;
 245         }
 246     }
 247   else
 248     return -1;
 249
 250   return (p - str);
 251 }
 252
 253
 254 /* Store multi-byte form of the character C in STR.  The caller should
 255    allocate at least 4-byte area at STR in advance.  Returns the
 256    length of the multi-byte form.  If C is an invalid character code,
 257    signal an error.
 258
 259    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 260    directly if C can be an ASCII character.  */
 261
 262 int
 263 char_to_string (c, str)
 264      int c;
 265      unsigned char *str;
 266 {
 267   int len;
 268   len = char_to_string_1 (c, str);
 269   if (len == -1)
 270     invalid_character (c);
 271   return len;
 272 }
 273
 274
 275 /* Return the non-ASCII character corresponding to multi-byte form at
 276    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 277    length of the multibyte form in *ACTUAL_LEN.
 278
 279    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 280    this function directly if you want ot handle ASCII characters as
 281    well.  */
 282
 283 int
 284 string_to_char (str, len, actual_len)
 285      const unsigned char *str;
 286      int len, *actual_len;
 287 {
 288   int c, bytes, charset, c1, c2;
 289
 290   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 291   c = MAKE_CHAR (charset, c1, c2);
 292   if (actual_len)
 293     *actual_len = bytes;
 294   return c;
 295 }
 296
 297 /* Return the length of the multi-byte form at string STR of length LEN.
 298    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 299 int
 300 multibyte_form_length (str, len)
 301      const unsigned char *str;
 302      int len;
 303 {
 304   int bytes;
 305
 306   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 307   return bytes;
 308 }
 309
 310 /* Check multibyte form at string STR of length LEN and set variables
 311    pointed by CHARSET, C1, and C2 to charset and position codes of the
 312    character at STR, and return 0.  If there's no multibyte character,
 313    return -1.  This should be used only in the macro SPLIT_STRING
 314    which checks range of STR in advance.  */
 315
 316 int
 317 split_string (str, len, charset, c1, c2)
 318      const unsigned char *str;
 319      unsigned char *c1, *c2;
 320      int len, *charset;
 321 {
 322   register int bytes, cs, code1, code2 = -1;
 323
 324   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 325   if (cs == CHARSET_ASCII)
 326     return -1;
 327   *charset = cs;
 328   *c1 = code1;
 329   *c2 = code2;
 330   return 0;
 331 }
 332
 333 /* Return 1 iff character C has valid printable glyph.
 334    Use the macro CHAR_PRINTABLE_P instead.  */
 335 int
 336 char_printable_p (c)
 337      int c;
 338 {
 339   int charset, c1, c2;
 340
 341   if (ASCII_BYTE_P (c))
 342     return 1;
 343   else if (SINGLE_BYTE_CHAR_P (c))
 344     return 0;
 345   else if (c >= MAX_CHAR)
 346     return 0;
 347
 348   SPLIT_CHAR (c, charset, c1, c2);
 349   if (! CHARSET_DEFINED_P (charset))
 350     return 0;
 351   if (CHARSET_CHARS (charset) == 94
 352       ? c1 <= 32 || c1 >= 127
 353       : c1 < 32)
 354     return 0;
 355   if (CHARSET_DIMENSION (charset) == 2
 356       && (CHARSET_CHARS (charset) == 94
 357           ? c2 <= 32 || c2 >= 127
 358           : c2 < 32))
 359     return 0;
 360   return 1;
 361 }
 362
 363 /* Translate character C by translation table TABLE.  If C
 364    is negative, translate a character specified by CHARSET, C1, and C2
 365    (C1 and C2 are code points of the character).  If no translation is
 366    found in TABLE, return C.  */
 367 int
 368 translate_char (table, c, charset, c1, c2)
 369      Lisp_Object table;
 370      int c, charset, c1, c2;
 371 {
 372   Lisp_Object ch;
 373   int alt_charset, alt_c1, alt_c2, dimension;
 374
 375   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 376   if (!CHAR_TABLE_P (table)
 377       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 378     return c;
 379
 380   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 381   dimension = CHARSET_DIMENSION (alt_charset);
 382   if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
 383     /* CH is not a generic character, just return it.  */
 384     return XFASTINT (ch);
 385
 386   /* Since CH is a generic character, we must return a specific
 387      charater which has the same position codes as C from CH.  */
 388   if (charset < 0)
 389     SPLIT_CHAR (c, charset, c1, c2);
 390   if (dimension != CHARSET_DIMENSION (charset))
 391     /* We can't make such a character because of dimension mismatch.  */
 392     return c;
 393   return MAKE_CHAR (alt_charset, c1, c2);
 394 }
 395
 396 /* Convert the unibyte character C to multibyte based on
 397    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 398    convert C to a valid multibyte character, convert it based on
 399    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 400
 401 int
 402 unibyte_char_to_multibyte (c)
 403      int c;
 404 {
 405   if (c < 0400 && c >= 0200)
 406     {
 407       int c_save = c;
 408
 409       if (! NILP (Vnonascii_translation_table))
 410         {
 411           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 412           if (c >= 0400 && ! char_valid_p (c, 0))
 413             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 414         }
 415       else if (c >= 0240 && nonascii_insert_offset > 0)
 416         {
 417           c += nonascii_insert_offset;
 418           if (c < 0400 || ! char_valid_p (c, 0))
 419             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 420         }
 421       else if (c >= 0240)
 422         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 423     }
 424   return c;
 425 }
 426
 427
 428 /* Convert the multibyte character C to unibyte 8-bit character based
 429    on Vnonascii_translation_table or nonascii_insert_offset.  If
 430    REV_TBL is non-nil, it should be a reverse table of
 431    Vnonascii_translation_table, i.e. what given by:
 432      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 433
 434 int
 435 multibyte_char_to_unibyte (c, rev_tbl)
 436      int c;
 437      Lisp_Object rev_tbl;
 438 {
 439   if (!SINGLE_BYTE_CHAR_P (c))
 440     {
 441       int c_save = c;
 442
 443       if (! CHAR_TABLE_P (rev_tbl)
 444           && CHAR_TABLE_P (Vnonascii_translation_table))
 445         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 446                                           make_number (0));
 447       if (CHAR_TABLE_P (rev_tbl))
 448         {
 449           Lisp_Object temp;
 450           temp = Faref (rev_tbl, make_number (c));
 451           if (INTEGERP (temp))
 452             c = XINT (temp);
 453           if (c >= 256)
 454             c = (c_save & 0177) + 0200;
 455         }
 456       else
 457         {
 458           if (nonascii_insert_offset > 0)
 459             c -= nonascii_insert_offset;
 460           if (c < 128 || c >= 256)
 461             c = (c_save & 0177) + 0200;
 462         }
 463     }
 464
 465   return c;
 466 }
 467
 468 \f
 469 /* Update the table Vcharset_table with the given arguments (see the
 470    document of `define-charset' for the meaning of each argument).
 471    Several other table contents are also updated.  The caller should
 472    check the validity of CHARSET-ID and the remaining arguments in
 473    advance.  */
 474
 475 void
 476 update_charset_table (charset_id, dimension, chars, width, direction,
 477                       iso_final_char, iso_graphic_plane,
 478                       short_name, long_name, description)
 479      Lisp_Object charset_id, dimension, chars, width, direction;
 480      Lisp_Object iso_final_char, iso_graphic_plane;
 481      Lisp_Object short_name, long_name, description;
 482 {
 483   int charset = XINT (charset_id);
 484   int bytes;
 485   unsigned char leading_code_base, leading_code_ext;
 486
 487   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 488     CHARSET_TABLE_ENTRY (charset)
 489       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 490
 491   if (NILP (long_name))
 492     long_name = short_name;
 493   if (NILP (description))
 494     description = long_name;
 495
 496   /* Get byte length of multibyte form, base leading-code, and
 497      extended leading-code of the charset.  See the comment under the
 498      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 499   bytes = XINT (dimension);
 500   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 501     {
 502       /* Official charset, it doesn't have an extended leading-code.  */
 503       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 504         bytes += 1; /* For a base leading-code.  */
 505       leading_code_base = charset;
 506       leading_code_ext = 0;
 507     }
 508   else
 509     {
 510       /* Private charset.  */
 511       bytes += 2; /* For base and extended leading-codes.  */
 512       leading_code_base
 513         = (charset < LEADING_CODE_EXT_12
 514            ? LEADING_CODE_PRIVATE_11
 515            : (charset < LEADING_CODE_EXT_21
 516               ? LEADING_CODE_PRIVATE_12
 517               : (charset < LEADING_CODE_EXT_22
 518                  ? LEADING_CODE_PRIVATE_21
 519                  : LEADING_CODE_PRIVATE_22)));
 520       leading_code_ext = charset;
 521       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 522         error ("Invalid dimension for the charset-ID %d", charset);
 523     }
 524
 525   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 526   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 527   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 528   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 529   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 530   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 531   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 532     = make_number (leading_code_base);
 533   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 534     = make_number (leading_code_ext);
 535   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 536   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 537     = iso_graphic_plane;
 538   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 539   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 540   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 541   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 542
 543   {
 544     /* If we have already defined a charset which has the same
 545        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 546        DIRECTION, we must update the entry REVERSE-CHARSET of both
 547        charsets.  If there's no such charset, the value of the entry
 548        is set to nil.  */
 549     int i;
 550
 551     for (i = 0; i <= MAX_CHARSET; i++)
 552       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 553         {
 554           if (CHARSET_DIMENSION (i) == XINT (dimension)
 555               && CHARSET_CHARS (i) == XINT (chars)
 556               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 557               && CHARSET_DIRECTION (i) != XINT (direction))
 558             {
 559               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 560                 = make_number (i);
 561               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 562               break;
 563             }
 564         }
 565     if (i > MAX_CHARSET)
 566       /* No such a charset.  */
 567       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 568         = make_number (-1);
 569   }
 570
 571   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 572       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 573     {
 574       bytes_by_char_head[leading_code_base] = bytes;
 575       width_by_char_head[leading_code_base] = XINT (width);
 576
 577       /* Update table emacs_code_class.  */
 578       emacs_code_class[charset] = (bytes == 2
 579                                    ? EMACS_leading_code_2
 580                                    : (bytes == 3
 581                                       ? EMACS_leading_code_3
 582                                       : EMACS_leading_code_4));
 583     }
 584
 585   /* Update table iso_charset_table.  */
 586   if (XINT (iso_final_char) >= 0
 587       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 588     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 589 }
 590
 591 #ifdef emacs
 592
 593 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 594    is invalid.  */
 595 int
 596 get_charset_id (charset_symbol)
 597      Lisp_Object charset_symbol;
 598 {
 599   Lisp_Object val;
 600   int charset;
 601
 602   return ((SYMBOLP (charset_symbol)
 603            && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
 604            && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
 605                CHARSET_VALID_P (charset)))
 606           ? charset : -1);
 607 }
 608
 609 /* Return an identification number for a new private charset of
 610    DIMENSION and WIDTH.  If there's no more room for the new charset,
 611    return 0.  */
 612 Lisp_Object
 613 get_new_private_charset_id (dimension, width)
 614      int dimension, width;
 615 {
 616   int charset, from, to;
 617
 618   if (dimension == 1)
 619     {
 620       from = LEADING_CODE_EXT_11;
 621       to = LEADING_CODE_EXT_21;
 622     }
 623   else
 624     {
 625       from = LEADING_CODE_EXT_21;
 626       to = LEADING_CODE_EXT_MAX + 1;
 627     }
 628
 629   for (charset = from; charset < to; charset++)
 630     if (!CHARSET_DEFINED_P (charset)) break;
 631
 632   return make_number (charset < to ? charset : 0);
 633 }
 634
 635 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 636   "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
 637 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
 638  treated as a private charset.\n\
 639 INFO-VECTOR is a vector of the format:\n\
 640    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
 641     SHORT-NAME LONG-NAME DESCRIPTION]\n\
 642 The meanings of each elements is as follows:\n\
 643 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
 644 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
 645 WIDTH (integer) is the number of columns a character in the charset\n\
 646 occupies on the screen: one of 0, 1, and 2.\n\
 647 \n\
 648 DIRECTION (integer) is the rendering direction of characters in the\n\
 649 charset when rendering.  If 0, render from left to right, else\n\
 650 render from right to left.\n\
 651 \n\
 652 ISO-FINAL-CHAR (character) is the final character of the\n\
 653 corresponding ISO 2022 charset.\n\
 654 It may be -1 if the charset is internal use only.\n\
 655 \n\
 656 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
 657 while encoding to variants of ISO 2022 coding system, one of the\n\
 658 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
 659 It may be -1 if the charset is internal use only.\n\
 660 \n\
 661 SHORT-NAME (string) is the short name to refer to the charset.\n\
 662 \n\
 663 LONG-NAME (string) is the long name to refer to the charset.\n\
 664 \n\
 665 DESCRIPTION (string) is the description string of the charset.")
 666   (charset_id, charset_symbol, info_vector)
 667      Lisp_Object charset_id, charset_symbol, info_vector;
 668 {
 669   Lisp_Object *vec;
 670
 671   if (!NILP (charset_id))
 672     CHECK_NUMBER (charset_id, 0);
 673   CHECK_SYMBOL (charset_symbol, 1);
 674   CHECK_VECTOR (info_vector, 2);
 675
 676   if (! NILP (charset_id))
 677     {
 678       if (! CHARSET_VALID_P (XINT (charset_id)))
 679         error ("Invalid CHARSET: %d", XINT (charset_id));
 680       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 681         error ("Already defined charset: %d", XINT (charset_id));
 682     }
 683
 684   vec = XVECTOR (info_vector)->contents;
 685   if (XVECTOR (info_vector)->size != 9
 686       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 687       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 688       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 689       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 690       || !INTEGERP (vec[4])
 691       || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
 692       || !INTEGERP (vec[5])
 693       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 694       || !STRINGP (vec[6])
 695       || !STRINGP (vec[7])
 696       || !STRINGP (vec[8]))
 697     error ("Invalid info-vector argument for defining charset %s",
 698            XSYMBOL (charset_symbol)->name->data);
 699
 700   if (NILP (charset_id))
 701     {
 702       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 703       if (XINT (charset_id) == 0)
 704         error ("There's no room for a new private charset %s",
 705                XSYMBOL (charset_symbol)->name->data);
 706     }
 707
 708   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 709                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 710   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 711   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 712   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 713   return Qnil;
 714 }
 715
 716 DEFUN ("generic-character-list", Fgeneric_character_list,
 717        Sgeneric_character_list, 0, 0, 0,
 718   "Return a list of all possible generic characters.\n\
 719 It includes a generic character for a charset not yet defined.")
 720   ()
 721 {
 722   return Vgeneric_character_list;
 723 }
 724
 725 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 726        Sget_unused_iso_final_char, 2, 2, 0,
 727   "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
 728 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
 729 CHARS is the number of characters in a dimension: 94 or 96.\n\
 730 \n\
 731 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
 732 If there's no unused final char for the specified kind of charset,\n\
 733 return nil.")
 734   (dimension, chars)
 735      Lisp_Object dimension, chars;
 736 {
 737   int final_char;
 738
 739   CHECK_NUMBER (dimension, 0);
 740   CHECK_NUMBER (chars, 1);
 741   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 742     error ("Invalid charset dimension %d, it should be 1 or 2",
 743            XINT (dimension));
 744   if (XINT (chars) != 94 && XINT (chars) != 96)
 745     error ("Invalid charset chars %d, it should be 94 or 96",
 746            XINT (chars));
 747   for (final_char = '0'; final_char <= '?'; final_char++)
 748     {
 749       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 750         break;
 751     }
 752   return (final_char <= '?' ? make_number (final_char) : Qnil);
 753 }
 754
 755 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 756        4, 4, 0,
 757   "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
 758 CHARSET should be defined by `defined-charset' in advance.")
 759   (dimension, chars, final_char, charset_symbol)
 760      Lisp_Object dimension, chars, final_char, charset_symbol;
 761 {
 762   int charset;
 763
 764   CHECK_NUMBER (dimension, 0);
 765   CHECK_NUMBER (chars, 1);
 766   CHECK_NUMBER (final_char, 2);
 767   CHECK_SYMBOL (charset_symbol, 3);
 768
 769   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 770     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 771   if (XINT (chars) != 94 && XINT (chars) != 96)
 772     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 773   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 774     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 775   if ((charset = get_charset_id (charset_symbol)) < 0)
 776     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 777
 778   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 779   return Qnil;
 780 }
 781
 782 /* Return information about charsets in the text at PTR of NBYTES
 783    bytes, which are NCHARS characters.  The value is:
 784
 785         0: Each character is represented by one byte.  This is always
 786            true for unibyte text.
 787         1: No charsets other than ascii eight-bit-control,
 788            eight-bit-graphic, and latin-1 are found.
 789         2: Otherwise.
 790
 791    In addition, if CHARSETS is nonzero, for each found charset N, set
 792    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 793    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 794    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 795    1 (note that there's no charset whose ID is 1).  */
 796
 797 int
 798 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 799      unsigned char *ptr;
 800      int nchars, nbytes, *charsets;
 801      Lisp_Object table;
 802 {
 803   if (nchars == nbytes)
 804     {
 805       if (charsets && nbytes > 0)
 806         {
 807           unsigned char *endp = ptr + nbytes;
 808           int maskbits = 0;
 809
 810           while (ptr < endp && maskbits != 7)
 811             {
 812               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 813               ptr++;
 814             }
 815
 816           if (maskbits & 1)
 817             charsets[CHARSET_ASCII] = 1;
 818           if (maskbits & 2)
 819             charsets[CHARSET_8_BIT_CONTROL] = 1;
 820           if (maskbits & 4)
 821             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 822         }
 823       return 0;
 824     }
 825   else
 826     {
 827       int return_val = 1;
 828       int bytes, charset, c1, c2;
 829
 830       if (! CHAR_TABLE_P (table))
 831         table = Qnil;
 832
 833       while (nchars-- > 0)
 834         {
 835           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 836           ptr += bytes;
 837
 838           if (!CHARSET_DEFINED_P (charset))
 839             charset = 1;
 840           else if (! NILP (table))
 841             {
 842               int c = translate_char (table, -1, charset, c1, c2);
 843               if (c >= 0)
 844                 charset = CHAR_CHARSET (c);
 845             }
 846
 847           if (return_val == 1
 848               && charset != CHARSET_ASCII
 849               && charset != CHARSET_8_BIT_CONTROL
 850               && charset != CHARSET_8_BIT_GRAPHIC
 851               && charset != charset_latin_iso8859_1)
 852             return_val = 2;
 853
 854           if (charsets)
 855             charsets[charset] = 1;
 856           else if (return_val == 2)
 857             break;
 858         }
 859       return return_val;
 860     }
 861 }
 862
 863 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 864        2, 3, 0,
 865   "Return a list of charsets in the region between BEG and END.\n\
 866 BEG and END are buffer positions.\n\
 867 Optional arg TABLE if non-nil is a translation table to look up.\n\
 868 \n\
 869 If the region contains invalid multibyte characters,\n\
 870 `unknown' is included in the returned list.\n\
 871 \n\
 872 If the current buffer is unibyte, the returned list may contain\n\
 873 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 874   (beg, end, table)
 875      Lisp_Object beg, end, table;
 876 {
 877   int charsets[MAX_CHARSET + 1];
 878   int from, from_byte, to, stop, stop_byte, i;
 879   Lisp_Object val;
 880
 881   validate_region (&beg, &end);
 882   from = XFASTINT (beg);
 883   stop = to = XFASTINT (end);
 884
 885   if (from < GPT && GPT < to)
 886     {
 887       stop = GPT;
 888       stop_byte = GPT_BYTE;
 889     }
 890   else
 891     stop_byte = CHAR_TO_BYTE (stop);
 892
 893   from_byte = CHAR_TO_BYTE (from);
 894
 895   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 896   while (1)
 897     {
 898       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 899                             stop_byte - from_byte, charsets, table);
 900       if (stop < to)
 901         {
 902           from = stop, from_byte = stop_byte;
 903           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 904         }
 905       else
 906         break;
 907     }
 908
 909   val = Qnil;
 910   if (charsets[1])
 911     val = Fcons (Qunknown, val);
 912   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 913     if (charsets[i])
 914       val = Fcons (CHARSET_SYMBOL (i), val);
 915   if (charsets[0])
 916     val = Fcons (Qascii, val);
 917   return val;
 918 }
 919
 920 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 921        1, 2, 0,
 922   "Return a list of charsets in STR.\n\
 923 Optional arg TABLE if non-nil is a translation table to look up.\n\
 924 \n\
 925 If the string contains invalid multibyte characters,\n\
 926 `unknown' is included in the returned list.\n\
 927 \n\
 928 If STR is unibyte, the returned list may contain\n\
 929 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 930   (str, table)
 931      Lisp_Object str, table;
 932 {
 933   int charsets[MAX_CHARSET + 1];
 934   int i;
 935   Lisp_Object val;
 936
 937   CHECK_STRING (str, 0);
 938
 939   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 940   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 941                         STRING_BYTES (XSTRING (str)), charsets, table);
 942
 943   val = Qnil;
 944   if (charsets[1])
 945     val = Fcons (Qunknown, val);
 946   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 947     if (charsets[i])
 948       val = Fcons (CHARSET_SYMBOL (i), val);
 949   if (charsets[0])
 950     val = Fcons (Qascii, val);
 951   return val;
 952 }
 953
 954 \f
 955 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 956   "")
 957   (charset, code1, code2)
 958      Lisp_Object charset, code1, code2;
 959 {
 960   int charset_id, c1, c2;
 961
 962   CHECK_NUMBER (charset, 0);
 963   charset_id = XINT (charset);
 964   if (!CHARSET_DEFINED_P (charset_id))
 965     error ("Invalid charset ID: %d", XINT (charset));
 966
 967   if (NILP (code1))
 968     c1 = 0;
 969   else
 970     {
 971       CHECK_NUMBER (code1, 1);
 972       c1 = XINT (code1);
 973     }
 974   if (NILP (code2))
 975     c2 = 0;
 976   else
 977     {
 978       CHECK_NUMBER (code2, 2);
 979       c2 = XINT (code2);
 980     }
 981
 982   if (charset_id == CHARSET_ASCII)
 983     {
 984       if (c1 < 0 || c1 > 0x7F)
 985         goto invalid_code_posints;
 986       return make_number (c1);
 987     }
 988   else if (charset_id == CHARSET_8_BIT_CONTROL)
 989     {
 990       if (NILP (code1))
 991         c1 = 0x80;
 992       else if (c1 < 0x80 || c1 > 0x9F)
 993         goto invalid_code_posints;
 994       return make_number (c1);
 995     }
 996   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
 997     {
 998       if (NILP (code1))
 999         c1 = 0xA0;
1000       else if (c1 < 0xA0 || c1 > 0xFF)
1001         goto invalid_code_posints;
1002       return make_number (c1);
1003     }
1004   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1005     goto invalid_code_posints;
1006   c1 &= 0x7F;
1007   c2 &= 0x7F;
1008   if (c1 == 0
1009       ? c2 != 0
1010       : (c2 == 0
1011          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1012          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1013     goto invalid_code_posints;
1014   return make_number (MAKE_CHAR (charset_id, c1, c2));
1015
1016  invalid_code_posints:
1017   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1018 }
1019
1020 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1021   "Return list of charset and one or two position-codes of CHAR.\n\
1022 If CHAR is invalid as a character code,\n\
1023 return a list of symbol `unknown' and CHAR.")
1024   (ch)
1025      Lisp_Object ch;
1026 {
1027   int c, charset, c1, c2;
1028
1029   CHECK_NUMBER (ch, 0);
1030   c = XFASTINT (ch);
1031   if (!CHAR_VALID_P (c, 1))
1032     return Fcons (Qunknown, Fcons (ch, Qnil));
1033   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1034   return (c2 >= 0
1035           ? Fcons (CHARSET_SYMBOL (charset),
1036                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1037           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1038 }
1039
1040 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1041   "Return charset of CHAR.")
1042   (ch)
1043      Lisp_Object ch;
1044 {
1045   CHECK_NUMBER (ch, 0);
1046
1047   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1048 }
1049
1050 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1051   "Return charset of a character in the current buffer at position POS.\n\
1052 If POS is nil, it defauls to the current point.\n\
1053 If POS is out of range, the value is nil.")
1054   (pos)
1055      Lisp_Object pos;
1056 {
1057   Lisp_Object ch;
1058   int charset;
1059
1060   ch = Fchar_after (pos);
1061   if (! INTEGERP (ch))
1062     return ch;
1063   charset = CHAR_CHARSET (XINT (ch));
1064   return CHARSET_SYMBOL (charset);
1065 }
1066
1067 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1068   "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1069 \n\
1070 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1071 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1072 where as Emacs distinguishes them by charset symbol.\n\
1073 See the documentation of the function `charset-info' for the meanings of\n\
1074 DIMENSION, CHARS, and FINAL-CHAR.")
1075   (dimension, chars, final_char)
1076      Lisp_Object dimension, chars, final_char;
1077 {
1078   int charset;
1079
1080   CHECK_NUMBER (dimension, 0);
1081   CHECK_NUMBER (chars, 1);
1082   CHECK_NUMBER (final_char, 2);
1083
1084   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1085     return Qnil;
1086   return CHARSET_SYMBOL (charset);
1087 }
1088
1089 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1090    generic character.  If GENERICP is zero, return nonzero iff C is a
1091    valid normal character.  Do not call this function directly,
1092    instead use macro CHAR_VALID_P.  */
1093 int
1094 char_valid_p (c, genericp)
1095      int c, genericp;
1096 {
1097   int charset, c1, c2;
1098
1099   if (c < 0 || c >= MAX_CHAR)
1100     return 0;
1101   if (SINGLE_BYTE_CHAR_P (c))
1102     return 1;
1103   SPLIT_CHAR (c, charset, c1, c2);
1104   if (genericp)
1105     {
1106       if (c1)
1107         {
1108           if (c2 <= 0) c2 = 0x20;
1109         }
1110       else
1111         {
1112           if (c2 <= 0) c1 = c2 = 0x20;
1113         }
1114     }
1115   return (CHARSET_DEFINED_P (charset)
1116           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1117 }
1118
1119 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1120   "Return t if OBJECT is a valid normal character.\n\
1121 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1122 a valid generic character.")
1123   (object, genericp)
1124      Lisp_Object object, genericp;
1125 {
1126   if (! NATNUMP (object))
1127     return Qnil;
1128   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1129 }
1130
1131 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1132        Sunibyte_char_to_multibyte, 1, 1, 0,
1133   "Convert the unibyte character CH to multibyte character.\n\
1134 The conversion is done based on `nonascii-translation-table' (which see)\n\
1135  or `nonascii-insert-offset' (which see).")
1136   (ch)
1137      Lisp_Object ch;
1138 {
1139   int c;
1140
1141   CHECK_NUMBER (ch, 0);
1142   c = XINT (ch);
1143   if (c < 0 || c >= 0400)
1144     error ("Invalid unibyte character: %d", c);
1145   c = unibyte_char_to_multibyte (c);
1146   if (c < 0)
1147     error ("Can't convert to multibyte character: %d", XINT (ch));
1148   return make_number (c);
1149 }
1150
1151 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1152        Smultibyte_char_to_unibyte, 1, 1, 0,
1153   "Convert the multibyte character CH to unibyte character.\n\
1154 The conversion is done based on `nonascii-translation-table' (which see)\n\
1155  or `nonascii-insert-offset' (which see).")
1156   (ch)
1157      Lisp_Object ch;
1158 {
1159   int c;
1160
1161   CHECK_NUMBER (ch, 0);
1162   c = XINT (ch);
1163   if (! CHAR_VALID_P (c, 0))
1164     error ("Invalid multibyte character: %d", c);
1165   c = multibyte_char_to_unibyte (c, Qnil);
1166   if (c < 0)
1167     error ("Can't convert to unibyte character: %d", XINT (ch));
1168   return make_number (c);
1169 }
1170
1171 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1172   "Return 1 regardless of the argument CHAR.\n\
1173 This is now an obsolete function.  We keep it just for backward compatibility.")
1174   (ch)
1175      Lisp_Object ch;
1176 {
1177   CHECK_NUMBER (ch, 0);
1178   return make_number (1);
1179 }
1180
1181 /* Return how many bytes C will occupy in a multibyte buffer.
1182    Don't call this function directly, instead use macro CHAR_BYTES.  */
1183 int
1184 char_bytes (c)
1185      int c;
1186 {
1187   int charset;
1188
1189   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1190     return 1;
1191   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1192     return 1;
1193
1194   charset = CHAR_CHARSET (c);
1195   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1196 }
1197
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer:

        tab                     the buffer's tab_width
        newline                 0
        other codes below 0x20
          and 0x7F              4, or 2 if the buffer's ctl_arrow is
                                non-nil
        0x20..0x7E              1
        0x80 and above          WIDTH_BY_CHAR_HEAD (C) if the buffer is
                                multibyte and C is a base leading code,
                                else 4

   C is evaluated more than once, so it must have no side effects.
   Every use of C is parenthesized so that any expression can be
   passed safely.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1215
1216 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1217   "Return width of CHAR when displayed in the current buffer.\n\
1218 The width is measured by how many columns it occupies on the screen.\n\
1219 Tab is taken to occupy `tab-width' columns.")
1220   (ch)
1221        Lisp_Object ch;
1222 {
1223   Lisp_Object val, disp;
1224   int c;
1225   struct Lisp_Char_Table *dp = buffer_display_table ();
1226
1227   CHECK_NUMBER (ch, 0);
1228
1229   c = XINT (ch);
1230
1231   /* Get the way the display table would display it.  */
1232   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1233
1234   if (VECTORP (disp))
1235     XSETINT (val, XVECTOR (disp)->size);
1236   else if (SINGLE_BYTE_CHAR_P (c))
1237     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1238   else
1239     {
1240       int charset = CHAR_CHARSET (c);
1241
1242       XSETFASTINT (val, CHARSET_WIDTH (charset));
1243     }
1244   return val;
1245 }
1246
/* Return the number of columns that the LEN bytes at STR occupy on
   the screen when displayed in the current buffer.  This is
   c_string_width without a precision limit.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int width;

  /* A negative precision means "no limit", in which case the two
     output pointers are never stored through.  */
  width = c_string_width (str, len, -1, NULL, NULL);
  return width;
}
1258
1259 /* Return width of string STR of length LEN when displayed in the
1260    current buffer.  The width is measured by how many columns it
1261    occupies on the screen.  If PRECISION > 0, return the width of
1262    longest substring that doesn't exceed PRECISION, and set number of
1263    characters and bytes of the substring in *NCHARS and *NBYTES
1264    respectively.  */
1265
1266 int
1267 c_string_width (str, len, precision, nchars, nbytes)
1268      unsigned char *str;
1269      int precision, *nchars, *nbytes;
1270 {
1271   int i = 0, i_byte = 0;
1272   int width = 0;
1273   int chars;
1274   struct Lisp_Char_Table *dp = buffer_display_table ();
1275
1276   while (i_byte < len)
1277     {
1278       int bytes, thiswidth;
1279       Lisp_Object val;
1280
1281       if (dp)
1282         {
1283           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1284
1285           chars = 1;
1286           val = DISP_CHAR_VECTOR (dp, c);
1287           if (VECTORP (val))
1288             thiswidth = XVECTOR (val)->size;
1289           else
1290             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1291         }
1292       else
1293         {
1294           chars = 1;
1295           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1296           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1297         }
1298
1299       if (precision > 0
1300           && (width + thiswidth > precision))
1301         {
1302           *nchars = i;
1303           *nbytes = i_byte;
1304           return width;
1305         }
1306       i++;
1307       i_byte += bytes;
1308       width += thiswidth;
1309   }
1310
1311   if (precision > 0)
1312     {
1313       *nchars = i;
1314       *nbytes = i_byte;
1315     }
1316
1317   return width;
1318 }
1319
1320 /* Return width of Lisp string STRING when displayed in the current
1321    buffer.  The width is measured by how many columns it occupies on
1322    the screen while paying attention to compositions.  If PRECISION >
1323    0, return the width of longest substring that doesn't exceed
1324    PRECISION, and set number of characters and bytes of the substring
1325    in *NCHARS and *NBYTES respectively.  */
1326
1327 int
1328 lisp_string_width (string, precision, nchars, nbytes)
1329      Lisp_Object string;
1330      int precision, *nchars, *nbytes;
1331 {
1332   int len = XSTRING (string)->size;
1333   int len_byte = STRING_BYTES (XSTRING (string));
1334   unsigned char *str = XSTRING (string)->data;
1335   int i = 0, i_byte = 0;
1336   int width = 0;
1337   struct Lisp_Char_Table *dp = buffer_display_table ();
1338
1339   while (i < len)
1340     {
1341       int chars, bytes, thiswidth;
1342       Lisp_Object val;
1343       int cmp_id;
1344       int ignore, end;
1345
1346       if (find_composition (i, -1, &ignore, &end, &val, string)
1347           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1348               >= 0))
1349         {
1350           thiswidth = composition_table[cmp_id]->width;
1351           chars = end - i;
1352           bytes = string_char_to_byte (string, end) - i_byte;
1353         }
1354       else if (dp)
1355         {
1356           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1357
1358           chars = 1;
1359           val = DISP_CHAR_VECTOR (dp, c);
1360           if (VECTORP (val))
1361             thiswidth = XVECTOR (val)->size;
1362           else
1363             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1364         }
1365       else
1366         {
1367           chars = 1;
1368           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1369           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1370         }
1371
1372       if (precision > 0
1373           && (width + thiswidth > precision))
1374         {
1375           *nchars = i;
1376           *nbytes = i_byte;
1377           return width;
1378         }
1379       i += chars;
1380       i_byte += bytes;
1381       width += thiswidth;
1382   }
1383
1384   if (precision > 0)
1385     {
1386       *nchars = i;
1387       *nbytes = i_byte;
1388     }
1389
1390   return width;
1391 }
1392
1393 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1394   "Return width of STRING when displayed in the current buffer.\n\
1395 Width is measured by how many columns it occupies on the screen.\n\
1396 When calculating width of a multibyte character in STRING,\n\
1397 only the base leading-code is considered; the validity of\n\
1398 the following bytes is not checked.  Tabs in STRING are always\n\
1399 taken to occupy `tab-width' columns.")
1400   (str)
1401      Lisp_Object str;
1402 {
1403   Lisp_Object val;
1404
1405   CHECK_STRING (str, 0);
1406   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1407   return val;
1408 }
1409
1410 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1411   "Return the direction of CHAR.\n\
1412 The returned value is 0 for left-to-right and 1 for right-to-left.")
1413   (ch)
1414      Lisp_Object ch;
1415 {
1416   int charset;
1417
1418   CHECK_NUMBER (ch, 0);
1419   charset = CHAR_CHARSET (XFASTINT (ch));
1420   if (!CHARSET_DEFINED_P (charset))
1421     invalid_character (XINT (ch));
1422   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1423 }
1424
1425 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1426   "Return number of characters between BEG and END.")
1427   (beg, end)
1428      Lisp_Object beg, end;
1429 {
1430   int from, to;
1431
1432   CHECK_NUMBER_COERCE_MARKER (beg, 0);
1433   CHECK_NUMBER_COERCE_MARKER (end, 1);
1434
1435   from = min (XFASTINT (beg), XFASTINT (end));
1436   to = max (XFASTINT (beg), XFASTINT (end));
1437
1438   return make_number (to - from);
1439 }
1440
1441 /* Return the number of characters in the NBYTES bytes at PTR.
1442    This works by looking at the contents and checking for multibyte sequences.
1443    However, if the current buffer has enable-multibyte-characters = nil,
1444    we treat each byte as a character.  */
1445
1446 int
1447 chars_in_text (ptr, nbytes)
1448      unsigned char *ptr;
1449      int nbytes;
1450 {
1451   /* current_buffer is null at early stages of Emacs initialization.  */
1452   if (current_buffer == 0
1453       || NILP (current_buffer->enable_multibyte_characters))
1454     return nbytes;
1455
1456   return multibyte_chars_in_text (ptr, nbytes);
1457 }
1458
/* Return the number of characters in the NBYTES bytes at PTR,
   counting each multibyte sequence found in the contents as one
   character.  Unlike chars_in_text, this does not consult
   enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count = 0;
  int seqlen;

  /* Step over one sequence per iteration.  */
  for (; ptr < limit; ptr += seqlen, count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seqlen);
    }

  return count;
}
1483
/* Parse unibyte text at STR of LEN bytes as multibyte text, and store
   the number of characters in *NCHARS and the number of bytes in
   *NBYTES.  A byte that does not start a sequence accepted by
   UNIBYTE_STR_AS_MULTIBYTE_P is counted as one character of 2 bytes,
   because 8-bit characters in the range 0x80..0x9F are represented
   by 2 bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *limit = str + len;
  int seqlen;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seqlen))
        {
          str += seqlen;
          byte_count += seqlen;
        }
      else
        {
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1508
1509 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1510    It actually converts only 8-bit characters in the range 0x80..0x9F
1511    that don't contruct multibyte characters to multibyte forms.  If
1512    NCHARS is nonzero, set *NCHARS to the number of characters in the
1513    text.  It is assured that we can use LEN bytes at STR as a work
1514    area and that is enough.  Return the number of bytes of the
1515    resulting text.  */
1516
1517 int
1518 str_as_multibyte (str, len, nbytes, nchars)
1519      unsigned char *str;
1520      int len, nbytes, *nchars;
1521 {
1522   unsigned char *p = str, *endp = str + nbytes;
1523   unsigned char *to;
1524   int chars = 0;
1525   int n;
1526
1527   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1528     p += n, chars++;
1529   if (nchars)
1530     *nchars = chars;
1531   if (p == endp)
1532     return nbytes;
1533
1534   to = p;
1535   nbytes = endp - p;
1536   endp = str + len;
1537   safe_bcopy (p, endp - nbytes, nbytes);
1538   p = endp - nbytes;
1539   while (p < endp)
1540     {
1541       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1542         {
1543           while (n--)
1544             *to++ = *p++;
1545         }
1546       else
1547         {
1548           *to++ = LEADING_CODE_8_BIT_CONTROL;
1549           *to++ = *p++ + 0x20;
1550         }
1551       chars++;
1552     }
1553   if (nchars)
1554     *nchars = chars;
1555   return (to - str);
1556 }
1557
/* Parse unibyte string at STR of LEN bytes, and return the number of
   bytes it may occupy when converted to multibyte string by
   `str_to_multibyte'.  Each byte in the range 0x80..0x9F counts as
   two bytes, every other byte as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      unsigned char byte = str[i];

      if (byte >= 0x80 && byte < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1574
1575 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1576    that contains the same single-byte characters.  It actually
1577    converts all 8-bit characters to multibyte forms.  It is assured
1578    that we can use LEN bytes at STR as a work area and that is
1579    enough.  */
1580
1581 int
1582 str_to_multibyte (str, len, bytes)
1583      unsigned char *str;
1584      int len, bytes;
1585 {
1586   unsigned char *p = str, *endp = str + bytes;
1587   unsigned char *to;
1588
1589   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1590   if (p == endp)
1591     return bytes;
1592   to = p;
1593   bytes = endp - p;
1594   endp = str + len;
1595   safe_bcopy (p, endp - bytes, bytes);
1596   p = endp - bytes;
1597   while (p < endp)
1598     {
1599       if (*p < 0x80 || *p >= 0xA0)
1600         *to++ = *p++;
1601       else
1602         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1603     }
1604   return (to - str);
1605 }
1606
1607 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1608    actually converts only 8-bit characters in the range 0x80..0x9F to
1609    unibyte forms.  */
1610
1611 int
1612 str_as_unibyte (str, bytes)
1613      unsigned char *str;
1614      int bytes;
1615 {
1616   unsigned char *p = str, *endp = str + bytes;
1617   unsigned char *to = str;
1618
1619   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1620   to = p;
1621   while (p < endp)
1622     {
1623       if (*p == LEADING_CODE_8_BIT_CONTROL)
1624         *to++ = *(p + 1) - 0x20, p += 2;
1625       else
1626         *to++ = *p++;
1627     }
1628   return (to - str);
1629 }
1630
1631 \f
1632 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1633   "Concatenate all the argument characters and make the result a string.")
1634   (n, args)
1635      int n;
1636      Lisp_Object *args;
1637 {
1638   int i;
1639   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1640   unsigned char *p = buf;
1641   int c;
1642   int multibyte = 0;
1643
1644   for (i = 0; i < n; i++)
1645     {
1646       CHECK_NUMBER (args[i], 0);
1647       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1648         multibyte = 1;
1649     }
1650
1651   for (i = 0; i < n; i++)
1652     {
1653       c = XINT (args[i]);
1654       if (multibyte)
1655         p += CHAR_STRING (c, p);
1656       else
1657         *p++ = c;
1658     }
1659
1660   return make_string_from_bytes (buf, n, p - buf);
1661 }
1662
1663 #endif /* emacs */
1664 \f
1665 int
1666 charset_id_internal (charset_name)
1667      char *charset_name;
1668 {
1669   Lisp_Object val;
1670
1671   val= Fget (intern (charset_name), Qcharset);
1672   if (!VECTORP (val))
1673     error ("Charset %s is not defined", charset_name);
1674
1675   return (XINT (XVECTOR (val)->contents[0]));
1676 }
1677
1678 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1679        Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1680    ()
1681 {
1682   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1683   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1684   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1685   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1686   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1687   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1688   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1689   return Qnil;
1690 }
1691
1692 void
1693 init_charset_once ()
1694 {
1695   int i, j, k;
1696
1697   staticpro (&Vcharset_table);
1698   staticpro (&Vcharset_symbol_table);
1699   staticpro (&Vgeneric_character_list);
1700
1701   /* This has to be done here, before we call Fmake_char_table.  */
1702   Qcharset_table = intern ("charset-table");
1703   staticpro (&Qcharset_table);
1704
1705   /* Intern this now in case it isn't already done.
1706      Setting this variable twice is harmless.
1707      But don't staticpro it here--that is done in alloc.c.  */
1708   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1709
1710   /* Now we are ready to set up this property, so we can
1711      create the charset table.  */
1712   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1713   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1714
1715   Qunknown = intern ("unknown");
1716   staticpro (&Qunknown);
1717   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1718                                         Qunknown);
1719
1720   /* Setup tables.  */
1721   for (i = 0; i < 2; i++)
1722     for (j = 0; j < 2; j++)
1723       for (k = 0; k < 128; k++)
1724         iso_charset_table [i][j][k] = -1;
1725
1726   for (i = 0; i < 256; i++)
1727     bytes_by_char_head[i] = 1;
1728   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1729   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1730   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1731   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1732
1733   for (i = 0; i < 128; i++)
1734     width_by_char_head[i] = 1;
1735   for (; i < 256; i++)
1736     width_by_char_head[i] = 4;
1737   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1738   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1739   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1740   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1741
1742   {
1743     Lisp_Object val;
1744
1745     val = Qnil;
1746     for (i = 0x81; i < 0x90; i++)
1747       val = Fcons (make_number ((i - 0x70) << 7), val);
1748     for (; i < 0x9A; i++)
1749       val = Fcons (make_number ((i - 0x8F) << 14), val);
1750     for (i = 0xA0; i < 0xF0; i++)
1751       val = Fcons (make_number ((i - 0x70) << 7), val);
1752     for (; i < 0xFF; i++)
1753       val = Fcons (make_number ((i - 0xE0) << 14), val);
1754     Vgeneric_character_list = Fnreverse (val);
1755   }
1756
1757   nonascii_insert_offset = 0;
1758   Vnonascii_translation_table = Qnil;
1759 }
1760
1761 #ifdef emacs
1762 /* Set up the Lisp-level state of this file: intern the charset
        symbols, define the three special charsets, register the subr
        objects with defsubr, and declare the Lisp variables together
        with their initial values.  Takes no arguments and returns
        nothing.

        The statements are order-sensitive: each symbol is interned
        before it is passed to Fput or consed into Vcharset_list, and
        each update_charset_table call precedes the CHARSET_SYMBOL
        assignment and the CHARSET_TABLE_ENTRY read for the same
        charset.  */
1763 void
1764 syms_of_charset ()
1765 {
       /* Intern each symbol and hand the variable holding it to
          staticpro.  */
1766   Qcharset = intern ("charset");
1767   staticpro (&Qcharset);
1768
1769   Qascii = intern ("ascii");
1770   staticpro (&Qascii);
1771
1772   Qeight_bit_control = intern ("eight-bit-control");
1773   staticpro (&Qeight_bit_control);
1774
1775   Qeight_bit_graphic = intern ("eight-bit-graphic");
1776   staticpro (&Qeight_bit_graphic);
1777
1778   /* Define special charsets ascii, eight-bit-control, and
1779      eight-bit-graphic.  For each one, update_charset_table is
          called with the charset's ID and attributes, then the
          charset's symbol is recorded through CHARSET_SYMBOL and the
          table entry is stored on that symbol under the Qcharset
          property.  The meaning of the individual numeric arguments is
          defined by update_charset_table, which is not shown here; the
          Qnil arguments keep the original note `same as above'
          (presumably the preceding name string is reused -- confirm
          against update_charset_table).  */
1780   update_charset_table (make_number (CHARSET_ASCII),
1781                         make_number (1), make_number (94),
1782                         make_number (1),
1783                         make_number (0),
1784                         make_number ('B'),
1785                         make_number (0),
1786                         build_string ("ASCII"),
1787                         Qnil,   /* same as above */
1788                         build_string ("ASCII (ISO646 IRV)"));
1789   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1790   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1791
1792   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1793                         make_number (1), make_number (96),
1794                         make_number (4),
1795                         make_number (0),
1796                         make_number (-1),
1797                         make_number (-1),
1798                         build_string ("8-bit control code (0x80..0x9F)"),
1799                         Qnil,   /* same as above */
1800                         Qnil);  /* same as above */
1801   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1802   Fput (Qeight_bit_control, Qcharset,
1803         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1804
1805   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1806                         make_number (1), make_number (96),
1807                         make_number (4),
1808                         make_number (0),
1809                         make_number (-1),
1810                         make_number (-1),
1811                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1812                         Qnil,   /* same as above */
1813                         Qnil);  /* same as above */
1814   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1815   Fput (Qeight_bit_graphic, Qcharset,
1816         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1817   /* Qauto_fill_chars is the symbol later passed as the first argument
          of Fmake_char_table for Vauto_fill_chars; its
          Qchar_table_extra_slots property is set to 0 here.  */
1818   Qauto_fill_chars = intern ("auto-fill-chars");
1819   staticpro (&Qauto_fill_chars);
1820   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1821   /* Register the subr objects with defsubr.  */
1822   defsubr (&Sdefine_charset);
1823   defsubr (&Sgeneric_character_list);
1824   defsubr (&Sget_unused_iso_final_char);
1825   defsubr (&Sdeclare_equiv_charset);
1826   defsubr (&Sfind_charset_region);
1827   defsubr (&Sfind_charset_string);
1828   defsubr (&Smake_char_internal);
1829   defsubr (&Ssplit_char);
1830   defsubr (&Schar_charset);
1831   defsubr (&Scharset_after);
1832   defsubr (&Siso_charset);
1833   defsubr (&Schar_valid_p);
1834   defsubr (&Sunibyte_char_to_multibyte);
1835   defsubr (&Smultibyte_char_to_unibyte);
1836   defsubr (&Schar_bytes);
1837   defsubr (&Schar_width);
1838   defsubr (&Sstring_width);
1839   defsubr (&Schar_direction);
1840   defsubr (&Schars_in_region);
1841   defsubr (&Sstring);
1842   defsubr (&Ssetup_special_charsets);
1843   /* Declare variables through DEFVAR_LISP / DEFVAR_INT and assign
          their initial values.  charset-list starts out holding the
          symbols of the three special charsets defined above.  */
1844   DEFVAR_LISP ("charset-list", &Vcharset_list,
1845     "List of charsets ever defined.");
1846   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1847                                         Fcons (Qeight_bit_graphic, Qnil)));
1848   /* Initially a vector of 16 elements, all nil.  */
1849   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1850     "Vector of cons cell of a symbol and translation table ever defined.\n\
1851 An ID of a translation table is an index of this vector.");
1852   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1853   /* The next four variables are initialized from the
          LEADING_CODE_PRIVATE_* constants.  */
1854   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1855     "Leading-code of private TYPE9N charset of column-width 1.");
1856   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1857
1858   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1859     "Leading-code of private TYPE9N charset of column-width 2.");
1860   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1861
1862   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1863     "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1864   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1865
1866   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1867     "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1868   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1869   /* nonascii_insert_offset is also set to 0 at the end of the function
          that precedes this one in the file.  */
1870   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1871     "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1872 This is used for converting unibyte text to multibyte,\n\
1873 and for inserting character codes specified by number.\n\n\
1874 This serves to convert a Latin-1 or similar 8-bit character code\n\
1875 to the corresponding Emacs multibyte character code.\n\
1876 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1877 for your choice of character set.\n\
1878 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1879   nonascii_insert_offset = 0;
1880   /* Vnonascii_translation_table is likewise set to nil both here and
          at the end of the function that precedes this one.  */
1881   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1882     "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1883 This is used for converting unibyte text to multibyte,\n\
1884 and for inserting character codes specified by number.\n\n\
1885 Conversion is performed only when multibyte characters are enabled,\n\
1886 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1887 to the corresponding Emacs character code.\n\n\
1888 If this is nil, `nonascii-insert-offset' is used instead.\n\
1889 See also the docstring of `make-translation-table'.");
1890   Vnonascii_translation_table = Qnil;
1891   /* auto-fill-chars starts as a fresh char-table in which space and
          newline are then set to t.  */
1892   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1893     "A char-table for characters which invoke auto-filling.\n\
1894 Such characters have value t in this table.");
1895   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1896   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1897   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1898 }
1899
1900 #endif /* emacs */