src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /* At first, see the document in `charset.h' to understand the code in
  24    this file.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
/* Lisp objects used as symbols by the charset code.  Qcharset is the
   property under which a charset symbol's information vector is
   stored (see get_charset_id and Fdefine_charset).
   NOTE(review): the others presumably hold the symbols their names
   suggest; their initialization is not visible in this part of the
   file -- confirm.  */
Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
Lisp_Object Qunknown;

/* Declaration of special leading-codes.  */
EMACS_INT leading_code_private_11;      /* for private DIMENSION1 of 1-column */
EMACS_INT leading_code_private_12;      /* for private DIMENSION1 of 2-column */
EMACS_INT leading_code_private_21;      /* for private DIMENSION2 of 1-column */
EMACS_INT leading_code_private_22;      /* for private DIMENSION2 of 2-column */

/* Declaration of special charsets.  The values are set by
   Fsetup_special_charsets.  */
int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */

Lisp_Object Qcharset_table;

/* A char-table containing information about each character set.  */
Lisp_Object Vcharset_table;

/* A vector of charset symbols indexed by charset-id.  This is used
   only for returning a charset symbol from C functions.  */
Lisp_Object Vcharset_symbol_table;

/* A list of all charset symbols defined so far.  Fdefine_charset
   pushes each newly defined symbol onto the front.  */
Lisp_Object Vcharset_list;

/* Vector of all translation tables defined so far.
   The ID of a translation table is its index in this vector.  */
Lisp_Object Vtranslation_table_vector;

/* A char-table for characters which may invoke auto-filling.  */
Lisp_Object Vauto_fill_chars;

Lisp_Object Qauto_fill_chars;

/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.
   update_charset_table stores into them, indexed by the base
   leading-code of an official charset.  */
int bytes_by_char_head[256];
int width_by_char_head[256];

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL-CHAR) to Emacs' charset.  */
int iso_charset_table[2][2][128];

/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;

/* Offset to add to a non-ASCII value when inserting it.  */
EMACS_INT nonascii_insert_offset;

/* Translation table for converting non-ASCII unibyte characters
   to multibyte codes, or nil.  */
Lisp_Object Vnonascii_translation_table;

/* List of all possible generic characters.  */
Lisp_Object Vgeneric_character_list;
 109
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 116 }
 117
/* Parse the multibyte sequence starting at STR and fetch information
   about the character found there.  Set BYTES to the byte length the
   character occupies (BYTES_BY_CHAR_HEAD of the first byte), and
   CHARSET, C1, C2 to the charset and position codes of the character.

   LENGTH is nominally the length of STR, but the expansion never
   references it: no bounds checking is done here, so the caller must
   make sure that BYTES bytes are readable at STR.

   C2 is assigned only for 3-byte sequences whose first byte is less
   than LEADING_CODE_PRIVATE_11, and in the final `else' branch (BYTES
   is none of 1, 2, 3); in every other case C2 is left untouched.

   BYTES, CHARSET, C1, and C2 must be lvalues.  STR and the output
   arguments may be evaluated more than once.  */

#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
  do {                                                                       \
    (c1) = *(str);                                                           \
    (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
    if ((bytes) == 1)                                                        \
      (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
    else if ((bytes) == 2)                                                   \
      {                                                                      \
        if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
          (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
        else                                                                 \
          (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
      }                                                                      \
    else if ((bytes) == 3)                                                   \
      {                                                                      \
        if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
          (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
        else                                                                 \
          (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
      }                                                                      \
    else                                                                     \
      (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
  } while (0)
 145
/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
   The accepted ranges are:
     CHARSET_ASCII:            C1 in 0..0x7F
     CHARSET_8_BIT_CONTROL:    C1 in 0x80..0x9F
     CHARSET_8_BIT_GRAPHIC:    C1 in 0x80..0xFF
     otherwise, dimension 1:   C1 in 0x20..0x7F
     otherwise:                C1 and C2 both in 0x20..0x7F
   C2 is examined only in the last case.  CHARSET and C1 may be
   evaluated more than once.  */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
  ((charset) == CHARSET_ASCII                           \
   ? ((c1) >= 0 && (c1) <= 0x7F)                        \
   : ((charset) == CHARSET_8_BIT_CONTROL                \
      ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
      : ((charset) == CHARSET_8_BIT_GRAPHIC             \
         ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
         : (CHARSET_DIMENSION (charset) == 1            \
            ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
            : ((c1) >= 0x20 && (c1) <= 0x7F             \
               && (c2) >= 0x20 && (c2) <= 0x7F)))))
 158
 159 /* Store multi-byte form of the character C in STR.  The caller should
 160    allocate at least 4-byte area at STR in advance.  Returns the
 161    length of the multi-byte form.  If C is an invalid character code,
 162    return -1.  */
 163
 164 int
 165 char_to_string_1 (c, str)
 166      int c;
 167      unsigned char *str;
 168 {
 169   unsigned char *p = str;
 170
 171   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 172     {
 173       /* Multibyte character can't have a modifier bit.  */
 174       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 175         return -1;
 176
 177       /* For Meta, Shift, and Control modifiers, we need special care.  */
 178       if (c & CHAR_META)
 179         {
 180           /* Move the meta bit to the right place for a string.  */
 181           c = (c & ~CHAR_META) | 0x80;
 182         }
 183       if (c & CHAR_SHIFT)
 184         {
 185           /* Shift modifier is valid only with [A-Za-z].  */
 186           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 187             c &= ~CHAR_SHIFT;
 188           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 189             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 190         }
 191       if (c & CHAR_CTL)
 192         {
 193           /* Simulate the code in lread.c.  */
 194           /* Allow `\C- ' and `\C-?'.  */
 195           if (c == (CHAR_CTL | ' '))
 196             c = 0;
 197           else if (c == (CHAR_CTL | '?'))
 198             c = 127;
 199           /* ASCII control chars are made from letters (both cases),
 200              as well as the non-letters within 0100...0137.  */
 201           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 202             c &= (037 | (~0177 & ~CHAR_CTL));
 203           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 204             c &= (037 | (~0177 & ~CHAR_CTL));
 205         }
 206
 207       /* If C still has any modifier bits, just ignore it.  */
 208       c &= ~CHAR_MODIFIER_MASK;
 209     }
 210
 211   if (SINGLE_BYTE_CHAR_P (c))
 212     {
 213       if (ASCII_BYTE_P (c) || c >= 0xA0)
 214         *p++ = c;
 215       else
 216         {
 217           *p++ = LEADING_CODE_8_BIT_CONTROL;
 218           *p++ = c + 0x20;
 219         }
 220     }
 221   else if (CHAR_VALID_P (c, 0))
 222     {
 223       int charset, c1, c2;
 224
 225       SPLIT_CHAR (c, charset, c1, c2);
 226
 227       if (charset >= LEADING_CODE_EXT_11)
 228         *p++ = (charset < LEADING_CODE_EXT_12
 229                 ? LEADING_CODE_PRIVATE_11
 230                 : (charset < LEADING_CODE_EXT_21
 231                    ? LEADING_CODE_PRIVATE_12
 232                    : (charset < LEADING_CODE_EXT_22
 233                       ? LEADING_CODE_PRIVATE_21
 234                       : LEADING_CODE_PRIVATE_22)));
 235       *p++ = charset;
 236       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 237         return -1;
 238       if (c1)
 239         {
 240           *p++ = c1 | 0x80;
 241           if (c2 > 0)
 242             *p++ = c2 | 0x80;
 243         }
 244     }
 245   else
 246     return -1;
 247
 248   return (p - str);
 249 }
 250
 251
 252 /* Store multi-byte form of the character C in STR.  The caller should
 253    allocate at least 4-byte area at STR in advance.  Returns the
 254    length of the multi-byte form.  If C is an invalid character code,
 255    signal an error.
 256
 257    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 258    directly if C can be an ASCII character.  */
 259
 260 int
 261 char_to_string (c, str)
 262      int c;
 263      unsigned char *str;
 264 {
 265   int len;
 266   len = char_to_string_1 (c, str);
 267   if (len == -1)
 268     invalid_character (c);
 269   return len;
 270 }
 271
 272
 273 /* Return the non-ASCII character corresponding to multi-byte form at
 274    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 275    length of the multibyte form in *ACTUAL_LEN.
 276
 277    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 278    this function directly if you want ot handle ASCII characters as
 279    well.  */
 280
 281 int
 282 string_to_char (str, len, actual_len)
 283      const unsigned char *str;
 284      int len, *actual_len;
 285 {
 286   int c, bytes, charset, c1, c2;
 287
 288   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 289   c = MAKE_CHAR (charset, c1, c2);
 290   if (actual_len)
 291     *actual_len = bytes;
 292   return c;
 293 }
 294
 295 /* Return the length of the multi-byte form at string STR of length LEN.
 296    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 297 int
 298 multibyte_form_length (str, len)
 299      const unsigned char *str;
 300      int len;
 301 {
 302   int bytes;
 303
 304   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 305   return bytes;
 306 }
 307
 308 /* Check multibyte form at string STR of length LEN and set variables
 309    pointed by CHARSET, C1, and C2 to charset and position codes of the
 310    character at STR, and return 0.  If there's no multibyte character,
 311    return -1.  This should be used only in the macro SPLIT_STRING
 312    which checks range of STR in advance.  */
 313
 314 int
 315 split_string (str, len, charset, c1, c2)
 316      const unsigned char *str;
 317      unsigned char *c1, *c2;
 318      int len, *charset;
 319 {
 320   register int bytes, cs, code1, code2 = -1;
 321
 322   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 323   if (cs == CHARSET_ASCII)
 324     return -1;
 325   *charset = cs;
 326   *c1 = code1;
 327   *c2 = code2;
 328   return 0;
 329 }
 330
 331 /* Return 1 iff character C has valid printable glyph.
 332    Use the macro CHAR_PRINTABLE_P instead.  */
 333 int
 334 char_printable_p (c)
 335      int c;
 336 {
 337   int charset, c1, c2;
 338
 339   if (ASCII_BYTE_P (c))
 340     return 1;
 341   else if (SINGLE_BYTE_CHAR_P (c))
 342     return 0;
 343   else if (c >= MAX_CHAR)
 344     return 0;
 345
 346   SPLIT_CHAR (c, charset, c1, c2);
 347   if (! CHARSET_DEFINED_P (charset))
 348     return 0;
 349   if (CHARSET_CHARS (charset) == 94
 350       ? c1 <= 32 || c1 >= 127
 351       : c1 < 32)
 352     return 0;
 353   if (CHARSET_DIMENSION (charset) == 2
 354       && (CHARSET_CHARS (charset) == 94
 355           ? c2 <= 32 || c2 >= 127
 356           : c2 < 32))
 357     return 0;
 358   return 1;
 359 }
 360
 361 /* Translate character C by translation table TABLE.  If C
 362    is negative, translate a character specified by CHARSET, C1, and C2
 363    (C1 and C2 are code points of the character).  If no translation is
 364    found in TABLE, return C.  */
 365 int
 366 translate_char (table, c, charset, c1, c2)
 367      Lisp_Object table;
 368      int c, charset, c1, c2;
 369 {
 370   Lisp_Object ch;
 371   int alt_charset, alt_c1, alt_c2, dimension;
 372
 373   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 374   if (!CHAR_TABLE_P (table)
 375       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 376     return c;
 377
 378   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 379   dimension = CHARSET_DIMENSION (alt_charset);
 380   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 381     /* CH is not a generic character, just return it.  */
 382     return XFASTINT (ch);
 383
 384   /* Since CH is a generic character, we must return a specific
 385      charater which has the same position codes as C from CH.  */
 386   if (charset < 0)
 387     SPLIT_CHAR (c, charset, c1, c2);
 388   if (dimension != CHARSET_DIMENSION (charset))
 389     /* We can't make such a character because of dimension mismatch.  */
 390     return c;
 391   return MAKE_CHAR (alt_charset, c1, c2);
 392 }
 393
 394 /* Convert the unibyte character C to multibyte based on
 395    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 396    convert C to a valid multibyte character, convert it based on
 397    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 398
 399 int
 400 unibyte_char_to_multibyte (c)
 401      int c;
 402 {
 403   if (c < 0400 && c >= 0200)
 404     {
 405       int c_save = c;
 406
 407       if (! NILP (Vnonascii_translation_table))
 408         {
 409           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 410           if (c >= 0400 && ! char_valid_p (c, 0))
 411             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 412         }
 413       else if (c >= 0240 && nonascii_insert_offset > 0)
 414         {
 415           c += nonascii_insert_offset;
 416           if (c < 0400 || ! char_valid_p (c, 0))
 417             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 418         }
 419       else if (c >= 0240)
 420         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421     }
 422   return c;
 423 }
 424
 425
 426 /* Convert the multibyte character C to unibyte 8-bit character based
 427    on Vnonascii_translation_table or nonascii_insert_offset.  If
 428    REV_TBL is non-nil, it should be a reverse table of
 429    Vnonascii_translation_table, i.e. what given by:
 430      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 431
 432 int
 433 multibyte_char_to_unibyte (c, rev_tbl)
 434      int c;
 435      Lisp_Object rev_tbl;
 436 {
 437   if (!SINGLE_BYTE_CHAR_P (c))
 438     {
 439       int c_save = c;
 440
 441       if (! CHAR_TABLE_P (rev_tbl)
 442           && CHAR_TABLE_P (Vnonascii_translation_table))
 443         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 444                                           make_number (0));
 445       if (CHAR_TABLE_P (rev_tbl))
 446         {
 447           Lisp_Object temp;
 448           temp = Faref (rev_tbl, make_number (c));
 449           if (INTEGERP (temp))
 450             c = XINT (temp);
 451           if (c >= 256)
 452             c = (c_save & 0177) + 0200;
 453         }
 454       else
 455         {
 456           if (nonascii_insert_offset > 0)
 457             c -= nonascii_insert_offset;
 458           if (c < 128 || c >= 256)
 459             c = (c_save & 0177) + 0200;
 460         }
 461     }
 462
 463   return c;
 464 }
 465
 466 \f
/* Update the table Vcharset_table with the given arguments (see the
   documentation of `define-charset' for the meaning of each
   argument).  Several other tables are also updated:
   bytes_by_char_head, width_by_char_head, emacs_code_class, and
   iso_charset_table.  The caller should check the validity of
   CHARSET-ID and the remaining arguments in advance.

   If LONG_NAME is nil, SHORT_NAME is used in its place; if
   DESCRIPTION is nil, the (possibly defaulted) long name is used.

   Signals an error if CHARSET_ID is not below
   MIN_CHARSET_PRIVATE_DIMENSION1 (a private charset) and the byte
   length implied by DIMENSION disagrees with BYTES_BY_CHAR_HEAD of
   its base leading-code.  */

void
update_charset_table (charset_id, dimension, chars, width, direction,
                      iso_final_char, iso_graphic_plane,
                      short_name, long_name, description)
     Lisp_Object charset_id, dimension, chars, width, direction;
     Lisp_Object iso_final_char, iso_graphic_plane;
     Lisp_Object short_name, long_name, description;
{
  int charset = XINT (charset_id);
  int bytes;
  unsigned char leading_code_base, leading_code_ext;

  /* Create the information vector for this charset if it has none.  */
  if (NILP (CHARSET_TABLE_ENTRY (charset)))
    CHARSET_TABLE_ENTRY (charset)
      = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);

  /* Default the optional names.  */
  if (NILP (long_name))
    long_name = short_name;
  if (NILP (description))
    description = long_name;

  /* Get byte length of multibyte form, base leading-code, and
     extended leading-code of the charset.  See the comment under the
     title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
  bytes = XINT (dimension);
  if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
    {
      /* Official charset, it doesn't have an extended leading-code.  */
      if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
        bytes += 1; /* For a base leading-code.  */
      leading_code_base = charset;
      leading_code_ext = 0;
    }
  else
    {
      /* Private charset.  */
      bytes += 2; /* For base and extended leading-codes.  */
      leading_code_base
        = (charset < LEADING_CODE_EXT_12
           ? LEADING_CODE_PRIVATE_11
           : (charset < LEADING_CODE_EXT_21
              ? LEADING_CODE_PRIVATE_12
              : (charset < LEADING_CODE_EXT_22
                 ? LEADING_CODE_PRIVATE_21
                 : LEADING_CODE_PRIVATE_22)));
      leading_code_ext = charset;
      /* NOTE(review): when this error is signaled, the information
         vector may already have been created above for CHARSET --
         confirm that leaving it behind is intended.  */
      if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
        error ("Invalid dimension for the charset-ID %d", charset);
    }

  /* Fill in the slots of the information vector.  */
  CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
  CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
  CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
  CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
  CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
  CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
    = make_number (leading_code_base);
  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
    = make_number (leading_code_ext);
  CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
  CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
    = iso_graphic_plane;
  CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
  CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
  CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
  CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;

  {
    /* If we have already defined a charset which has the same
       DIMENSION, CHARS and ISO-FINAL-CHAR but the different
       DIRECTION, we must update the entry REVERSE-CHARSET of both
       charsets.  If there's no such charset, the value of the entry
       is set to -1.  Only the first match is linked.  */
    int i;

    for (i = 0; i <= MAX_CHARSET; i++)
      if (!NILP (CHARSET_TABLE_ENTRY (i)))
        {
          if (CHARSET_DIMENSION (i) == XINT (dimension)
              && CHARSET_CHARS (i) == XINT (chars)
              && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
              && CHARSET_DIRECTION (i) != XINT (direction))
            {
              CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
                = make_number (i);
              CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
              break;
            }
        }
    if (i > MAX_CHARSET)
      /* No such a charset.  */
      CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
        = make_number (-1);
  }

  /* Only an official charset other than ASCII and eight-bit-graphic
     registers its byte length, width, and code class under its own
     leading code.  */
  if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
      && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
    {
      bytes_by_char_head[leading_code_base] = bytes;
      width_by_char_head[leading_code_base] = XINT (width);

      /* Update table emacs_code_class.  */
      emacs_code_class[charset] = (bytes == 2
                                   ? EMACS_leading_code_2
                                   : (bytes == 3
                                      ? EMACS_leading_code_3
                                      : EMACS_leading_code_4));
    }

  /* Update table iso_charset_table.  The slot is claimed only while
     it is still negative, so an earlier assignment for the same
     DIMENSION, CHARS, and ISO-FINAL-CHAR is kept.  */
  if (XINT (iso_final_char) >= 0
      && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
    ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
}
 588
 589 #ifdef emacs
 590
 591 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 592    is invalid.  */
 593 int
 594 get_charset_id (charset_symbol)
 595      Lisp_Object charset_symbol;
 596 {
 597   Lisp_Object val;
 598   int charset;
 599
 600   /* This originally used a ?: operator, but reportedly the HP-UX
 601      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 602   if (SYMBOLP (charset_symbol)
 603       && VECTORP (val = Fget (charset_symbol, Qcharset))
 604       && CHARSET_VALID_P (charset =
 605                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 606     return charset;
 607   else
 608     return -1;
 609 }
 610
 611 /* Return an identification number for a new private charset of
 612    DIMENSION and WIDTH.  If there's no more room for the new charset,
 613    return 0.  */
 614 Lisp_Object
 615 get_new_private_charset_id (dimension, width)
 616      int dimension, width;
 617 {
 618   int charset, from, to;
 619
 620   if (dimension == 1)
 621     {
 622       from = LEADING_CODE_EXT_11;
 623       to = LEADING_CODE_EXT_21;
 624     }
 625   else
 626     {
 627       from = LEADING_CODE_EXT_21;
 628       to = LEADING_CODE_EXT_MAX + 1;
 629     }
 630
 631   for (charset = from; charset < to; charset++)
 632     if (!CHARSET_DEFINED_P (charset)) break;
 633
 634   return make_number (charset < to ? charset : 0);
 635 }
 636
 637 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 638        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 639 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 640  treated as a private charset.
 641 INFO-VECTOR is a vector of the format:
 642    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 643     SHORT-NAME LONG-NAME DESCRIPTION]
 644 The meanings of each elements is as follows:
 645 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 646 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 647 WIDTH (integer) is the number of columns a character in the charset
 648 occupies on the screen: one of 0, 1, and 2.
 649
 650 DIRECTION (integer) is the rendering direction of characters in the
 651 charset when rendering.  If 0, render from left to right, else
 652 render from right to left.
 653
 654 ISO-FINAL-CHAR (character) is the final character of the
 655 corresponding ISO 2022 charset.
 656 It may be -1 if the charset is internal use only.
 657
 658 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 659 while encoding to variants of ISO 2022 coding system, one of the
 660 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 661 It may be -1 if the charset is internal use only.
 662
 663 SHORT-NAME (string) is the short name to refer to the charset.
 664
 665 LONG-NAME (string) is the long name to refer to the charset.
 666
 667 DESCRIPTION (string) is the description string of the charset.  */)
 668        (charset_id, charset_symbol, info_vector)
 669      Lisp_Object charset_id, charset_symbol, info_vector;
 670 {
 671   Lisp_Object *vec;
 672
 673   if (!NILP (charset_id))
 674     CHECK_NUMBER (charset_id);
 675   CHECK_SYMBOL (charset_symbol);
 676   CHECK_VECTOR (info_vector);
 677
 678   if (! NILP (charset_id))
 679     {
 680       if (! CHARSET_VALID_P (XINT (charset_id)))
 681         error ("Invalid CHARSET: %d", XINT (charset_id));
 682       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 683         error ("Already defined charset: %d", XINT (charset_id));
 684     }
 685
 686   vec = XVECTOR (info_vector)->contents;
 687   if (XVECTOR (info_vector)->size != 9
 688       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 689       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 690       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 691       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 692       || !INTEGERP (vec[4])
 693       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 694       || !INTEGERP (vec[5])
 695       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 696       || !STRINGP (vec[6])
 697       || !STRINGP (vec[7])
 698       || !STRINGP (vec[8]))
 699     error ("Invalid info-vector argument for defining charset %s",
 700            XSYMBOL (charset_symbol)->name->data);
 701
 702   if (NILP (charset_id))
 703     {
 704       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 705       if (XINT (charset_id) == 0)
 706         error ("There's no room for a new private charset %s",
 707                XSYMBOL (charset_symbol)->name->data);
 708     }
 709
 710   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 711                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 712   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 713   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 714   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 715   Fupdate_coding_systems_internal ();
 716   return Qnil;
 717 }
 718
 719 DEFUN ("generic-character-list", Fgeneric_character_list,
 720        Sgeneric_character_list, 0, 0, 0,
 721        doc: /* Return a list of all possible generic characters.
 722 It includes a generic character for a charset not yet defined.  */)
 723      ()
 724 {
 725   return Vgeneric_character_list;
 726 }
 727
 728 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 729        Sget_unused_iso_final_char, 2, 2, 0,
 730        doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
 731 DIMENSION is the number of bytes to represent a character: 1 or 2.
 732 CHARS is the number of characters in a dimension: 94 or 96.
 733
 734 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 735 If there's no unused final char for the specified kind of charset,
 736 return nil.  */)
 737      (dimension, chars)
 738      Lisp_Object dimension, chars;
 739 {
 740   int final_char;
 741
 742   CHECK_NUMBER (dimension);
 743   CHECK_NUMBER (chars);
 744   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 745     error ("Invalid charset dimension %d, it should be 1 or 2",
 746            XINT (dimension));
 747   if (XINT (chars) != 94 && XINT (chars) != 96)
 748     error ("Invalid charset chars %d, it should be 94 or 96",
 749            XINT (chars));
 750   for (final_char = '0'; final_char <= '?'; final_char++)
 751     {
 752       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 753         break;
 754     }
 755   return (final_char <= '?' ? make_number (final_char) : Qnil);
 756 }
 757
 758 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 759        4, 4, 0,
 760        doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
 761 CHARSET should be defined by `defined-charset' in advance.  */)
 762      (dimension, chars, final_char, charset_symbol)
 763      Lisp_Object dimension, chars, final_char, charset_symbol;
 764 {
 765   int charset;
 766
 767   CHECK_NUMBER (dimension);
 768   CHECK_NUMBER (chars);
 769   CHECK_NUMBER (final_char);
 770   CHECK_SYMBOL (charset_symbol);
 771
 772   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 773     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 774   if (XINT (chars) != 94 && XINT (chars) != 96)
 775     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 776   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 777     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 778   if ((charset = get_charset_id (charset_symbol)) < 0)
 779     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 780
 781   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 782   return Qnil;
 783 }
 784
 785 /* Return information about charsets in the text at PTR of NBYTES
 786    bytes, which are NCHARS characters.  The value is:
 787
 788         0: Each character is represented by one byte.  This is always
 789            true for unibyte text.
 790         1: No charsets other than ascii eight-bit-control,
 791            eight-bit-graphic, and latin-1 are found.
 792         2: Otherwise.
 793
 794    In addition, if CHARSETS is nonzero, for each found charset N, set
 795    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 796    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 797    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 798    1 (note that there's no charset whose ID is 1).  */
 799
 800 int
 801 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 802      unsigned char *ptr;
 803      int nchars, nbytes, *charsets;
 804      Lisp_Object table;
 805 {
 806   if (nchars == nbytes)
 807     {
 808       if (charsets && nbytes > 0)
 809         {
 810           unsigned char *endp = ptr + nbytes;
 811           int maskbits = 0;
 812
 813           while (ptr < endp && maskbits != 7)
 814             {
 815               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 816               ptr++;
 817             }
 818
 819           if (maskbits & 1)
 820             charsets[CHARSET_ASCII] = 1;
 821           if (maskbits & 2)
 822             charsets[CHARSET_8_BIT_CONTROL] = 1;
 823           if (maskbits & 4)
 824             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 825         }
 826       return 0;
 827     }
 828   else
 829     {
 830       int return_val = 1;
 831       int bytes, charset, c1, c2;
 832
 833       if (! CHAR_TABLE_P (table))
 834         table = Qnil;
 835
 836       while (nchars-- > 0)
 837         {
 838           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 839           ptr += bytes;
 840
 841           if (!CHARSET_DEFINED_P (charset))
 842             charset = 1;
 843           else if (! NILP (table))
 844             {
 845               int c = translate_char (table, -1, charset, c1, c2);
 846               if (c >= 0)
 847                 charset = CHAR_CHARSET (c);
 848             }
 849
 850           if (return_val == 1
 851               && charset != CHARSET_ASCII
 852               && charset != CHARSET_8_BIT_CONTROL
 853               && charset != CHARSET_8_BIT_GRAPHIC
 854               && charset != charset_latin_iso8859_1)
 855             return_val = 2;
 856
 857           if (charsets)
 858             charsets[charset] = 1;
 859           else if (return_val == 2)
 860             break;
 861         }
 862       return return_val;
 863     }
 864 }
 865
 866 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 867        2, 3, 0,
 868        doc: /* Return a list of charsets in the region between BEG and END.
 869 BEG and END are buffer positions.
 870 Optional arg TABLE if non-nil is a translation table to look up.
 871
 872 If the region contains invalid multibyte characters,
 873 `unknown' is included in the returned list.
 874
 875 If the current buffer is unibyte, the returned list may contain
 876 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 877      (beg, end, table)
 878      Lisp_Object beg, end, table;
 879 {
 880   int charsets[MAX_CHARSET + 1];
 881   int from, from_byte, to, stop, stop_byte, i;
 882   Lisp_Object val;
 883
 884   validate_region (&beg, &end);
 885   from = XFASTINT (beg);
 886   stop = to = XFASTINT (end);
 887
 888   if (from < GPT && GPT < to)
 889     {
 890       stop = GPT;
 891       stop_byte = GPT_BYTE;
 892     }
 893   else
 894     stop_byte = CHAR_TO_BYTE (stop);
 895
 896   from_byte = CHAR_TO_BYTE (from);
 897
 898   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 899   while (1)
 900     {
 901       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 902                             stop_byte - from_byte, charsets, table);
 903       if (stop < to)
 904         {
 905           from = stop, from_byte = stop_byte;
 906           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 907         }
 908       else
 909         break;
 910     }
 911
 912   val = Qnil;
 913   if (charsets[1])
 914     val = Fcons (Qunknown, val);
 915   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 916     if (charsets[i])
 917       val = Fcons (CHARSET_SYMBOL (i), val);
 918   if (charsets[0])
 919     val = Fcons (Qascii, val);
 920   return val;
 921 }
 922
 923 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 924        1, 2, 0,
 925        doc: /* Return a list of charsets in STR.
 926 Optional arg TABLE if non-nil is a translation table to look up.
 927
 928 If the string contains invalid multibyte characters,
 929 `unknown' is included in the returned list.
 930
 931 If STR is unibyte, the returned list may contain
 932 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 933      (str, table)
 934      Lisp_Object str, table;
 935 {
 936   int charsets[MAX_CHARSET + 1];
 937   int i;
 938   Lisp_Object val;
 939
 940   CHECK_STRING (str);
 941
 942   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 943   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 944                         STRING_BYTES (XSTRING (str)), charsets, table);
 945
 946   val = Qnil;
 947   if (charsets[1])
 948     val = Fcons (Qunknown, val);
 949   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 950     if (charsets[i])
 951       val = Fcons (CHARSET_SYMBOL (i), val);
 952   if (charsets[0])
 953     val = Fcons (Qascii, val);
 954   return val;
 955 }
 956
 957 \f
 958 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 959        doc: /* Return a character made from arguments.
 960 Internal use only.  */)
 961      (charset, code1, code2)
 962      Lisp_Object charset, code1, code2;
 963 {
 964   int charset_id, c1, c2;
 965
 966   CHECK_NUMBER (charset);
 967   charset_id = XINT (charset);
 968   if (!CHARSET_DEFINED_P (charset_id))
 969     error ("Invalid charset ID: %d", XINT (charset));
 970
 971   if (NILP (code1))
 972     c1 = 0;
 973   else
 974     {
 975       CHECK_NUMBER (code1);
 976       c1 = XINT (code1);
 977     }
 978   if (NILP (code2))
 979     c2 = 0;
 980   else
 981     {
 982       CHECK_NUMBER (code2);
 983       c2 = XINT (code2);
 984     }
 985
 986   if (charset_id == CHARSET_ASCII)
 987     {
 988       if (c1 < 0 || c1 > 0x7F)
 989         goto invalid_code_posints;
 990       return make_number (c1);
 991     }
 992   else if (charset_id == CHARSET_8_BIT_CONTROL)
 993     {
 994       if (NILP (code1))
 995         c1 = 0x80;
 996       else if (c1 < 0x80 || c1 > 0x9F)
 997         goto invalid_code_posints;
 998       return make_number (c1);
 999     }
1000   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1001     {
1002       if (NILP (code1))
1003         c1 = 0xA0;
1004       else if (c1 < 0xA0 || c1 > 0xFF)
1005         goto invalid_code_posints;
1006       return make_number (c1);
1007     }
1008   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1009     goto invalid_code_posints;
1010   c1 &= 0x7F;
1011   c2 &= 0x7F;
1012   if (c1 == 0
1013       ? c2 != 0
1014       : (c2 == 0
1015          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1016          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1017     goto invalid_code_posints;
1018   return make_number (MAKE_CHAR (charset_id, c1, c2));
1019
1020  invalid_code_posints:
1021   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1022 }
1023
1024 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1025        doc: /* Return list of charset and one or two position-codes of CHAR.
1026 If CHAR is invalid as a character code,
1027 return a list of symbol `unknown' and CHAR.  */)
1028      (ch)
1029      Lisp_Object ch;
1030 {
1031   int c, charset, c1, c2;
1032
1033   CHECK_NUMBER (ch);
1034   c = XFASTINT (ch);
1035   if (!CHAR_VALID_P (c, 1))
1036     return Fcons (Qunknown, Fcons (ch, Qnil));
1037   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1038   return (c2 >= 0
1039           ? Fcons (CHARSET_SYMBOL (charset),
1040                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1041           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1042 }
1043
1044 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1045        doc: /* Return charset of CHAR.  */)
1046      (ch)
1047      Lisp_Object ch;
1048 {
1049   CHECK_NUMBER (ch);
1050
1051   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1052 }
1053
1054 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1055        doc: /* Return charset of a character in the current buffer at position POS.
1056 If POS is nil, it defauls to the current point.
1057 If POS is out of range, the value is nil.  */)
1058      (pos)
1059      Lisp_Object pos;
1060 {
1061   Lisp_Object ch;
1062   int charset;
1063
1064   ch = Fchar_after (pos);
1065   if (! INTEGERP (ch))
1066     return ch;
1067   charset = CHAR_CHARSET (XINT (ch));
1068   return CHARSET_SYMBOL (charset);
1069 }
1070
1071 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1072        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1073
1074 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1075 by their DIMENSION, CHARS, and FINAL-CHAR,
1076 where as Emacs distinguishes them by charset symbol.
1077 See the documentation of the function `charset-info' for the meanings of
1078 DIMENSION, CHARS, and FINAL-CHAR.  */)
1079      (dimension, chars, final_char)
1080      Lisp_Object dimension, chars, final_char;
1081 {
1082   int charset;
1083
1084   CHECK_NUMBER (dimension);
1085   CHECK_NUMBER (chars);
1086   CHECK_NUMBER (final_char);
1087
1088   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1089     return Qnil;
1090   return CHARSET_SYMBOL (charset);
1091 }
1092
1093 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1094    generic character.  If GENERICP is zero, return nonzero iff C is a
1095    valid normal character.  Do not call this function directly,
1096    instead use macro CHAR_VALID_P.  */
1097 int
1098 char_valid_p (c, genericp)
1099      int c, genericp;
1100 {
1101   int charset, c1, c2;
1102
1103   if (c < 0 || c >= MAX_CHAR)
1104     return 0;
1105   if (SINGLE_BYTE_CHAR_P (c))
1106     return 1;
1107   SPLIT_CHAR (c, charset, c1, c2);
1108   if (genericp)
1109     {
1110       if (c1)
1111         {
1112           if (c2 <= 0) c2 = 0x20;
1113         }
1114       else
1115         {
1116           if (c2 <= 0) c1 = c2 = 0x20;
1117         }
1118     }
1119   return (CHARSET_DEFINED_P (charset)
1120           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1121 }
1122
1123 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1124        doc: /* Return t if OBJECT is a valid normal character.
1125 If optional arg GENERICP is non-nil, also return t if OBJECT is
1126 a valid generic character.  */)
1127      (object, genericp)
1128      Lisp_Object object, genericp;
1129 {
1130   if (! NATNUMP (object))
1131     return Qnil;
1132   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1133 }
1134
1135 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1136        Sunibyte_char_to_multibyte, 1, 1, 0,
1137        doc: /* Convert the unibyte character CH to multibyte character.
1138 The conversion is done based on `nonascii-translation-table' (which see)
1139  or `nonascii-insert-offset' (which see).  */)
1140      (ch)
1141      Lisp_Object ch;
1142 {
1143   int c;
1144
1145   CHECK_NUMBER (ch);
1146   c = XINT (ch);
1147   if (c < 0 || c >= 0400)
1148     error ("Invalid unibyte character: %d", c);
1149   c = unibyte_char_to_multibyte (c);
1150   if (c < 0)
1151     error ("Can't convert to multibyte character: %d", XINT (ch));
1152   return make_number (c);
1153 }
1154
1155 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1156        Smultibyte_char_to_unibyte, 1, 1, 0,
1157        doc: /* Convert the multibyte character CH to unibyte character.
1158 The conversion is done based on `nonascii-translation-table' (which see)
1159  or `nonascii-insert-offset' (which see).  */)
1160      (ch)
1161      Lisp_Object ch;
1162 {
1163   int c;
1164
1165   CHECK_NUMBER (ch);
1166   c = XINT (ch);
1167   if (! CHAR_VALID_P (c, 0))
1168     error ("Invalid multibyte character: %d", c);
1169   c = multibyte_char_to_unibyte (c, Qnil);
1170   if (c < 0)
1171     error ("Can't convert to unibyte character: %d", XINT (ch));
1172   return make_number (c);
1173 }
1174
1175 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1176        doc: /* Return 1 regardless of the argument CHAR.
1177 This is now an obsolete function.  We keep it just for backward compatibility.  */)
1178      (ch)
1179      Lisp_Object ch;
1180 {
1181   CHECK_NUMBER (ch);
1182   return make_number (1);
1183 }
1184
1185 /* Return how many bytes C will occupy in a multibyte buffer.
1186    Don't call this function directly, instead use macro CHAR_BYTES.  */
1187 int
1188 char_bytes (c)
1189      int c;
1190 {
1191   int charset;
1192
1193   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1194     return 1;
1195   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1196     return 1;
1197
1198   charset = CHAR_CHARSET (c);
1199   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1200 }
1201
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   Tab takes `tab_width' columns, newline takes none, other codes
   below 0x20 and 0x7F take 2 or 4 columns depending on `ctl_arrow',
   and 0x20..0x7E take one.  A larger byte takes the width recorded
   for it as a leading code when the buffer is multibyte and the byte
   is a base leading code, otherwise 4.

   C is evaluated several times, so it must not have side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1219
1220 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1221        doc: /* Return width of CHAR when displayed in the current buffer.
1222 The width is measured by how many columns it occupies on the screen.
1223 Tab is taken to occupy `tab-width' columns.  */)
1224      (ch)
1225      Lisp_Object ch;
1226 {
1227   Lisp_Object val, disp;
1228   int c;
1229   struct Lisp_Char_Table *dp = buffer_display_table ();
1230
1231   CHECK_NUMBER (ch);
1232
1233   c = XINT (ch);
1234
1235   /* Get the way the display table would display it.  */
1236   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1237
1238   if (VECTORP (disp))
1239     XSETINT (val, XVECTOR (disp)->size);
1240   else if (SINGLE_BYTE_CHAR_P (c))
1241     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1242   else
1243     {
1244       int charset = CHAR_CHARSET (c);
1245
1246       XSETFASTINT (val, CHARSET_WIDTH (charset));
1247     }
1248   return val;
1249 }
1250
1251 /* Return width of string STR of length LEN when displayed in the
1252    current buffer.  The width is measured by how many columns it
1253    occupies on the screen.  */
1254
1255 int
1256 strwidth (str, len)
1257      unsigned char *str;
1258      int len;
1259 {
1260   return c_string_width (str, len, -1, NULL, NULL);
1261 }
1262
1263 /* Return width of string STR of length LEN when displayed in the
1264    current buffer.  The width is measured by how many columns it
1265    occupies on the screen.  If PRECISION > 0, return the width of
1266    longest substring that doesn't exceed PRECISION, and set number of
1267    characters and bytes of the substring in *NCHARS and *NBYTES
1268    respectively.  */
1269
1270 int
1271 c_string_width (str, len, precision, nchars, nbytes)
1272      unsigned char *str;
1273      int precision, *nchars, *nbytes;
1274 {
1275   int i = 0, i_byte = 0;
1276   int width = 0;
1277   int chars;
1278   struct Lisp_Char_Table *dp = buffer_display_table ();
1279
1280   while (i_byte < len)
1281     {
1282       int bytes, thiswidth;
1283       Lisp_Object val;
1284
1285       if (dp)
1286         {
1287           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1288
1289           chars = 1;
1290           val = DISP_CHAR_VECTOR (dp, c);
1291           if (VECTORP (val))
1292             thiswidth = XVECTOR (val)->size;
1293           else
1294             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1295         }
1296       else
1297         {
1298           chars = 1;
1299           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1300           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1301         }
1302
1303       if (precision > 0
1304           && (width + thiswidth > precision))
1305         {
1306           *nchars = i;
1307           *nbytes = i_byte;
1308           return width;
1309         }
1310       i++;
1311       i_byte += bytes;
1312       width += thiswidth;
1313   }
1314
1315   if (precision > 0)
1316     {
1317       *nchars = i;
1318       *nbytes = i_byte;
1319     }
1320
1321   return width;
1322 }
1323
1324 /* Return width of Lisp string STRING when displayed in the current
1325    buffer.  The width is measured by how many columns it occupies on
1326    the screen while paying attention to compositions.  If PRECISION >
1327    0, return the width of longest substring that doesn't exceed
1328    PRECISION, and set number of characters and bytes of the substring
1329    in *NCHARS and *NBYTES respectively.  */
1330
1331 int
1332 lisp_string_width (string, precision, nchars, nbytes)
1333      Lisp_Object string;
1334      int precision, *nchars, *nbytes;
1335 {
1336   int len = XSTRING (string)->size;
1337   int len_byte = STRING_BYTES (XSTRING (string));
1338   unsigned char *str = XSTRING (string)->data;
1339   int i = 0, i_byte = 0;
1340   int width = 0;
1341   struct Lisp_Char_Table *dp = buffer_display_table ();
1342
1343   while (i < len)
1344     {
1345       int chars, bytes, thiswidth;
1346       Lisp_Object val;
1347       int cmp_id;
1348       int ignore, end;
1349
1350       if (find_composition (i, -1, &ignore, &end, &val, string)
1351           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1352               >= 0))
1353         {
1354           thiswidth = composition_table[cmp_id]->width;
1355           chars = end - i;
1356           bytes = string_char_to_byte (string, end) - i_byte;
1357         }
1358       else if (dp)
1359         {
1360           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1361
1362           chars = 1;
1363           val = DISP_CHAR_VECTOR (dp, c);
1364           if (VECTORP (val))
1365             thiswidth = XVECTOR (val)->size;
1366           else
1367             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1368         }
1369       else
1370         {
1371           chars = 1;
1372           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1373           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1374         }
1375
1376       if (precision > 0
1377           && (width + thiswidth > precision))
1378         {
1379           *nchars = i;
1380           *nbytes = i_byte;
1381           return width;
1382         }
1383       i += chars;
1384       i_byte += bytes;
1385       width += thiswidth;
1386   }
1387
1388   if (precision > 0)
1389     {
1390       *nchars = i;
1391       *nbytes = i_byte;
1392     }
1393
1394   return width;
1395 }
1396
1397 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1398        doc: /* Return width of STRING when displayed in the current buffer.
1399 Width is measured by how many columns it occupies on the screen.
1400 When calculating width of a multibyte character in STRING,
1401 only the base leading-code is considered; the validity of
1402 the following bytes is not checked.  Tabs in STRING are always
1403 taken to occupy `tab-width' columns.  */)
1404      (str)
1405      Lisp_Object str;
1406 {
1407   Lisp_Object val;
1408
1409   CHECK_STRING (str);
1410   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1411   return val;
1412 }
1413
1414 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1415        doc: /* Return the direction of CHAR.
1416 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1417      (ch)
1418      Lisp_Object ch;
1419 {
1420   int charset;
1421
1422   CHECK_NUMBER (ch);
1423   charset = CHAR_CHARSET (XFASTINT (ch));
1424   if (!CHARSET_DEFINED_P (charset))
1425     invalid_character (XINT (ch));
1426   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1427 }
1428
1429 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1430        doc: /* Return number of characters between BEG and END.  */)
1431      (beg, end)
1432      Lisp_Object beg, end;
1433 {
1434   int from, to;
1435
1436   CHECK_NUMBER_COERCE_MARKER (beg);
1437   CHECK_NUMBER_COERCE_MARKER (end);
1438
1439   from = min (XFASTINT (beg), XFASTINT (end));
1440   to = max (XFASTINT (beg), XFASTINT (end));
1441
1442   return make_number (to - from);
1443 }
1444
1445 /* Return the number of characters in the NBYTES bytes at PTR.
1446    This works by looking at the contents and checking for multibyte sequences.
1447    However, if the current buffer has enable-multibyte-characters = nil,
1448    we treat each byte as a character.  */
1449
1450 int
1451 chars_in_text (ptr, nbytes)
1452      unsigned char *ptr;
1453      int nbytes;
1454 {
1455   /* current_buffer is null at early stages of Emacs initialization.  */
1456   if (current_buffer == 0
1457       || NILP (current_buffer->enable_multibyte_characters))
1458     return nbytes;
1459
1460   return multibyte_chars_in_text (ptr, nbytes);
1461 }
1462
/* Return the number of characters in the NBYTES bytes at PTR, found
   by stepping over the text one multibyte sequence at a time.
   Unlike chars_in_text, this ignores enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count, seq_bytes;

  for (count = 0; ptr < limit; count++)
    {
      /* SEQ_BYTES receives the length of the sequence at PTR.  */
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_bytes);
      ptr += seq_bytes;
    }

  return count;
}
1487
/* Parse unibyte text at STR of LEN bytes as multibyte text, and
   store the numbers of characters and bytes it would have in *NCHARS
   and *NBYTES.  A byte that does not start a multibyte sequence
   accepted by UNIBYTE_STR_AS_MULTIBYTE_P is counted as one character
   of 2 bytes (8-bit characters in the range 0x80..0x9F are
   represented by 2 bytes in multibyte text).  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *limit = str + len;
  int seq_len;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
        {
          /* Already a multibyte sequence: keep it as it is.  */
          str += seq_len;
          byte_count += seq_len;
        }
      else
        {
          /* A lone byte that will need the 2-byte form.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1512
/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
   It actually converts only 8-bit characters in the range 0x80..0x9F
   that don't construct multibyte characters to multibyte forms.  If
   NCHARS is nonzero, set *NCHARS to the number of characters in the
   text.  It is assured that we can use LEN bytes at STR as a work
   area and that is enough.  Return the number of bytes of the
   resulting text.  */

int
str_as_multibyte (str, len, nbytes, nchars)
     unsigned char *str;
     int len, nbytes, *nchars;
{
  unsigned char *p = str, *endp = str + nbytes;
  unsigned char *to;
  int chars = 0;
  int n;

  /* Skip the leading part that already consists of sequences
     accepted by UNIBYTE_STR_AS_MULTIBYTE_P; it needs no change.  */
  while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
    p += n, chars++;
  if (nchars)
    *nchars = chars;
  /* Nothing needs converting: the text is unchanged.  */
  if (p == endp)
    return nbytes;

  /* Move the unprocessed tail to the end of the LEN-byte work area,
     then rebuild it from TO, so that the growing output does not
     overtake the input still to be read.  NOTE(review): safe_bcopy
     is presumably safe for overlapping regions -- confirm.  */
  to = p;
  nbytes = endp - p;
  endp = str + len;
  safe_bcopy (p, endp - nbytes, nbytes);
  p = endp - nbytes;
  while (p < endp)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
        {
          /* Copy an acceptable sequence of N bytes verbatim.  */
          while (n--)
            *to++ = *p++;
        }
      else
        {
          /* Emit the 2-byte form: the 8-bit-control leading code
             followed by the byte offset by 0x20.  */
          *to++ = LEADING_CODE_8_BIT_CONTROL;
          *to++ = *p++ + 0x20;
        }
      chars++;
    }
  if (nchars)
    *nchars = chars;
  return (to - str);
}
1561
/* Parse unibyte string at STR of LEN bytes, and return the number of
   bytes it may occupy when converted to multibyte string by
   `str_to_multibyte'.  Each byte in 0x80..0x9F counts as 2 bytes,
   every other byte as 1.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  unsigned char *limit = str + len;
  int total = 0;

  while (str < limit)
    {
      unsigned char byte = *str++;

      total++;
      if (byte >= 0x80 && byte < 0xA0)
        total++;
    }
  return total;
}
1578
1579 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1580    that contains the same single-byte characters.  It actually
1581    converts all 8-bit characters to multibyte forms.  It is assured
1582    that we can use LEN bytes at STR as a work area and that is
1583    enough.  */
1584
1585 int
1586 str_to_multibyte (str, len, bytes)
1587      unsigned char *str;
1588      int len, bytes;
1589 {
1590   unsigned char *p = str, *endp = str + bytes;
1591   unsigned char *to;
1592
1593   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1594   if (p == endp)
1595     return bytes;
1596   to = p;
1597   bytes = endp - p;
1598   endp = str + len;
1599   safe_bcopy (p, endp - bytes, bytes);
1600   p = endp - bytes;
1601   while (p < endp)
1602     {
1603       if (*p < 0x80 || *p >= 0xA0)
1604         *to++ = *p++;
1605       else
1606         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1607     }
1608   return (to - str);
1609 }
1610
1611 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1612    actually converts only 8-bit characters in the range 0x80..0x9F to
1613    unibyte forms.  */
1614
1615 int
1616 str_as_unibyte (str, bytes)
1617      unsigned char *str;
1618      int bytes;
1619 {
1620   unsigned char *p = str, *endp = str + bytes;
1621   unsigned char *to = str;
1622
1623   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1624   to = p;
1625   while (p < endp)
1626     {
1627       if (*p == LEADING_CODE_8_BIT_CONTROL)
1628         *to++ = *(p + 1) - 0x20, p += 2;
1629       else
1630         *to++ = *p++;
1631     }
1632   return (to - str);
1633 }
1634
1635 \f
1636 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1637   doc: /* Concatenate all the argument characters and make the result a string.
1638 usage: (string &rest CHARACTERS)  */)
1639      (n, args)
1640      int n;
1641      Lisp_Object *args;
1642 {
1643   int i;
1644   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1645   unsigned char *p = buf;
1646   int c;
1647   int multibyte = 0;
1648
1649   for (i = 0; i < n; i++)
1650     {
1651       CHECK_NUMBER (args[i]);
1652       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1653         multibyte = 1;
1654     }
1655
1656   for (i = 0; i < n; i++)
1657     {
1658       c = XINT (args[i]);
1659       if (multibyte)
1660         p += CHAR_STRING (c, p);
1661       else
1662         *p++ = c;
1663     }
1664
1665   return make_string_from_bytes (buf, n, p - buf);
1666 }
1667
1668 #endif /* emacs */
1669 \f
1670 int
1671 charset_id_internal (charset_name)
1672      char *charset_name;
1673 {
1674   Lisp_Object val;
1675
1676   val= Fget (intern (charset_name), Qcharset);
1677   if (!VECTORP (val))
1678     error ("Charset %s is not defined", charset_name);
1679
1680   return (XINT (XVECTOR (val)->contents[0]));
1681 }
1682
DEFUN ("setup-special-charsets", Fsetup_special_charsets,
       Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
     ()
{
  /* Cache, in C variables, the IDs of the charsets named below.
     charset_id_internal signals an error for a name that has no
     charset vector, in which case the remaining variables are left
     unassigned.  */
  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
  charset_big5_1 = charset_id_internal ("chinese-big5-1");
  charset_big5_2 = charset_id_internal ("chinese-big5-2");
  return Qnil;
}
1696
/* Create the Lisp objects and fill in the C tables used by the rest
   of this file: Vcharset_table, Vcharset_symbol_table,
   iso_charset_table, bytes_by_char_head, width_by_char_head and
   Vgeneric_character_list.  Also reset nonascii_insert_offset and
   Vnonascii_translation_table.  */
void
init_charset_once ()
{
  int i, j, k;

  staticpro (&Vcharset_table);
  staticpro (&Vcharset_symbol_table);
  staticpro (&Vgeneric_character_list);

  /* This has to be done here, before we call Fmake_char_table.  */
  Qcharset_table = intern ("charset-table");
  staticpro (&Qcharset_table);

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");

  /* Now we are ready to set up this property, so we can
     create the charset table.  */
  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);

  /* Every slot of the symbol table starts out as `unknown'.  */
  Qunknown = intern ("unknown");
  staticpro (&Qunknown);
  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
                                        Qunknown);

  /* Setup tables.  */
  /* -1 marks an ISO specification with no charset assigned; Fiso_charset
     returns nil for a negative entry.  */
  for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
      for (k = 0; k < 128; k++)
        iso_charset_table [i][j][k] = -1;

  /* Default every leading byte to a 1-byte sequence, then record the
     lengths for the four private leading codes.  */
  for (i = 0; i < 256; i++)
    bytes_by_char_head[i] = 1;
  bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
  bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
  bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;

  /* Default widths: 1 column for bytes below 128 and 4 for the rest,
     then the widths for the four private leading codes.  */
  for (i = 0; i < 128; i++)
    width_by_char_head[i] = 1;
  for (; i < 256; i++)
    width_by_char_head[i] = 4;
  width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
  width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
  width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;

  {
    Lisp_Object val;

    /* Build the list by consing onto the front over four ranges of I
       and reversing at the end, so it is in order of increasing I.
       NOTE(review): the shifts by 7 and 14 presumably place a charset
       field above one or two 7-bit position codes -- confirm against
       the character layout in charset.h.  */
    val = Qnil;
    for (i = 0x81; i < 0x90; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0x9A; i++)
      val = Fcons (make_number ((i - 0x8F) << 14), val);
    for (i = 0xA0; i < 0xF0; i++)
      val = Fcons (make_number ((i - 0x70) << 7), val);
    for (; i < 0xFF; i++)
      val = Fcons (make_number ((i - 0xE0) << 14), val);
    Vgeneric_character_list = Fnreverse (val);
  }

  nonascii_insert_offset = 0;
  Vnonascii_translation_table = Qnil;
}
1765
1766 #ifdef emacs
1767
1768 void
1769 syms_of_charset ()
1770 {
1771   Qcharset = intern ("charset");
1772   staticpro (&Qcharset);
1773
1774   Qascii = intern ("ascii");
1775   staticpro (&Qascii);
1776
1777   Qeight_bit_control = intern ("eight-bit-control");
1778   staticpro (&Qeight_bit_control);
1779
1780   Qeight_bit_graphic = intern ("eight-bit-graphic");
1781   staticpro (&Qeight_bit_graphic);
1782
1783   /* Define special charsets ascii, eight-bit-control, and
1784      eight-bit-graphic.  */
1785   update_charset_table (make_number (CHARSET_ASCII),
1786                         make_number (1), make_number (94),
1787                         make_number (1),
1788                         make_number (0),
1789                         make_number ('B'),
1790                         make_number (0),
1791                         build_string ("ASCII"),
1792                         Qnil,   /* same as above */
1793                         build_string ("ASCII (ISO646 IRV)"));
1794   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1795   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1796
1797   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1798                         make_number (1), make_number (96),
1799                         make_number (4),
1800                         make_number (0),
1801                         make_number (-1),
1802                         make_number (-1),
1803                         build_string ("8-bit control code (0x80..0x9F)"),
1804                         Qnil,   /* same as above */
1805                         Qnil);  /* same as above */
1806   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1807   Fput (Qeight_bit_control, Qcharset,
1808         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1809
1810   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1811                         make_number (1), make_number (96),
1812                         make_number (4),
1813                         make_number (0),
1814                         make_number (-1),
1815                         make_number (-1),
1816                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1817                         Qnil,   /* same as above */
1818                         Qnil);  /* same as above */
1819   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1820   Fput (Qeight_bit_graphic, Qcharset,
1821         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1822
1823   Qauto_fill_chars = intern ("auto-fill-chars");
1824   staticpro (&Qauto_fill_chars);
1825   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1826
1827   defsubr (&Sdefine_charset);
1828   defsubr (&Sgeneric_character_list);
1829   defsubr (&Sget_unused_iso_final_char);
1830   defsubr (&Sdeclare_equiv_charset);
1831   defsubr (&Sfind_charset_region);
1832   defsubr (&Sfind_charset_string);
1833   defsubr (&Smake_char_internal);
1834   defsubr (&Ssplit_char);
1835   defsubr (&Schar_charset);
1836   defsubr (&Scharset_after);
1837   defsubr (&Siso_charset);
1838   defsubr (&Schar_valid_p);
1839   defsubr (&Sunibyte_char_to_multibyte);
1840   defsubr (&Smultibyte_char_to_unibyte);
1841   defsubr (&Schar_bytes);
1842   defsubr (&Schar_width);
1843   defsubr (&Sstring_width);
1844   defsubr (&Schar_direction);
1845   defsubr (&Schars_in_region);
1846   defsubr (&Sstring);
1847   defsubr (&Ssetup_special_charsets);
1848
1849   DEFVAR_LISP ("charset-list", &Vcharset_list,
1850                doc: /* List of charsets ever defined.  */);
1851   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1852                                         Fcons (Qeight_bit_graphic, Qnil)));
1853
1854   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1855                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1856 An ID of a translation table is an index of this vector.  */);
1857   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1858
1859   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1860               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1861   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1862
1863   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1864               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1865   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1866
1867   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1868               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1869   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1870
1871   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1872               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1873   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1874
1875   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1876               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1877 This is used for converting unibyte text to multibyte,
1878 and for inserting character codes specified by number.
1879
1880 This serves to convert a Latin-1 or similar 8-bit character code
1881 to the corresponding Emacs multibyte character code.
1882 Typically the value should be (- (make-char CHARSET 0) 128),
1883 for your choice of character set.
1884 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1885   nonascii_insert_offset = 0;
1886
1887   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1888                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1889 This is used for converting unibyte text to multibyte,
1890 and for inserting character codes specified by number.
1891
1892 Conversion is performed only when multibyte characters are enabled,
1893 and it serves to convert a Latin-1 or similar 8-bit character code
1894 to the corresponding Emacs character code.
1895
1896 If this is nil, `nonascii-insert-offset' is used instead.
1897 See also the docstring of `make-translation-table'.  */);
1898   Vnonascii_translation_table = Qnil;
1899
1900   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1901                doc: /* A char-table for characters which invoke auto-filling.
1902 Such characters have value t in this table.  */);
1903   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1904   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1905   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1906 }
1907
1908 #endif /* emacs */