src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* At first, see the document in `charset.h' to understand the code in
  23    this file.  */
  24
  25 #include <stdio.h>
  26
  27 #ifdef emacs
  28
  29 #include <sys/types.h>
  30 #include <config.h>
  31 #include "lisp.h"
  32 #include "buffer.h"
  33 #include "charset.h"
  34 #include "coding.h"
  35 #include "disptab.h"
  36
  37 #else  /* not emacs */
  38
  39 #include "mulelib.h"
  40
  41 #endif /* emacs */
  42
  43 Lisp_Object Qcharset, Qascii, Qcomposition;
  44
  45 /* Declaration of special leading-codes.  */
  46 int leading_code_composition;   /* for composite characters */
  47 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  48 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  49 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  50 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  51
  52 /* Declaration of special charsets.  */
  53 int charset_ascii;              /* ASCII */
  54 int charset_composition;        /* for a composite character */
  55 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  56 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  57 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  58 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  59 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  60 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  61 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  62
  63 int min_composite_char;
  64
  65 Lisp_Object Qcharset_table;
  66
  67 /* A char-table containing information of each character set.  */
  68 Lisp_Object Vcharset_table;
  69
  70 /* A vector of charset symbol indexed by charset-id.  This is used
  71    only for returning charset symbol from C functions.  */
  72 Lisp_Object Vcharset_symbol_table;
  73
  74 /* A list of charset symbols ever defined.  */
  75 Lisp_Object Vcharset_list;
  76
  77 /* Vector of unification table ever defined.
  78    An ID of a unification table is an index of this vector.  */
  79 Lisp_Object Vcharacter_unification_table_vector;
  80
  81 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  82 int bytes_by_char_head[256];
  83 int width_by_char_head[256];
  84
  85 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  86    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  87 int iso_charset_table[2][2][128];
  88
  89 /* Table of pointers to the structure `cmpchar_info' indexed by
  90    CMPCHAR-ID.  */
  91 struct cmpchar_info **cmpchar_table;
  92 /* The current size of `cmpchar_table'.  */
  93 static int cmpchar_table_size;
  94 /* Number of the current composite characters.  */
  95 int n_cmpchars;
  96
  97 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  98 unsigned char *_fetch_multibyte_char_p;
  99 int _fetch_multibyte_char_len;
 100
 101 /* Offset to add to a non-ASCII value when inserting it.  */
 102 int nonascii_insert_offset;
 103
 104 /* Translation table for converting non-ASCII unibyte characters
 105    to multibyte codes, or nil.  */
 106 Lisp_Object Vnonascii_translate_table;
 107
 108 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
 109 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: %o, %d, 0x%x", c);
 116 }
 117
 118
 119 /* Set STR a pointer to the multi-byte form of the character C.  If C
 120    is not a composite character, the multi-byte form is set in WORKBUF
 121    and STR points WORKBUF.  The caller should allocate at least 4-byte
 122    area at WORKBUF in advance.  Returns the length of the multi-byte
 123    form.  If C is an invalid character to have a multi-byte form,
 124    signal an error.
 125
 126    Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
 127    function directly if C can be an ASCII character.  */
 128
 129 int
 130 non_ascii_char_to_string (c, workbuf, str)
 131      int c;
 132      unsigned char *workbuf, **str;
 133 {
 134   int charset, c1, c2;
 135
 136   if (COMPOSITE_CHAR_P (c))
 137     {
 138       int cmpchar_id = COMPOSITE_CHAR_ID (c);
 139
 140       if (cmpchar_id < n_cmpchars)
 141         {
 142           *str = cmpchar_table[cmpchar_id]->data;
 143           return cmpchar_table[cmpchar_id]->len;
 144         }
 145       else
 146         {
 147           invalid_character (c);
 148         }
 149     }
 150
 151   SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
 152   if (!charset
 153       || ! CHARSET_DEFINED_P (charset)
 154       || c1 >= 0 && c1 < 32
 155       || c2 >= 0 && c2 < 32)
 156     invalid_character (c);
 157
 158   *str = workbuf;
 159   *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
 160   if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
 161     workbuf++;
 162   *workbuf++ = c1 | 0x80;
 163   if (c2 >= 0)
 164     *workbuf++ = c2 | 0x80;
 165
 166   return (workbuf - *str);
 167 }
 168
 169 /* Return a non-ASCII character of which multi-byte form is at STR of
 170    length LEN.  If ACTUAL_LEN is not NULL, the actual length of the
 171    character is set to the address ACTUAL_LEN.
 172
 173    Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
 174    directly if STR can hold an ASCII character.  */
 175
 176 string_to_non_ascii_char (str, len, actual_len)
 177      const unsigned char *str;
 178      int len, *actual_len;
 179 {
 180   int charset;
 181   unsigned char c1, c2;
 182   register int c, bytes;
 183
 184   c = *str;
 185   bytes = 1;
 186
 187   if (BASE_LEADING_CODE_P (c))
 188     {
 189       while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
 190
 191       if (c == LEADING_CODE_COMPOSITION)
 192         {
 193           int cmpchar_id = str_cmpchar_id (str, bytes);
 194
 195           if (cmpchar_id >= 0)
 196             c = MAKE_COMPOSITE_CHAR (cmpchar_id);
 197         }
 198       else
 199         {
 200           int charset = c, c1, c2 = 0;
 201
 202           str++;
 203           if (c >= LEADING_CODE_PRIVATE_11)
 204             charset = *str++;
 205           if (BYTES_BY_CHAR_HEAD (c) <= bytes && CHARSET_DEFINED_P (charset))
 206             {
 207               c1 = *str++ & 0x7f;
 208               if (CHARSET_DIMENSION (charset) == 2)
 209                 c2 = *str & 0x7F;
 210               c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
 211             }
 212         }
 213     }
 214
 215   if (actual_len)
 216     *actual_len = bytes;
 217   return c;
 218 }
 219
 220 /* Return the length of the multi-byte form at string STR of length LEN.  */
 221 int
 222 multibyte_form_length (str, len)
 223      const unsigned char *str;
 224      int len;
 225 {
 226   int bytes = 1;
 227
 228   if (BASE_LEADING_CODE_P (*str))
 229     while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
 230
 231   return bytes;
 232 }
 233
 234 /* Check if string STR of length LEN contains valid multi-byte form of
 235    a character.  If valid, charset and position codes of the character
 236    is set at *CHARSET, *C1, and *C2, and return 0.  If not valid,
 237    return -1.  This should be used only in the macro SPLIT_STRING
 238    which checks range of STR in advance.  */
 239
 240 split_non_ascii_string (str, len, charset, c1, c2)
 241      register const unsigned char *str;
 242      register unsigned char *c1, *c2;
 243      register int len, *charset;
 244 {
 245   register unsigned int cs = *str++;
 246
 247   if (cs == LEADING_CODE_COMPOSITION)
 248     {
 249       int cmpchar_id = str_cmpchar_id (str - 1, len);
 250
 251       if (cmpchar_id < 0)
 252         return -1;
 253       *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
 254     }
 255   else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
 256            && CHARSET_DEFINED_P (cs))
 257     {
 258       *charset = cs;
 259       if (*str < 0xA0)
 260         return -1;
 261       *c1 = (*str++) & 0x7F;
 262       if (CHARSET_DIMENSION (cs) == 2)
 263         {
 264           if (*str < 0xA0)
 265             return -1;
 266           *c2 = (*str++) & 0x7F;
 267         }
 268     }
 269   else
 270     return -1;
 271   return 0;
 272 }
 273
 274 /* Return a character unified with C (or a character made of CHARSET,
 275    C1, and C2) in unification table TABLE.  If no unification is found
 276    in TABLE, return C.  */
 277 unify_char (table, c, charset, c1, c2)
 278      Lisp_Object table;
 279      int c, charset, c1, c2;
 280 {
 281   Lisp_Object ch;
 282   int alt_charset, alt_c1, alt_c2, dimension;
 283
 284   if (c < 0) c = MAKE_CHAR (charset, c1, c2);
 285   if (!CHAR_TABLE_P (table)
 286       || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
 287       || XINT (ch) < 0)
 288     return c;
 289
 290   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 291   dimension = CHARSET_DIMENSION (alt_charset);
 292   if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
 293     /* CH is not a generic character, just return it.  */
 294     return XFASTINT (ch);
 295
 296   /* Since CH is a generic character, we must return a specific
 297      charater which has the same position codes as C from CH.  */
 298   if (charset < 0)
 299     SPLIT_CHAR (c, charset, c1, c2);
 300   if (dimension != CHARSET_DIMENSION (charset))
 301     /* We can't make such a character because of dimension mismatch.  */
 302     return c;
 303   return MAKE_CHAR (alt_charset, c1, c2);
 304 }
 305
 306 /* Convert the unibyte character C to multibyte based on
 307    Vnonascii_translate_table or nonascii_insert_offset.  If they can't
 308    convert C to a valid multibyte character, convert it based on
 309    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 310
 311 unibyte_char_to_multibyte (c)
 312      int c;
 313 {
 314   if (c >= 0240 && c < 0400)
 315     {
 316       int c_save = c;
 317
 318       if (! NILP (Vnonascii_translate_table))
 319         c = XINT (Faref (Vnonascii_translate_table, make_number (c)));
 320       else if (nonascii_insert_offset > 0)
 321         c += nonascii_insert_offset;
 322       if (c >= 0240 && (c < 0400 || ! VALID_MULTIBYTE_CHAR_P (c)))
 323         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 324     }
 325   return c;
 326 }
 327 \f
 328 /* Update the table Vcharset_table with the given arguments (see the
 329    document of `define-charset' for the meaning of each argument).
 330    Several other table contents are also updated.  The caller should
 331    check the validity of CHARSET-ID and the remaining arguments in
 332    advance.  */
 333
 334 void
 335 update_charset_table (charset_id, dimension, chars, width, direction,
 336                       iso_final_char, iso_graphic_plane,
 337                       short_name, long_name, description)
 338      Lisp_Object charset_id, dimension, chars, width, direction;
 339      Lisp_Object iso_final_char, iso_graphic_plane;
 340      Lisp_Object short_name, long_name, description;
 341 {
 342   int charset = XINT (charset_id);
 343   int bytes;
 344   unsigned char leading_code_base, leading_code_ext;
 345
 346   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 347     CHARSET_TABLE_ENTRY (charset)
 348       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 349
 350   /* Get byte length of multibyte form, base leading-code, and
 351      extended leading-code of the charset.  See the comment under the
 352      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 353   bytes = XINT (dimension);
 354   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 355     {
 356       /* Official charset, it doesn't have an extended leading-code.  */
 357       if (charset != CHARSET_ASCII)
 358         bytes += 1; /* For a base leading-code.  */
 359       leading_code_base = charset;
 360       leading_code_ext = 0;
 361     }
 362   else
 363     {
 364       /* Private charset.  */
 365       bytes += 2; /* For base and extended leading-codes.  */
 366       leading_code_base
 367         = (charset < LEADING_CODE_EXT_12
 368            ? LEADING_CODE_PRIVATE_11
 369            : (charset < LEADING_CODE_EXT_21
 370               ? LEADING_CODE_PRIVATE_12
 371               : (charset < LEADING_CODE_EXT_22
 372                  ? LEADING_CODE_PRIVATE_21
 373                  : LEADING_CODE_PRIVATE_22)));
 374       leading_code_ext = charset;
 375     }
 376
 377   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 378   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 379   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 380   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 381   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 382   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 383   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 384     = make_number (leading_code_base);
 385   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 386     = make_number (leading_code_ext);
 387   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 388   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 389     = iso_graphic_plane;
 390   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 391   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 392   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 393   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 394
 395   {
 396     /* If we have already defined a charset which has the same
 397        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 398        DIRECTION, we must update the entry REVERSE-CHARSET of both
 399        charsets.  If there's no such charset, the value of the entry
 400        is set to nil.  */
 401     int i;
 402
 403     for (i = 0; i <= MAX_CHARSET; i++)
 404       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 405         {
 406           if (CHARSET_DIMENSION (i) == XINT (dimension)
 407               && CHARSET_CHARS (i) == XINT (chars)
 408               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 409               && CHARSET_DIRECTION (i) != XINT (direction))
 410             {
 411               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 412                 = make_number (i);
 413               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 414               break;
 415             }
 416         }
 417     if (i > MAX_CHARSET)
 418       /* No such a charset.  */
 419       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 420         = make_number (-1);
 421   }
 422
 423   if (charset != CHARSET_ASCII
 424       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 425     {
 426       /* Update tables bytes_by_char_head and width_by_char_head.  */
 427       bytes_by_char_head[leading_code_base] = bytes;
 428       width_by_char_head[leading_code_base] = XINT (width);
 429
 430       /* Update table emacs_code_class.  */
 431       emacs_code_class[charset] = (bytes == 2
 432                                    ? EMACS_leading_code_2
 433                                    : (bytes == 3
 434                                       ? EMACS_leading_code_3
 435                                       : EMACS_leading_code_4));
 436     }
 437
 438   /* Update table iso_charset_table.  */
 439   if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 440     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 441 }
 442
 443 #ifdef emacs
 444
 445 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 446    is invalid.  */
 447 int
 448 get_charset_id (charset_symbol)
 449      Lisp_Object charset_symbol;
 450 {
 451   Lisp_Object val;
 452   int charset;
 453
 454   return ((SYMBOLP (charset_symbol)
 455            && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
 456            && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
 457                CHARSET_VALID_P (charset)))
 458           ? charset : -1);
 459 }
 460
 461 /* Return an identification number for a new private charset of
 462    DIMENSION and WIDTH.  If there's no more room for the new charset,
 463    return 0.  */
 464 Lisp_Object
 465 get_new_private_charset_id (dimension, width)
 466      int dimension, width;
 467 {
 468   int charset, from, to;
 469
 470   if (dimension == 1)
 471     {
 472       if (width == 1)
 473         from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
 474       else
 475         from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
 476     }
 477   else
 478     {
 479       if (width == 1)
 480         from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
 481       else
 482         from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
 483     }
 484
 485   for (charset = from; charset < to; charset++)
 486     if (!CHARSET_DEFINED_P (charset)) break;
 487
 488   return make_number (charset < to ? charset : 0);
 489 }
 490
 491 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 492   "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
 493 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
 494  treated as a private charset.\n\
 495 INFO-VECTOR is a vector of the format:\n\
 496    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
 497     SHORT-NAME LONG-NAME DESCRIPTION]\n\
 498 The meanings of each elements is as follows:\n\
 499 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
 500 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
 501 WIDTH (integer) is the number of columns a character in the charset\n\
 502 occupies on the screen: one of 0, 1, and 2.\n\
 503 \n\
 504 DIRECTION (integer) is the rendering direction of characters in the\n\
 505 charset when rendering.  If 0, render from right to left, else\n\
 506 render from left to right.\n\
 507 \n\
 508 ISO-FINAL-CHAR (character) is the final character of the\n\
 509 corresponding ISO 2022 charset.\n\
 510 \n\
 511 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
 512 while encoding to variants of ISO 2022 coding system, one of the\n\
 513 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
 514 \n\
 515 SHORT-NAME (string) is the short name to refer to the charset.\n\
 516 \n\
 517 LONG-NAME (string) is the long name to refer to the charset.\n\
 518 \n\
 519 DESCRIPTION (string) is the description string of the charset.")
 520   (charset_id, charset_symbol, info_vector)
 521      Lisp_Object charset_id, charset_symbol, info_vector;
 522 {
 523   Lisp_Object *vec;
 524
 525   if (!NILP (charset_id))
 526     CHECK_NUMBER (charset_id, 0);
 527   CHECK_SYMBOL (charset_symbol, 1);
 528   CHECK_VECTOR (info_vector, 2);
 529
 530   if (! NILP (charset_id))
 531     {
 532       if (! CHARSET_VALID_P (XINT (charset_id)))
 533         error ("Invalid CHARSET: %d", XINT (charset_id));
 534       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 535         error ("Already defined charset: %d", XINT (charset_id));
 536     }
 537
 538   vec = XVECTOR (info_vector)->contents;
 539   if (XVECTOR (info_vector)->size != 9
 540       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 541       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 542       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 543       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 544       || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
 545       || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 546       || !STRINGP (vec[6])
 547       || !STRINGP (vec[7])
 548       || !STRINGP (vec[8]))
 549     error ("Invalid info-vector argument for defining charset %s",
 550            XSYMBOL (charset_symbol)->name->data);
 551
 552   if (NILP (charset_id))
 553     {
 554       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 555       if (XINT (charset_id) == 0)
 556         error ("There's no room for a new private charset %s",
 557                XSYMBOL (charset_symbol)->name->data);
 558     }
 559
 560   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 561                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 562   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 563   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 564   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 565   return Qnil;
 566 }
 567
 568 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 569        Sget_unused_iso_final_char, 2, 2, 0,
 570   "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
 571 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
 572 CHARS is the number of characters in a dimension: 94 or 96.\n\
 573 \n\
 574 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
 575 If there's no unused final char for the specified kind of charset,\n\
 576 return nil.")
 577   (dimension, chars)
 578      Lisp_Object dimension, chars;
 579 {
 580   int final_char;
 581
 582   CHECK_NUMBER (dimension, 0);
 583   CHECK_NUMBER (chars, 1);
 584   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 585     error ("Invalid charset dimension %d, it should be 1 or 2",
 586            XINT (dimension));
 587   if (XINT (chars) != 94 && XINT (chars) != 96)
 588     error ("Invalid charset chars %d, it should be 94 or 96",
 589            XINT (chars));
 590   for (final_char = '0'; final_char <= '?'; final_char++)
 591     {
 592       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 593         break;
 594     }
 595   return (final_char <= '?' ? make_number (final_char) : Qnil);
 596 }
 597
 598 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 599        4, 4, 0,
 600   "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
 601 CHARSET should be defined by `defined-charset' in advance.")
 602   (dimension, chars, final_char, charset_symbol)
 603      Lisp_Object dimension, chars, final_char, charset_symbol;
 604 {
 605   int charset;
 606
 607   CHECK_NUMBER (dimension, 0);
 608   CHECK_NUMBER (chars, 1);
 609   CHECK_NUMBER (final_char, 2);
 610   CHECK_SYMBOL (charset_symbol, 3);
 611
 612   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 613     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 614   if (XINT (chars) != 94 && XINT (chars) != 96)
 615     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 616   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 617     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 618   if ((charset = get_charset_id (charset_symbol)) < 0)
 619     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 620
 621   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 622   return Qnil;
 623 }
 624
 625 /* Return number of different charsets in STR of length LEN.  In
 626    addition, for each found charset N, CHARSETS[N] is set 1.  The
 627    caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
 628    It may lookup a unification table TABLE if supplied.  */
 629
 630 int
 631 find_charset_in_str (str, len, charsets, table)
 632      unsigned char *str;
 633      int len, *charsets;
 634      Lisp_Object table;
 635 {
 636   register int num = 0, c;
 637
 638   if (! CHAR_TABLE_P (table))
 639     table = Qnil;
 640
 641   while (len > 0)
 642     {
 643       int bytes, charset;
 644       c = *str;
 645
 646       if (c == LEADING_CODE_COMPOSITION)
 647         {
 648           int cmpchar_id = str_cmpchar_id (str, len);
 649           GLYPH *glyph;
 650
 651           if (cmpchar_id > 0)
 652             {
 653               struct cmpchar_info *cmpcharp = cmpchar_table[cmpchar_id];
 654               int i;
 655
 656               for (i = 0; i < cmpcharp->glyph_len; i++)
 657                 {
 658                   c = cmpcharp->glyph[i];
 659                   if (!NILP (table))
 660                     {
 661                       if ((c = unify_char (table, c, 0, 0, 0)) < 0)
 662                         c = cmpcharp->glyph[i];
 663                     }
 664                   if ((charset = CHAR_CHARSET (c)) < 0)
 665                     charset = CHARSET_ASCII;
 666                   if (!charsets[charset])
 667                     {
 668                       charsets[charset] = 1;
 669                       num += 1;
 670                     }
 671                 }
 672               str += cmpcharp->len;
 673               len -= cmpcharp->len;
 674               continue;
 675             }
 676
 677           charset = CHARSET_ASCII;
 678           bytes = 1;
 679         }
 680       else
 681         {
 682           c = STRING_CHAR_AND_LENGTH (str, len, bytes);
 683           if (! NILP (table))
 684             {
 685               int c1 = unify_char (table, c, 0, 0, 0);
 686               if (c1 >= 0)
 687                 c = c1;
 688             }
 689           charset = CHAR_CHARSET (c);
 690         }
 691
 692       if (!charsets[charset])
 693         {
 694           charsets[charset] = 1;
 695           num += 1;
 696         }
 697       str += bytes;
 698       len -= bytes;
 699     }
 700   return num;
 701 }
 702
 703 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 704        2, 3, 0,
 705   "Return a list of charsets in the region between BEG and END.\n\
 706 BEG and END are buffer positions.\n\
 707 Optional arg TABLE if non-nil is a unification table to look up.")
 708   (beg, end, table)
 709      Lisp_Object beg, end, table;
 710 {
 711   int charsets[MAX_CHARSET + 1];
 712   int from, from_byte, to, stop, stop_byte, i;
 713   Lisp_Object val;
 714
 715   validate_region (&beg, &end);
 716   from = XFASTINT (beg);
 717   stop = to = XFASTINT (end);
 718
 719   if (from < GPT && GPT < to)
 720     {
 721       stop = GPT;
 722       stop_byte = GPT_BYTE;
 723     }
 724   else
 725     stop_byte = CHAR_TO_BYTE (stop);
 726
 727   from_byte = CHAR_TO_BYTE (from);
 728
 729   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 730   while (1)
 731     {
 732       find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
 733                            charsets, table);
 734       if (stop < to)
 735         {
 736           from = stop, from_byte = stop_byte;
 737           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 738         }
 739       else
 740         break;
 741     }
 742
 743   val = Qnil;
 744   for (i = MAX_CHARSET; i >= 0; i--)
 745     if (charsets[i])
 746       val = Fcons (CHARSET_SYMBOL (i), val);
 747   return val;
 748 }
 749
 750 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 751        1, 2, 0,
 752   "Return a list of charsets in STR.\n\
 753 Optional arg TABLE if non-nil is a unification table to look up.")
 754   (str, table)
 755      Lisp_Object str, table;
 756 {
 757   int charsets[MAX_CHARSET + 1];
 758   int i;
 759   Lisp_Object val;
 760
 761   CHECK_STRING (str, 0);
 762
 763   if (! STRING_MULTIBYTE (str))
 764     return Qnil;
 765
 766   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 767   find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
 768                        charsets, table);
 769   val = Qnil;
 770   for (i = MAX_CHARSET; i >= 0; i--)
 771     if (charsets[i])
 772       val = Fcons (CHARSET_SYMBOL (i), val);
 773   return val;
 774 }
 775 \f
 776 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 777   "")
 778   (charset, code1, code2)
 779      Lisp_Object charset, code1, code2;
 780 {
 781   CHECK_NUMBER (charset, 0);
 782
 783   if (NILP (code1))
 784     XSETFASTINT (code1, 0);
 785   else
 786     CHECK_NUMBER (code1, 1);
 787   if (NILP (code2))
 788     XSETFASTINT (code2, 0);
 789   else
 790     CHECK_NUMBER (code2, 2);
 791
 792   if (!CHARSET_DEFINED_P (XINT (charset)))
 793     error ("Invalid charset: %d", XINT (charset));
 794
 795   return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
 796 }
 797
 798 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
 799   "Return list of charset and one or two position-codes of CHAR.")
 800   (ch)
 801      Lisp_Object ch;
 802 {
 803   Lisp_Object val;
 804   int charset, c1, c2;
 805
 806   CHECK_NUMBER (ch, 0);
 807   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
 808   return (c2 >= 0
 809           ? Fcons (CHARSET_SYMBOL (charset),
 810                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
 811           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
 812 }
 813
 814 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
 815   "Return charset of CHAR.")
 816   (ch)
 817      Lisp_Object ch;
 818 {
 819   CHECK_NUMBER (ch, 0);
 820
 821   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
 822 }
 823
 824 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
 825   "Return charset of a character in current buffer at position POS.\n\
 826 If POS is nil, it defauls to the current point.")
 827   (pos)
 828      Lisp_Object pos;
 829 {
 830   register int pos_byte, c, charset;
 831   register unsigned char *p;
 832
 833   if (NILP (pos))
 834     pos_byte = PT_BYTE;
 835   else if (MARKERP (pos))
 836     pos_byte = marker_byte_position (pos);
 837   else
 838     {
 839       CHECK_NUMBER (pos, 0);
 840       pos_byte = CHAR_TO_BYTE (XINT (pos));
 841     }
 842   p = BYTE_POS_ADDR (pos_byte);
 843   c = STRING_CHAR (p, Z_BYTE - pos_byte);
 844   charset = CHAR_CHARSET (c);
 845   return CHARSET_SYMBOL (charset);
 846 }
 847
 848 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
 849   "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
 850 \n\
 851 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
 852 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
 853 where as Emacs distinguishes them by charset symbol.\n\
 854 See the documentation of the function `charset-info' for the meanings of\n\
 855 DIMENSION, CHARS, and FINAL-CHAR.")
 856   (dimension, chars, final_char)
 857      Lisp_Object dimension, chars, final_char;
 858 {
 859   int charset;
 860
 861   CHECK_NUMBER (dimension, 0);
 862   CHECK_NUMBER (chars, 1);
 863   CHECK_NUMBER (final_char, 2);
 864
 865   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
 866     return Qnil;
 867   return CHARSET_SYMBOL (charset);
 868 }
 869
 870 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
 871    generic character.  If GENERICP is zero, return nonzero iff C is a
 872    valid normal character.  Do not call this function directly,
 873    instead use macro CHAR_VALID_P.  */
 874 int
 875 char_valid_p (c, genericp)
 876      int c, genericp;
 877 {
 878   int charset, c1, c2;
 879
 880   if (c < 0)
 881     return 0;
 882   if (SINGLE_BYTE_CHAR_P (c))
 883     return 1;
 884   SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
 885   if (!CHARSET_VALID_P (charset))
 886     return 0;
 887   return (c < MIN_CHAR_COMPOSITION
 888           ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two.  */
 889              ? (genericp && c1 == 0 && c2 == 0
 890                 || c1 >= 32 && c2 >= 32)
 891              : (genericp && c1 == 0
 892                 || c1 >= 32))
 893           : c < MIN_CHAR_COMPOSITION + n_cmpchars);
 894 }
 895
 896 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
 897   "Return t if OBJECT is a valid normal character.\n\
 898 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
 899 a valid generic character.")
 900   (object, genericp)
 901      Lisp_Object object, genericp;
 902 {
 903   if (! NATNUMP (object))
 904     return Qnil;
 905   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
 906 }
 907
 908 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
 909        Sunibyte_char_to_multibyte, 1, 1, 0,
 910   "Convert the unibyte character CH to multibyte character.\n\
 911 The conversion is done based on nonascii-translate-table (which see)\n\
 912  or nonascii-insert-offset (which see).")
 913   (ch)
 914      Lisp_Object ch;
 915 {
 916   int c;
 917
 918   CHECK_NUMBER (ch, 0);
 919   c = XINT (ch);
 920   if (c < 0 || c >= 0400)
 921     error ("Invalid unibyte character: %d", c);
 922   c = unibyte_char_to_multibyte (c);
 923   if (c < 0)
 924     error ("Can't convert to multibyte character: %d", XINT (ch));
 925   return make_number (c);
 926 }
 927
 928 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
 929   "Return byte length of multi-byte form of CHAR.")
 930   (ch)
 931      Lisp_Object ch;
 932 {
 933   Lisp_Object val;
 934   int bytes;
 935
 936   CHECK_NUMBER (ch, 0);
 937   if (COMPOSITE_CHAR_P (XFASTINT (ch)))
 938     {
 939       unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
 940
 941       bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
 942     }
 943   else
 944     {
 945       int charset = CHAR_CHARSET (XFASTINT (ch));
 946
 947       bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
 948     }
 949
 950   XSETFASTINT (val, bytes);
 951   return val;
 952 }
 953
 954 /* Return the width of character of which multi-byte form starts with
 955    C.  The width is measured by how many columns occupied on the
 956    screen when displayed in the current buffer.  */
 957
 958 #define ONE_BYTE_CHAR_WIDTH(c)                                          \
 959   (c < 0x20                                                             \
 960    ? (c == '\t'                                                         \
 961       ? XFASTINT (current_buffer->tab_width)                            \
 962       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
 963    : (c < 0x7f                                                          \
 964       ? 1                                                               \
 965       : (c == 0x7F                                                      \
 966          ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
 967          : ((! NILP (current_buffer->enable_multibyte_characters)       \
 968              && BASE_LEADING_CODE_P (c))                                \
 969             ? WIDTH_BY_CHAR_HEAD (c)                                    \
 970             : 4))))                                                     \
 971
 972
 973 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
 974   "Return width of CHAR when displayed in the current buffer.\n\
 975 The width is measured by how many columns it occupies on the screen.")
 976   (ch)
 977        Lisp_Object ch;
 978 {
 979   Lisp_Object val, disp;
 980   int c;
 981   struct Lisp_Char_Table *dp = buffer_display_table ();
 982
 983   CHECK_NUMBER (ch, 0);
 984
 985   c = XINT (ch);
 986
 987   /* Get the way the display table would display it.  */
 988   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
 989
 990   if (VECTORP (disp))
 991     XSETINT (val, XVECTOR (disp)->size);
 992   else if (SINGLE_BYTE_CHAR_P (c))
 993     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
 994   else if (COMPOSITE_CHAR_P (c))
 995     {
 996       int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
 997       XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
 998     }
 999   else
1000     {
1001       int charset = CHAR_CHARSET (c);
1002
1003       XSETFASTINT (val, CHARSET_WIDTH (charset));
1004     }
1005   return val;
1006 }
1007
1008 /* Return width of string STR of length LEN when displayed in the
1009    current buffer.  The width is measured by how many columns it
1010    occupies on the screen.  */
1011
1012 int
1013 strwidth (str, len)
1014      unsigned char *str;
1015      int len;
1016 {
1017   unsigned char *endp = str + len;
1018   int width = 0;
1019   struct Lisp_Char_Table *dp = buffer_display_table ();
1020
1021   while (str < endp)
1022     {
1023       if (*str == LEADING_CODE_COMPOSITION)
1024         {
1025           int id = str_cmpchar_id (str, endp - str);
1026
1027           if (id < 0)
1028             {
1029               width += 4;
1030               str++;
1031             }
1032           else
1033             {
1034               width += cmpchar_table[id]->width;
1035               str += cmpchar_table[id]->len;
1036             }
1037         }
1038       else
1039         {
1040           Lisp_Object disp;
1041           int thislen;
1042           int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
1043
1044           /* Get the way the display table would display it.  */
1045           if (dp)
1046             disp = DISP_CHAR_VECTOR (dp, c);
1047           else
1048             disp = Qnil;
1049
1050           if (VECTORP (disp))
1051             width += XVECTOR (disp)->size;
1052           else
1053             width += ONE_BYTE_CHAR_WIDTH (*str);
1054
1055           str += thislen;
1056         }
1057     }
1058   return width;
1059 }
1060
1061 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1062   "Return width of STRING when displayed in the current buffer.\n\
1063 Width is measured by how many columns it occupies on the screen.\n\
1064 When calculating width of a multibyte character in STRING,\n\
1065 only the base leading-code is considered; the validity of\n\
1066 the following bytes is not checked.")
1067   (str)
1068      Lisp_Object str;
1069 {
1070   Lisp_Object val;
1071
1072   CHECK_STRING (str, 0);
1073   XSETFASTINT (val, strwidth (XSTRING (str)->data,
1074                               STRING_BYTES (XSTRING (str))));
1075   return val;
1076 }
1077
1078 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1079   "Return the direction of CHAR.\n\
1080 The returned value is 0 for left-to-right and 1 for right-to-left.")
1081   (ch)
1082      Lisp_Object ch;
1083 {
1084   int charset;
1085
1086   CHECK_NUMBER (ch, 0);
1087   charset = CHAR_CHARSET (XFASTINT (ch));
1088   if (!CHARSET_DEFINED_P (charset))
1089     invalid_character (XINT (ch));
1090   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1091 }
1092
1093 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1094   "Return number of characters between BEG and END.")
1095   (beg, end)
1096      Lisp_Object beg, end;
1097 {
1098   int from, to;
1099
1100   from = min (XFASTINT (beg), XFASTINT (end));
1101   to = max (XFASTINT (beg), XFASTINT (end));
1102
1103   return to - from;
1104 }
1105
1106 /* Return the number of characters in the NBYTES bytes at PTR.
1107    This works by looking at the contents and checking for multibyte sequences.
1108    However, if the current buffer has enable-multibyte-characters = nil,
1109    we treat each byte as a character.  */
1110
1111 int
1112 chars_in_text (ptr, nbytes)
1113      unsigned char *ptr;
1114      int nbytes;
1115 {
1116   unsigned char *endp, c;
1117   int chars;
1118
1119   /* current_buffer is null at early stages of Emacs initialization.  */
1120   if (current_buffer == 0
1121       || NILP (current_buffer->enable_multibyte_characters))
1122     return nbytes;
1123
1124   endp = ptr + nbytes;
1125   chars = 0;
1126
1127   while (ptr < endp)
1128     {
1129       c = *ptr++;
1130
1131       if (BASE_LEADING_CODE_P (c))
1132         while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1133       chars++;
1134     }
1135
1136   return chars;
1137 }
1138
1139 /* Return the number of characters in the NBYTES bytes at PTR.
1140    This works by looking at the contents and checking for multibyte sequences.
1141    It ignores enable-multibyte-characters.  */
1142
1143 int
1144 multibyte_chars_in_text (ptr, nbytes)
1145      unsigned char *ptr;
1146      int nbytes;
1147 {
1148   unsigned char *endp, c;
1149   int chars;
1150
1151   endp = ptr + nbytes;
1152   chars = 0;
1153
1154   while (ptr < endp)
1155     {
1156       c = *ptr++;
1157
1158       if (BASE_LEADING_CODE_P (c))
1159         while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1160       chars++;
1161     }
1162
1163   return chars;
1164 }
1165
1166 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1167   "Concatenate all the argument characters and make the result a string.")
1168   (n, args)
1169      int n;
1170      Lisp_Object *args;
1171 {
1172   int i;
1173   unsigned char *buf
1174     = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
1175   unsigned char *p = buf;
1176   Lisp_Object val;
1177
1178   for (i = 0; i < n; i++)
1179     {
1180       int c, len;
1181       unsigned char *str;
1182
1183       if (!INTEGERP (args[i]))
1184         CHECK_NUMBER (args[i], 0);
1185       c = XINT (args[i]);
1186       len = CHAR_STRING (c, p, str);
1187       if (p != str)
1188         /* C is a composite character.  */
1189         bcopy (str, p, len);
1190       p += len;
1191     }
1192
1193   val = make_string_from_bytes (buf, n, p - buf);
1194   return val;
1195 }
1196
1197 #endif /* emacs */
1198 \f
1199 /*** Composite characters staffs ***/
1200
1201 /* Each composite character is identified by CMPCHAR-ID which is
1202    assigned when Emacs needs the character code of the composite
1203    character (e.g. when displaying it on the screen).  See the
1204    document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1205    composite character is represented in Emacs.  */
1206
1207 /* If `static' is defined, it means that it is defined to null string. */
1208 #ifndef static
1209 /* The following function is copied from lread.c.  */
1210 static int
1211 hash_string (ptr, len)
1212      unsigned char *ptr;
1213      int len;
1214 {
1215   register unsigned char *p = ptr;
1216   register unsigned char *end = p + len;
1217   register unsigned char c;
1218   register int hash = 0;
1219
1220   while (p != end)
1221     {
1222       c = *p++;
1223       if (c >= 0140) c -= 40;
1224       hash = ((hash<<3) + (hash>>28) + c);
1225     }
1226   return hash & 07777777777;
1227 }
1228 #endif
1229
1230 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1231
1232 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1233
1234 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1235    integer, where the 1st element is the size of the array, the 2nd
1236    element is how many elements are actually used in the array, and
1237    the remaining elements are CMPCHAR-IDs of composite characters of
1238    the same hash value.  */
1239 #define CMPCHAR_HASH_SIZE(table) table[0]
1240 #define CMPCHAR_HASH_USED(table) table[1]
1241 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1242
1243 /* Return CMPCHAR-ID of the composite character in STR of the length
1244    LEN.  If the composite character has not yet been registered,
1245    register it in `cmpchar_table' and assign new CMPCHAR-ID.  This
1246    is the sole function for assigning CMPCHAR-ID.  */
1247 int
1248 str_cmpchar_id (str, len)
1249      const unsigned char *str;
1250      int len;
1251 {
1252   int hash_idx, *hashp;
1253   unsigned char *buf;
1254   int embedded_rule;            /* 1 if composition rule is embedded.  */
1255   int chars;                    /* number of components.  */
1256   int i;
1257   struct cmpchar_info *cmpcharp;
1258
1259   /* The second byte 0xFF means compostion rule is embedded.  */
1260   embedded_rule = (str[1] == 0xFF);
1261
1262   /* At first, get the actual length of the composite character.  */
1263   {
1264     const unsigned char *p, *endp = str + 1, *lastp = str + len;
1265     int bytes;
1266
1267     while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
1268     if (endp - str < 5)
1269       /* Any composite char have at least 5-byte length.  */
1270       return -1;
1271
1272     chars = 0;
1273     p = str + 1;
1274     while (p < endp)
1275       {
1276         if (embedded_rule) p++;
1277         /* No need of checking if *P is 0xA0 because
1278            BYTES_BY_CHAR_HEAD (0x80) surely returns 2.  */
1279         p += BYTES_BY_CHAR_HEAD (*p - 0x20);
1280         chars++;
1281       }
1282     if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1283       /* Invalid components.  */
1284       return -1;
1285     len = p - str;
1286   }
1287   hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1288   hashp = cmpchar_hash_table[hash_idx];
1289
1290   /* Then, look into the hash table.  */
1291   if (hashp != NULL)
1292     /* Find the correct one among composite characters of the same
1293        hash value.  */
1294     for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1295       {
1296         cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1297         if (len == cmpcharp->len
1298             && ! bcmp (str, cmpcharp->data, len))
1299           return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1300       }
1301
1302   /* We have to register the composite character in cmpchar_table.  */
1303   if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1304     /* No, we have no more room for a new composite character.  */
1305     return -1;
1306
1307   /* Make the entry in hash table.  */
1308   if (hashp == NULL)
1309     {
1310       /* Make a table for 8 composite characters initially.  */
1311       hashp = (cmpchar_hash_table[hash_idx]
1312                = (int *) xmalloc (sizeof (int) * (2 + 8)));
1313       CMPCHAR_HASH_SIZE (hashp) = 10;
1314       CMPCHAR_HASH_USED (hashp) = 2;
1315     }
1316   else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1317     {
1318       CMPCHAR_HASH_SIZE (hashp) += 8;
1319       hashp = (cmpchar_hash_table[hash_idx]
1320                = (int *) xrealloc (hashp,
1321                                    sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1322     }
1323   CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1324   CMPCHAR_HASH_USED (hashp)++;
1325
1326   /* Set information of the composite character in cmpchar_table.  */
1327   if (cmpchar_table_size == 0)
1328     {
1329       /* This is the first composite character to be registered.  */
1330       cmpchar_table_size = 256;
1331       cmpchar_table
1332         = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1333                                             * cmpchar_table_size);
1334     }
1335   else if (cmpchar_table_size <= n_cmpchars)
1336     {
1337       cmpchar_table_size += 256;
1338       cmpchar_table
1339         = (struct cmpchar_info **) xrealloc (cmpchar_table,
1340                                              sizeof (cmpchar_table[0])
1341                                              * cmpchar_table_size);
1342     }
1343
1344   cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1345
1346   cmpcharp->len = len;
1347   cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1348   bcopy (str, cmpcharp->data, len);
1349   cmpcharp->data[len] = 0;
1350   cmpcharp->glyph_len = chars;
1351   cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1352   if (embedded_rule)
1353     {
1354       cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1355       cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1356     }
1357   else
1358     {
1359       cmpcharp->cmp_rule = NULL;
1360       cmpcharp->col_offset = NULL;
1361     }
1362
1363   /* Setup GLYPH data and composition rules (if any) so as not to make
1364      them every time on displaying.  */
1365   {
1366     unsigned char *bufp;
1367     int width;
1368     float leftmost = 0.0, rightmost = 1.0;
1369
1370     if (embedded_rule)
1371       /* At first, col_offset[N] is set to relative to col_offset[0].  */
1372       cmpcharp->col_offset[0] = 0;
1373
1374     for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1375       {
1376         if (embedded_rule)
1377           cmpcharp->cmp_rule[i] = *bufp++;
1378
1379         if (*bufp == 0xA0)      /* This is an ASCII character.  */
1380           {
1381             cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1382             width = 1;
1383             bufp++;
1384           }
1385         else                    /* Multibyte character.  */
1386           {
1387             /* Make `bufp' point normal multi-byte form temporally.  */
1388             *bufp -= 0x20;
1389             cmpcharp->glyph[i]
1390               = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1391             width = WIDTH_BY_CHAR_HEAD (*bufp);
1392             *bufp += 0x20;
1393             bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1394           }
1395
1396         if (embedded_rule && i > 0)
1397           {
1398             /* Reference points (global_ref and new_ref) are
1399                encoded as below:
1400
1401                0--1--2 -- ascent
1402                |     |
1403                |     |
1404                |  4 -+--- center
1405             -- 3     5 -- baseline
1406                |     |
1407                6--7--8 -- descent
1408
1409                Now, we calculate the column offset of the new glyph
1410                from the left edge of the first glyph.  This can avoid
1411                the same calculation everytime displaying this
1412                composite character.  */
1413
1414             /* Reference points of global glyph and new glyph.  */
1415             int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1416             int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1417             /* Column offset relative to the first glyph.  */
1418             float left = (leftmost
1419                           + (global_ref % 3) * (rightmost - leftmost) / 2.0
1420                           - (new_ref % 3) * width / 2.0);
1421
1422             cmpcharp->col_offset[i] = left;
1423             if (left < leftmost)
1424               leftmost = left;
1425             if (left + width > rightmost)
1426               rightmost = left + width;
1427           }
1428         else
1429           {
1430             if (width > rightmost)
1431               rightmost = width;
1432           }
1433       }
1434     if (embedded_rule)
1435       {
1436         /* Now col_offset[N] are relative to the left edge of the
1437            first component.  Make them relative to the left edge of
1438            overall glyph.  */
1439         for (i = 0; i < chars; i++)
1440           cmpcharp->col_offset[i] -= leftmost;
1441         /* Make rightmost holds width of overall glyph.  */
1442         rightmost -= leftmost;
1443       }
1444
1445     cmpcharp->width = rightmost;
1446     if (cmpcharp->width < rightmost)
1447       /* To get a ceiling integer value.  */
1448       cmpcharp->width++;
1449   }
1450
1451   cmpchar_table[n_cmpchars] = cmpcharp;
1452
1453   return n_cmpchars++;
1454 }
1455
1456 /* Return the Nth element of the composite character C.  */
1457 int
1458 cmpchar_component (c, n)
1459      unsigned int c, n;
1460 {
1461   int id = COMPOSITE_CHAR_ID (c);
1462
1463   if (id >= n_cmpchars          /* C is not a valid composite character.  */
1464       || n >= cmpchar_table[id]->glyph_len) /* No such component.  */
1465     return -1;
1466   /* No face data is stored in glyph code.  */
1467   return ((int) (cmpchar_table[id]->glyph[n]));
1468 }
1469
1470 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1471   "T if CHAR is a composite character.")
1472   (ch)
1473      Lisp_Object ch;
1474 {
1475   CHECK_NUMBER (ch, 0);
1476   return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1477 }
1478
1479 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1480        2, 2, 0,
1481   "Return the IDXth component character of composite character CHARACTER.")
1482   (character, idx)
1483      Lisp_Object character, idx;
1484 {
1485   int c;
1486
1487   CHECK_NUMBER (character, 0);
1488   CHECK_NUMBER (idx, 1);
1489
1490   if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1491     args_out_of_range (character, idx);
1492
1493   return make_number (c);
1494 }
1495
1496 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1497        2, 2, 0,
1498   "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1499 The returned rule is for composing the Nth component\n\
1500 on the (N-1)th component.  If N is 0, the returned value is always 255.")
1501   (character, n)
1502      Lisp_Object character, n;
1503 {
1504   int id, i;
1505
1506   CHECK_NUMBER (character, 0);
1507   CHECK_NUMBER (n, 1);
1508
1509   id = COMPOSITE_CHAR_ID (XINT (character));
1510   if (id < 0 || id >= n_cmpchars)
1511     error ("Invalid composite character: %d", XINT (character));
1512   i = XINT (n);
1513   if (i > cmpchar_table[id]->glyph_len)
1514     args_out_of_range (character, n);
1515
1516   return make_number (cmpchar_table[id]->cmp_rule[i]);
1517 }
1518
1519 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1520        Scmpchar_cmp_rule_p, 1, 1, 0,
1521   "Return non-nil if composite character CHARACTER contains a embedded rule.")
1522   (character)
1523      Lisp_Object character;
1524 {
1525   int id;
1526
1527   CHECK_NUMBER (character, 0);
1528   id = COMPOSITE_CHAR_ID (XINT (character));
1529   if (id < 0 || id >= n_cmpchars)
1530     error ("Invalid composite character: %d", XINT (character));
1531
1532   return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1533 }
1534
1535 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1536        Scmpchar_cmp_count, 1, 1, 0,
1537   "Return number of compoents of composite character CHARACTER.")
1538   (character)
1539      Lisp_Object character;
1540 {
1541   int id;
1542
1543   CHECK_NUMBER (character, 0);
1544   id = COMPOSITE_CHAR_ID (XINT (character));
1545   if (id < 0 || id >= n_cmpchars)
1546     error ("Invalid composite character: %d", XINT (character));
1547
1548   return (make_number (cmpchar_table[id]->glyph_len));
1549 }
1550
1551 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1552        1, 1, 0,
1553   "Return one char string composed from all characters in STRING.")
1554   (str)
1555      Lisp_Object str;
1556 {
1557   unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1558   int len, i;
1559
1560   CHECK_STRING (str, 0);
1561
1562   buf[0] = LEADING_CODE_COMPOSITION;
1563   p = XSTRING (str)->data;
1564   pend = p + STRING_BYTES (XSTRING (str));
1565   i = 1;
1566   while (p < pend)
1567     {
1568       if (*p < 0x20 || *p == 127) /* control code */
1569         error ("Invalid component character: %d", *p);
1570       else if (*p < 0x80)       /* ASCII */
1571         {
1572           if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1573             error ("Too long string to be composed: %s", XSTRING (str)->data);
1574           /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1575              code itself.  */
1576           buf[i++] = 0xA0;
1577           buf[i++] = *p++ + 0x80;
1578         }
1579       else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1580         {
1581           /* Already composed.  Eliminate the heading
1582              LEADING_CODE_COMPOSITION, keep the remaining bytes
1583              unchanged.  */
1584           p++;
1585           ptemp = p;
1586           while (! CHAR_HEAD_P (*p)) p++;
1587           if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1588             error ("Too long string to be composed: %s", XSTRING (str)->data);
1589           bcopy (ptemp, buf + i, p - ptemp);
1590           i += p - ptemp;
1591         }
1592       else                      /* multibyte char */
1593         {
1594           /* Add 0x20 to the base leading-code, keep the remaining
1595              bytes unchanged.  */
1596           len = BYTES_BY_CHAR_HEAD (*p);
1597           if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1598             error ("Too long string to be composed: %s", XSTRING (str)->data);
1599           bcopy (p, buf + i, len);
1600           buf[i] += 0x20;
1601           p += len, i += len;
1602         }
1603     }
1604
1605   if (i < 5)
1606     /* STR contains only one character, which can't be composed.  */
1607     error ("Too short string to be composed: %s", XSTRING (str)->data);
1608
1609   return make_string_from_bytes (buf, 1, i);
1610 }
1611
1612 \f
1613 charset_id_internal (charset_name)
1614      char *charset_name;
1615 {
1616   Lisp_Object val = Fget (intern (charset_name), Qcharset);
1617
1618   if (!VECTORP (val))
1619     error ("Charset %s is not defined", charset_name);
1620
1621   return (XINT (XVECTOR (val)->contents[0]));
1622 }
1623
1624 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1625        Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1626    ()
1627 {
1628   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1629   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1630   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1631   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1632   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1633   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1634   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1635   return Qnil;
1636 }
1637
1638 init_charset_once ()
1639 {
1640   int i, j, k;
1641
1642   staticpro (&Vcharset_table);
1643   staticpro (&Vcharset_symbol_table);
1644
1645   /* This has to be done here, before we call Fmake_char_table.  */
1646   Qcharset_table = intern ("charset-table");
1647   staticpro (&Qcharset_table);
1648
1649   /* Intern this now in case it isn't already done.
1650      Setting this variable twice is harmless.
1651      But don't staticpro it here--that is done in alloc.c.  */
1652   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1653
1654   /* Now we are ready to set up this property, so we can
1655      create the charset table.  */
1656   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1657   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1658
1659   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
1660
1661   /* Setup tables.  */
1662   for (i = 0; i < 2; i++)
1663     for (j = 0; j < 2; j++)
1664       for (k = 0; k < 128; k++)
1665         iso_charset_table [i][j][k] = -1;
1666
1667   bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1668   cmpchar_table_size = n_cmpchars = 0;
1669
1670   for (i = 0; i < 256; i++)
1671     BYTES_BY_CHAR_HEAD (i) = 1;
1672   BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1673   BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1674   BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1675   BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1676   /* The following doesn't reflect the actual bytes, but just to tell
1677      that it is a start of a multibyte character.  */
1678   BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1679
1680   for (i = 0; i < 128; i++)
1681     WIDTH_BY_CHAR_HEAD (i) = 1;
1682   for (; i < 256; i++)
1683     WIDTH_BY_CHAR_HEAD (i) = 4;
1684   WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1685   WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1686   WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1687   WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1688 }
1689
1690 #ifdef emacs
1691
1692 syms_of_charset ()
1693 {
1694   Qascii = intern ("ascii");
1695   staticpro (&Qascii);
1696
1697   Qcharset = intern ("charset");
1698   staticpro (&Qcharset);
1699
1700   /* Define ASCII charset now.  */
1701   update_charset_table (make_number (CHARSET_ASCII),
1702                         make_number (1), make_number (94),
1703                         make_number (1),
1704                         make_number (0),
1705                         make_number ('B'),
1706                         make_number (0),
1707                         build_string ("ASCII"),
1708                         build_string ("ASCII"),
1709                         build_string ("ASCII (ISO646 IRV)"));
1710   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1711   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1712
1713   Qcomposition = intern ("composition");
1714   staticpro (&Qcomposition);
1715   CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1716
1717   defsubr (&Sdefine_charset);
1718   defsubr (&Sget_unused_iso_final_char);
1719   defsubr (&Sdeclare_equiv_charset);
1720   defsubr (&Sfind_charset_region);
1721   defsubr (&Sfind_charset_string);
1722   defsubr (&Smake_char_internal);
1723   defsubr (&Ssplit_char);
1724   defsubr (&Schar_charset);
1725   defsubr (&Scharset_after);
1726   defsubr (&Siso_charset);
1727   defsubr (&Schar_valid_p);
1728   defsubr (&Sunibyte_char_to_multibyte);
1729   defsubr (&Schar_bytes);
1730   defsubr (&Schar_width);
1731   defsubr (&Sstring_width);
1732   defsubr (&Schar_direction);
1733   defsubr (&Schars_in_region);
1734   defsubr (&Sstring);
1735   defsubr (&Scmpcharp);
1736   defsubr (&Scmpchar_component);
1737   defsubr (&Scmpchar_cmp_rule);
1738   defsubr (&Scmpchar_cmp_rule_p);
1739   defsubr (&Scmpchar_cmp_count);
1740   defsubr (&Scompose_string);
1741   defsubr (&Ssetup_special_charsets);
1742
1743   DEFVAR_LISP ("charset-list", &Vcharset_list,
1744     "List of charsets ever defined.");
1745   Vcharset_list = Fcons (Qascii, Qnil);
1746
1747   DEFVAR_LISP ("character-unification-table-vector",
1748                &Vcharacter_unification_table_vector,
1749     "Vector of cons cell of a symbol and unification table ever defined.\n\
1750 An ID of a unification table is an index of this vector.");
1751   Vcharacter_unification_table_vector = Fmake_vector (make_number (16), Qnil);
1752
1753   DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1754     "Leading-code of composite characters.");
1755   leading_code_composition = LEADING_CODE_COMPOSITION;
1756
1757   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1758     "Leading-code of private TYPE9N charset of column-width 1.");
1759   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1760
1761   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1762     "Leading-code of private TYPE9N charset of column-width 2.");
1763   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1764
1765   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1766     "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1767   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1768
1769   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1770     "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1771   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1772
1773   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1774     "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1775 This is used for converting unibyte text to multibyte,\n\
1776 and for inserting character codes specified by number.\n\n\
1777 Conversion is performed only when multibyte characters are enabled,\n\
1778 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1779 to the corresponding Emacs character code.\n\
1780 If `nonascii-translate-table' is non-nil, it overrides this variable.");
1781   nonascii_insert_offset = 0;
1782
1783   DEFVAR_LISP ("nonascii-translate-table", &Vnonascii_translate_table,
1784     "Translate table for converting non-ASCII unibyte codes to multibyte.\n\
1785 This is used for converting unibyte text to multibyte,\n\
1786 and for inserting character codes specified by number.\n\n\
1787 Conversion is performed only when multibyte characters are enabled,\n\
1788 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1789 to the corresponding Emacs character code.\n\n\
1790 If this is nil, `nonascii-insert-offset' is used instead.");
1791   Vnonascii_translate_table = Qnil;
1792
1793   DEFVAR_INT ("min-composite-char", &min_composite_char,
1794     "Minimum character code of a composite character.");
1795   min_composite_char = MIN_CHAR_COMPOSITION;
1796 }
1797
1798 #endif /* emacs */