(MULTIBYTE_FORM_LENGTH): Don't check LEN here.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
35e623fb 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
25#include <stdio.h>
26
27#ifdef emacs
28
29#include <sys/types.h>
30#include <config.h>
31#include "lisp.h"
32#include "buffer.h"
33#include "charset.h"
34#include "coding.h"
fc6b09bf 35#include "disptab.h"
4ed46869
KH
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
44
45/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */
47int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52/* Declaration of special charsets. */
53int charset_ascii; /* ASCII */
54int charset_composition; /* for a composite character */
55int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
b0e3cf2b
KH
63int min_composite_char;
64
4ed46869
KH
65Lisp_Object Qcharset_table;
66
67/* A char-table containing information of each character set. */
68Lisp_Object Vcharset_table;
69
70/* A vector of charset symbol indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72Lisp_Object Vcharset_symbol_table;
73
74/* A list of charset symbols ever defined. */
75Lisp_Object Vcharset_list;
76
b0e3cf2b
KH
77/* Vector of unification table ever defined.
78 An ID of a unification table is an index of this vector. */
79Lisp_Object Vcharacter_unification_table_vector;
80
4ed46869
KH
81/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
82int bytes_by_char_head[256];
83int width_by_char_head[256];
84
85/* Mapping table from ISO2022's charset (specified by DIMENSION,
86 CHARS, and FINAL-CHAR) to Emacs' charset. */
87int iso_charset_table[2][2][128];
88
513ee442
KH
89/* Table of pointers to the structure `cmpchar_info' indexed by
90 CMPCHAR-ID. */
91struct cmpchar_info **cmpchar_table;
92/* The current size of `cmpchar_table'. */
93static int cmpchar_table_size;
94/* Number of the current composite characters. */
95int n_cmpchars;
96
4ed46869
KH
97/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
98unsigned char *_fetch_multibyte_char_p;
99int _fetch_multibyte_char_len;
100
35e623fb
RS
101/* Offset to add to a non-ASCII value when inserting it. */
102int nonascii_insert_offset;
103
4cf9710d
RS
104/* Translation table for converting non-ASCII unibyte characters
105 to multibyte codes, or nil. */
106Lisp_Object Vnonascii_translate_table;
107
046b1f03
RS
108#define min(X, Y) ((X) < (Y) ? (X) : (Y))
109#define max(X, Y) ((X) > (Y) ? (X) : (Y))
110\f
93bcb785
KH
/* Signal an error reporting that C is an invalid character.  The
   message shows C in octal, decimal, and hexadecimal, so C must be
   supplied once for each of the three conversions in the format.  */
void
invalid_character (c)
     int c;
{
  error ("Invalid character: %o, %d, 0x%x", c, c, c);
}
117
118
4ed46869
KH
119/* Set STR a pointer to the multi-byte form of the character C. If C
120 is not a composite character, the multi-byte form is set in WORKBUF
121 and STR points WORKBUF. The caller should allocate at least 4-byte
122 area at WORKBUF in advance. Returns the length of the multi-byte
bd4c6dd0
KH
123 form. If C is an invalid character to have a multi-byte form,
124 signal an error.
4ed46869
KH
125
126 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
127 function directly if C can be an ASCII character. */
128
129int
130non_ascii_char_to_string (c, workbuf, str)
131 int c;
132 unsigned char *workbuf, **str;
133{
6dc0722d 134 int charset, c1, c2;
4ed46869
KH
135
136 if (COMPOSITE_CHAR_P (c))
137 {
138 int cmpchar_id = COMPOSITE_CHAR_ID (c);
139
140 if (cmpchar_id < n_cmpchars)
141 {
142 *str = cmpchar_table[cmpchar_id]->data;
143 return cmpchar_table[cmpchar_id]->len;
144 }
145 else
146 {
93bcb785 147 invalid_character (c);
4ed46869
KH
148 }
149 }
150
151 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
bd4c6dd0
KH
152 if (!charset
153 || ! CHARSET_DEFINED_P (charset)
154 || c1 >= 0 && c1 < 32
155 || c2 >= 0 && c2 < 32)
93bcb785 156 invalid_character (c);
4ed46869
KH
157
158 *str = workbuf;
159 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
160 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
161 workbuf++;
162 *workbuf++ = c1 | 0x80;
6dc0722d 163 if (c2 >= 0)
4ed46869
KH
164 *workbuf++ = c2 | 0x80;
165
166 return (workbuf - *str);
167}
168
169/* Return a non-ASCII character of which multi-byte form is at STR of
170 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
171 character is set to the address ACTUAL_LEN.
172
173 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
174 directly if STR can hold an ASCII character. */
175
176string_to_non_ascii_char (str, len, actual_len)
8867de67 177 const unsigned char *str;
4ed46869
KH
178 int len, *actual_len;
179{
180 int charset;
181 unsigned char c1, c2;
182 register int c;
183
184 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
185 {
186 if (actual_len)
187 *actual_len = 1;
188 return (int) *str;
189 }
190
191 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
192
193 if (actual_len)
194 *actual_len = (charset == CHARSET_COMPOSITION
195 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
196 : BYTES_BY_CHAR_HEAD (*str));
197 return c;
198}
199
200/* Return the length of the multi-byte form at string STR of length LEN. */
201int
202multibyte_form_length (str, len)
8867de67 203 const unsigned char *str;
4ed46869
KH
204 int len;
205{
206 int charset;
207 unsigned char c1, c2;
208 register int c;
209
210 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
211 return 1;
212
213 return (charset == CHARSET_COMPOSITION
214 ? cmpchar_table[(c1 << 7) | c2]->len
215 : BYTES_BY_CHAR_HEAD (*str));
216}
217
218/* Check if string STR of length LEN contains valid multi-byte form of
219 a character. If valid, charset and position codes of the character
220 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
221 return -1. This should be used only in the macro SPLIT_STRING
222 which checks range of STR in advance. */
223
224split_non_ascii_string (str, len, charset, c1, c2)
8867de67
KH
225 register const unsigned char *str;
226 register unsigned char *c1, *c2;
4ed46869
KH
227 register int len, *charset;
228{
229 register unsigned int cs = *str++;
230
231 if (cs == LEADING_CODE_COMPOSITION)
232 {
233 int cmpchar_id = str_cmpchar_id (str - 1, len);
234
235 if (cmpchar_id < 0)
236 return -1;
237 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
238 }
239 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
240 && CHARSET_DEFINED_P (cs))
241 {
242 *charset = cs;
243 if (*str < 0xA0)
244 return -1;
245 *c1 = (*str++) & 0x7F;
246 if (CHARSET_DIMENSION (cs) == 2)
247 {
248 if (*str < 0xA0)
249 return -1;
250 *c2 = (*str++) & 0x7F;
251 }
252 }
253 else
254 return -1;
255 return 0;
256}
257
23d2a7f1
KH
258/* Return a character unified with C (or a character made of CHARSET,
259 C1, and C2) in unification table TABLE. If no unification is found
260 in TABLE, return C. */
261unify_char (table, c, charset, c1, c2)
262 Lisp_Object table;
263 int c, charset, c1, c2;
264{
265 Lisp_Object ch;
266 int alt_charset, alt_c1, alt_c2, dimension;
267
268 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
269 if (!CHAR_TABLE_P (table)
270 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
271 || XINT (ch) < 0)
272 return c;
273
274 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
275 dimension = CHARSET_DIMENSION (alt_charset);
276 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
277 /* CH is not a generic character, just return it. */
278 return XFASTINT (ch);
279
280 /* Since CH is a generic character, we must return a specific
281 charater which has the same position codes as C from CH. */
282 if (charset < 0)
283 SPLIT_CHAR (c, charset, c1, c2);
284 if (dimension != CHARSET_DIMENSION (charset))
285 /* We can't make such a character because of dimension mismatch. */
286 return c;
23d2a7f1
KH
287 return MAKE_CHAR (alt_charset, c1, c2);
288}
289
d2665018
KH
290/* Convert the unibyte character C to multibyte based on
291 Vnonascii_translate_table or nonascii_insert_offset. If they can't
292 convert C to a valid multibyte character, convert it based on
293 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 294
35e623fb
RS
295unibyte_char_to_multibyte (c)
296 int c;
297{
d2665018 298 if (c >= 0240 && c < 0400)
35e623fb 299 {
d2665018
KH
300 int c_save = c;
301
4cf9710d
RS
302 if (! NILP (Vnonascii_translate_table))
303 c = XINT (Faref (Vnonascii_translate_table, make_number (c)));
304 else if (nonascii_insert_offset > 0)
35e623fb 305 c += nonascii_insert_offset;
d2665018
KH
306 if (c >= 0240 && (c < 0400 || ! VALID_MULTIBYTE_CHAR_P (c)))
307 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
308 }
309 return c;
310}
311\f
4ed46869
KH
312/* Update the table Vcharset_table with the given arguments (see the
313 document of `define-charset' for the meaning of each argument).
314 Several other table contents are also updated. The caller should
315 check the validity of CHARSET-ID and the remaining arguments in
316 advance. */
317
318void
319update_charset_table (charset_id, dimension, chars, width, direction,
320 iso_final_char, iso_graphic_plane,
321 short_name, long_name, description)
322 Lisp_Object charset_id, dimension, chars, width, direction;
323 Lisp_Object iso_final_char, iso_graphic_plane;
324 Lisp_Object short_name, long_name, description;
325{
326 int charset = XINT (charset_id);
327 int bytes;
328 unsigned char leading_code_base, leading_code_ext;
329
6dc0722d
KH
330 if (NILP (CHARSET_TABLE_ENTRY (charset)))
331 CHARSET_TABLE_ENTRY (charset)
332 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869
KH
333
334 /* Get byte length of multibyte form, base leading-code, and
335 extended leading-code of the charset. See the comment under the
336 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
337 bytes = XINT (dimension);
338 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
339 {
340 /* Official charset, it doesn't have an extended leading-code. */
341 if (charset != CHARSET_ASCII)
342 bytes += 1; /* For a base leading-code. */
343 leading_code_base = charset;
344 leading_code_ext = 0;
345 }
346 else
347 {
348 /* Private charset. */
349 bytes += 2; /* For base and extended leading-codes. */
350 leading_code_base
351 = (charset < LEADING_CODE_EXT_12
352 ? LEADING_CODE_PRIVATE_11
353 : (charset < LEADING_CODE_EXT_21
354 ? LEADING_CODE_PRIVATE_12
355 : (charset < LEADING_CODE_EXT_22
356 ? LEADING_CODE_PRIVATE_21
357 : LEADING_CODE_PRIVATE_22)));
358 leading_code_ext = charset;
359 }
360
361 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
362 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
363 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
364 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
365 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
366 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
367 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
368 = make_number (leading_code_base);
369 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
370 = make_number (leading_code_ext);
371 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
372 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
373 = iso_graphic_plane;
374 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
375 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
376 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
377 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
378
379 {
380 /* If we have already defined a charset which has the same
381 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
382 DIRECTION, we must update the entry REVERSE-CHARSET of both
383 charsets. If there's no such charset, the value of the entry
384 is set to nil. */
385 int i;
386
513ee442 387 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
388 if (!NILP (CHARSET_TABLE_ENTRY (i)))
389 {
390 if (CHARSET_DIMENSION (i) == XINT (dimension)
391 && CHARSET_CHARS (i) == XINT (chars)
392 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
393 && CHARSET_DIRECTION (i) != XINT (direction))
394 {
395 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
396 = make_number (i);
397 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
398 break;
399 }
400 }
513ee442 401 if (i > MAX_CHARSET)
4ed46869
KH
402 /* No such a charset. */
403 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
404 = make_number (-1);
405 }
406
407 if (charset != CHARSET_ASCII
408 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
409 {
410 /* Update tables bytes_by_char_head and width_by_char_head. */
411 bytes_by_char_head[leading_code_base] = bytes;
412 width_by_char_head[leading_code_base] = XINT (width);
413
414 /* Update table emacs_code_class. */
415 emacs_code_class[charset] = (bytes == 2
416 ? EMACS_leading_code_2
417 : (bytes == 3
418 ? EMACS_leading_code_3
419 : EMACS_leading_code_4));
420 }
421
422 /* Update table iso_charset_table. */
423 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
424 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
425}
426
427#ifdef emacs
428
429/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
430 is invalid. */
431int
432get_charset_id (charset_symbol)
433 Lisp_Object charset_symbol;
434{
435 Lisp_Object val;
436 int charset;
437
438 return ((SYMBOLP (charset_symbol)
439 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
440 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
441 CHARSET_VALID_P (charset)))
442 ? charset : -1);
443}
444
445/* Return an identification number for a new private charset of
446 DIMENSION and WIDTH. If there's no more room for the new charset,
447 return 0. */
448Lisp_Object
449get_new_private_charset_id (dimension, width)
450 int dimension, width;
451{
452 int charset, from, to;
453
454 if (dimension == 1)
455 {
456 if (width == 1)
457 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
458 else
459 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
460 }
461 else
462 {
463 if (width == 1)
464 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
465 else
b0e3cf2b 466 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
467 }
468
469 for (charset = from; charset < to; charset++)
470 if (!CHARSET_DEFINED_P (charset)) break;
471
472 return make_number (charset < to ? charset : 0);
473}
474
475DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
476 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 477If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
478 treated as a private charset.\n\
479INFO-VECTOR is a vector of the format:\n\
480 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
481 SHORT-NAME LONG-NAME DESCRIPTION]\n\
482The meanings of each elements is as follows:\n\
483DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
484CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
485WIDTH (integer) is the number of columns a character in the charset\n\
486occupies on the screen: one of 0, 1, and 2.\n\
487\n\
488DIRECTION (integer) is the rendering direction of characters in the\n\
489charset when rendering. If 0, render from right to left, else\n\
490render from left to right.\n\
491\n\
492ISO-FINAL-CHAR (character) is the final character of the\n\
493corresponding ISO 2022 charset.\n\
494\n\
495ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
496while encoding to variants of ISO 2022 coding system, one of the\n\
497following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
498\n\
499SHORT-NAME (string) is the short name to refer to the charset.\n\
500\n\
501LONG-NAME (string) is the long name to refer to the charset.\n\
502\n\
503DESCRIPTION (string) is the description string of the charset.")
504 (charset_id, charset_symbol, info_vector)
505 Lisp_Object charset_id, charset_symbol, info_vector;
506{
507 Lisp_Object *vec;
508
509 if (!NILP (charset_id))
510 CHECK_NUMBER (charset_id, 0);
511 CHECK_SYMBOL (charset_symbol, 1);
512 CHECK_VECTOR (info_vector, 2);
513
514 if (! NILP (charset_id))
515 {
516 if (! CHARSET_VALID_P (XINT (charset_id)))
517 error ("Invalid CHARSET: %d", XINT (charset_id));
518 else if (CHARSET_DEFINED_P (XINT (charset_id)))
519 error ("Already defined charset: %d", XINT (charset_id));
520 }
521
522 vec = XVECTOR (info_vector)->contents;
523 if (XVECTOR (info_vector)->size != 9
524 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
525 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
526 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
527 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
528 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
529 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
530 || !STRINGP (vec[6])
531 || !STRINGP (vec[7])
532 || !STRINGP (vec[8]))
533 error ("Invalid info-vector argument for defining charset %s",
534 XSYMBOL (charset_symbol)->name->data);
535
536 if (NILP (charset_id))
537 {
538 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
539 if (XINT (charset_id) == 0)
540 error ("There's no room for a new private charset %s",
541 XSYMBOL (charset_symbol)->name->data);
542 }
543
544 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
545 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 546 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
547 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
548 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
549 return Qnil;
550}
551
3fac5a51
KH
552DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
553 Sget_unused_iso_final_char, 2, 2, 0,
554 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
555DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
556CHARS is the number of characters in a dimension: 94 or 96.\n\
557\n\
558This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
559If there's no unused final char for the specified kind of charset,\n\
560return nil.")
561 (dimension, chars)
562 Lisp_Object dimension, chars;
563{
564 int final_char;
565
566 CHECK_NUMBER (dimension, 0);
567 CHECK_NUMBER (chars, 1);
568 if (XINT (dimension) != 1 && XINT (dimension) != 2)
569 error ("Invalid charset dimension %d, it should be 1 or 2",
570 XINT (dimension));
571 if (XINT (chars) != 94 && XINT (chars) != 96)
572 error ("Invalid charset chars %d, it should be 94 or 96",
573 XINT (chars));
574 for (final_char = '0'; final_char <= '?'; final_char++)
575 {
576 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
577 break;
578 }
579 return (final_char <= '?' ? make_number (final_char) : Qnil);
580}
581
4ed46869
KH
582DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
583 4, 4, 0,
584 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
585CHARSET should be defined by `defined-charset' in advance.")
586 (dimension, chars, final_char, charset_symbol)
587 Lisp_Object dimension, chars, final_char, charset_symbol;
588{
589 int charset;
590
591 CHECK_NUMBER (dimension, 0);
592 CHECK_NUMBER (chars, 1);
593 CHECK_NUMBER (final_char, 2);
594 CHECK_SYMBOL (charset_symbol, 3);
595
596 if (XINT (dimension) != 1 && XINT (dimension) != 2)
597 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
598 if (XINT (chars) != 94 && XINT (chars) != 96)
599 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
600 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
601 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
602 if ((charset = get_charset_id (charset_symbol)) < 0)
603 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
604
605 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
606 return Qnil;
607}
608
609/* Return number of different charsets in STR of length LEN. In
610 addition, for each found charset N, CHARSETS[N] is set 1. The
a29e3b1b 611 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
23d2a7f1 612 It may lookup a unification table TABLE if supplied. */
4ed46869
KH
613
614int
23d2a7f1 615find_charset_in_str (str, len, charsets, table)
028d516b
KH
616 unsigned char *str;
617 int len, *charsets;
23d2a7f1 618 Lisp_Object table;
4ed46869 619{
733eafd8 620 register int num = 0, c;
4ed46869 621
23d2a7f1
KH
622 if (! CHAR_TABLE_P (table))
623 table = Qnil;
624
4ed46869
KH
625 while (len > 0)
626 {
05505664 627 int bytes, charset;
733eafd8 628 c = *str;
23d2a7f1 629
733eafd8 630 if (c == LEADING_CODE_COMPOSITION)
05505664 631 {
733eafd8
KH
632 int cmpchar_id = str_cmpchar_id (str, len);
633 GLYPH *glyph;
05505664 634
733eafd8 635 if (cmpchar_id > 0)
05505664 636 {
733eafd8
KH
637 struct cmpchar_info *cmpcharp = cmpchar_table[cmpchar_id];
638 int i;
639
640 for (i = 0; i < cmpcharp->glyph_len; i++)
641 {
642 c = cmpcharp->glyph[i];
643 if (!NILP (table))
644 {
645 if ((c = unify_char (table, c, 0, 0, 0)) < 0)
646 c = cmpcharp->glyph[i];
647 }
648 if ((charset = CHAR_CHARSET (c)) < 0)
649 charset = CHARSET_ASCII;
650 if (!charsets[charset])
651 {
652 charsets[charset] = 1;
653 num += 1;
654 }
655 }
656 str += cmpcharp->len;
657 len -= cmpcharp->len;
658 continue;
05505664 659 }
05505664 660
733eafd8
KH
661 charset = CHARSET_ASCII;
662 bytes = 1;
663 }
23d2a7f1
KH
664 else
665 {
733eafd8
KH
666 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
667 if (! NILP (table))
668 {
669 int c1 = unify_char (table, c, 0, 0, 0);
670 if (c1 >= 0)
671 c = c1;
672 }
673 charset = CHAR_CHARSET (c);
23d2a7f1 674 }
4ed46869
KH
675
676 if (!charsets[charset])
677 {
678 charsets[charset] = 1;
679 num += 1;
680 }
681 str += bytes;
682 len -= bytes;
683 }
684 return num;
685}
686
687DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 688 2, 3, 0,
4ed46869 689 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1
KH
690BEG and END are buffer positions.\n\
691Optional arg TABLE if non-nil is a unification table to look up.")
692 (beg, end, table)
693 Lisp_Object beg, end, table;
4ed46869 694{
028d516b 695 int charsets[MAX_CHARSET + 1];
6ae1f27e 696 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
697 Lisp_Object val;
698
699 validate_region (&beg, &end);
700 from = XFASTINT (beg);
701 stop = to = XFASTINT (end);
6ae1f27e 702
4ed46869 703 if (from < GPT && GPT < to)
6ae1f27e
RS
704 {
705 stop = GPT;
706 stop_byte = GPT_BYTE;
707 }
708 else
709 stop_byte = CHAR_TO_BYTE (stop);
710
711 from_byte = CHAR_TO_BYTE (from);
712
028d516b 713 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
714 while (1)
715 {
6ae1f27e
RS
716 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
717 charsets, table);
4ed46869 718 if (stop < to)
6ae1f27e
RS
719 {
720 from = stop, from_byte = stop_byte;
721 stop = to, stop_byte = CHAR_TO_BYTE (stop);
722 }
4ed46869
KH
723 else
724 break;
725 }
6ae1f27e 726
4ed46869 727 val = Qnil;
513ee442 728 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
729 if (charsets[i])
730 val = Fcons (CHARSET_SYMBOL (i), val);
731 return val;
732}
733
734DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
735 1, 2, 0,
736 "Return a list of charsets in STR.\n\
737Optional arg TABLE if non-nil is a unification table to look up.")
738 (str, table)
739 Lisp_Object str, table;
4ed46869 740{
a29e3b1b 741 int charsets[MAX_CHARSET + 1];
4ed46869
KH
742 int i;
743 Lisp_Object val;
744
745 CHECK_STRING (str, 0);
87b089ad
RS
746
747 if (! STRING_MULTIBYTE (str))
748 return Qnil;
749
a29e3b1b 750 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
fc932ac6 751 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
23d2a7f1 752 charsets, table);
4ed46869 753 val = Qnil;
513ee442 754 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
755 if (charsets[i])
756 val = Fcons (CHARSET_SYMBOL (i), val);
757 return val;
758}
759\f
760DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 761 "")
4ed46869
KH
762 (charset, code1, code2)
763 Lisp_Object charset, code1, code2;
764{
765 CHECK_NUMBER (charset, 0);
766
767 if (NILP (code1))
768 XSETFASTINT (code1, 0);
769 else
770 CHECK_NUMBER (code1, 1);
771 if (NILP (code2))
772 XSETFASTINT (code2, 0);
773 else
774 CHECK_NUMBER (code2, 2);
775
776 if (!CHARSET_DEFINED_P (XINT (charset)))
777 error ("Invalid charset: %d", XINT (charset));
778
779 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
780}
781
782DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
783 "Return list of charset and one or two position-codes of CHAR.")
784 (ch)
785 Lisp_Object ch;
786{
787 Lisp_Object val;
6dc0722d 788 int charset, c1, c2;
4ed46869
KH
789
790 CHECK_NUMBER (ch, 0);
791 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 792 return (c2 >= 0
4ed46869
KH
793 ? Fcons (CHARSET_SYMBOL (charset),
794 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
795 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
796}
797
798DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
799 "Return charset of CHAR.")
800 (ch)
801 Lisp_Object ch;
802{
803 CHECK_NUMBER (ch, 0);
804
805 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
806}
807
808DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2b71bb78
KH
809 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
810\n\
811ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
812by their DIMENSION, CHARS, and FINAL-CHAR,\n\
813where as Emacs distinguishes them by charset symbol.\n\
814See the documentation of the function `charset-info' for the meanings of\n\
815DIMENSION, CHARS, and FINAL-CHAR.")
4ed46869
KH
816 (dimension, chars, final_char)
817 Lisp_Object dimension, chars, final_char;
818{
819 int charset;
820
821 CHECK_NUMBER (dimension, 0);
822 CHECK_NUMBER (chars, 1);
823 CHECK_NUMBER (final_char, 2);
824
825 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
826 return Qnil;
827 return CHARSET_SYMBOL (charset);
828}
829
9d3d8cba
KH
830/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
831 generic character. If GENERICP is zero, return nonzero iff C is a
832 valid normal character. Do not call this function directly,
833 instead use macro CHAR_VALID_P. */
834int
835char_valid_p (c, genericp)
836 int c, genericp;
837{
838 int charset, c1, c2;
839
840 if (c < 0)
841 return 0;
842 if (SINGLE_BYTE_CHAR_P (c))
843 return 1;
844 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
845 if (!CHARSET_VALID_P (charset))
846 return 0;
847 return (c < MIN_CHAR_COMPOSITION
848 ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two. */
849 ? (genericp && c1 == 0 && c2 == 0
850 || c1 >= 32 && c2 >= 32)
851 : (genericp && c1 == 0
852 || c1 >= 32))
853 : c < MIN_CHAR_COMPOSITION + n_cmpchars);
854}
855
856DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
a9d02884
DL
857 "Return t if OBJECT is a valid normal character.\n\
858If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
9d3d8cba
KH
859a valid generic character.")
860 (object, genericp)
861 Lisp_Object object, genericp;
862{
863 if (! NATNUMP (object))
864 return Qnil;
865 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
866}
867
d2665018
KH
868DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
869 Sunibyte_char_to_multibyte, 1, 1, 0,
870 "Convert the unibyte character CH to multibyte character.\n\
871The conversion is done based on nonascii-translate-table (which see)\n\
872 or nonascii-insert-offset (which see).")
873 (ch)
874 Lisp_Object ch;
875{
876 int c;
877
878 CHECK_NUMBER (ch, 0);
879 c = XINT (ch);
880 if (c < 0 || c >= 0400)
881 error ("Invalid unibyte character: %d", c);
882 c = unibyte_char_to_multibyte (c);
883 if (c < 0)
884 error ("Can't convert to multibyte character: %d", XINT (ch));
885 return make_number (c);
886}
887
4ed46869
KH
888DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
889 "Return byte length of multi-byte form of CHAR.")
890 (ch)
891 Lisp_Object ch;
892{
893 Lisp_Object val;
894 int bytes;
895
896 CHECK_NUMBER (ch, 0);
897 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
898 {
899 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
900
901 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
902 }
903 else
904 {
905 int charset = CHAR_CHARSET (XFASTINT (ch));
906
907 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
908 }
909
910 XSETFASTINT (val, bytes);
911 return val;
912}
913
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   A tab takes the buffer's tab width and a newline takes 0 columns.
   Other codes below 0x20, and 0x7F, take 2 columns when the buffer's
   `ctl_arrow' is non-nil and 4 otherwise.  Codes 0x20..0x7E take 1
   column.  A code above 0x7F takes the width recorded for it as a
   leading code when the buffer enables multibyte characters and the
   code is a base leading-code, and 4 columns otherwise.

   C is evaluated more than once, so it must have no side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))

932
933DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
934 "Return width of CHAR when displayed in the current buffer.\n\
935The width is measured by how many columns it occupies on the screen.")
936 (ch)
937 Lisp_Object ch;
938{
859f2b3c 939 Lisp_Object val, disp;
4ed46869 940 int c;
51c4025f 941 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869
KH
942
943 CHECK_NUMBER (ch, 0);
944
859f2b3c
RS
945 c = XINT (ch);
946
947 /* Get the way the display table would display it. */
51c4025f 948 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
949
950 if (VECTORP (disp))
951 XSETINT (val, XVECTOR (disp)->size);
952 else if (SINGLE_BYTE_CHAR_P (c))
953 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
954 else if (COMPOSITE_CHAR_P (c))
955 {
956 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
957 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
958 }
959 else
960 {
961 int charset = CHAR_CHARSET (c);
962
963 XSETFASTINT (val, CHARSET_WIDTH (charset));
964 }
965 return val;
966}
967
968/* Return width of string STR of length LEN when displayed in the
969 current buffer. The width is measured by how many columns it
970 occupies on the screen. */
859f2b3c 971
4ed46869
KH
972int
973strwidth (str, len)
974 unsigned char *str;
975 int len;
976{
977 unsigned char *endp = str + len;
978 int width = 0;
c4a4e28f 979 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 980
859f2b3c
RS
981 while (str < endp)
982 {
983 if (*str == LEADING_CODE_COMPOSITION)
984 {
985 int id = str_cmpchar_id (str, endp - str);
986
987 if (id < 0)
988 {
989 width += 4;
990 str++;
991 }
992 else
993 {
994 width += cmpchar_table[id]->width;
995 str += cmpchar_table[id]->len;
996 }
997 }
998 else
999 {
1000 Lisp_Object disp;
e515b0a9
KH
1001 int thislen;
1002 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
859f2b3c
RS
1003
1004 /* Get the way the display table would display it. */
acc35c36
RS
1005 if (dp)
1006 disp = DISP_CHAR_VECTOR (dp, c);
1007 else
1008 disp = Qnil;
859f2b3c
RS
1009
1010 if (VECTORP (disp))
e515b0a9 1011 width += XVECTOR (disp)->size;
859f2b3c 1012 else
e515b0a9 1013 width += ONE_BYTE_CHAR_WIDTH (*str);
859f2b3c 1014
e515b0a9 1015 str += thislen;
859f2b3c
RS
1016 }
1017 }
4ed46869
KH
1018 return width;
1019}
1020
1021DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1022 "Return width of STRING when displayed in the current buffer.\n\
1023Width is measured by how many columns it occupies on the screen.\n\
046b1f03
RS
1024When calculating width of a multibyte character in STRING,\n\
1025only the base leading-code is considered; the validity of\n\
1026the following bytes is not checked.")
4ed46869
KH
1027 (str)
1028 Lisp_Object str;
1029{
1030 Lisp_Object val;
1031
1032 CHECK_STRING (str, 0);
fc932ac6
RS
1033 XSETFASTINT (val, strwidth (XSTRING (str)->data,
1034 STRING_BYTES (XSTRING (str))));
4ed46869
KH
1035 return val;
1036}
1037
1038DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1039 "Return the direction of CHAR.\n\
1040The returned value is 0 for left-to-right and 1 for right-to-left.")
1041 (ch)
1042 Lisp_Object ch;
1043{
1044 int charset;
1045
1046 CHECK_NUMBER (ch, 0);
1047 charset = CHAR_CHARSET (XFASTINT (ch));
1048 if (!CHARSET_DEFINED_P (charset))
93bcb785 1049 invalid_character (XINT (ch));
4ed46869
KH
1050 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1051}
1052
af4fecb4 1053DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
6ae1f27e 1054 "Return number of characters between BEG and END.")
046b1f03
RS
1055 (beg, end)
1056 Lisp_Object beg, end;
1057{
6ae1f27e 1058 int from, to;
046b1f03
RS
1059
1060 from = min (XFASTINT (beg), XFASTINT (end));
a8a35e61 1061 to = max (XFASTINT (beg), XFASTINT (end));
046b1f03 1062
6ae1f27e
RS
1063 return to - from;
1064}
9036eb45 1065
87b089ad
RS
1066/* Return the number of characters in the NBYTES bytes at PTR.
1067 This works by looking at the contents and checking for multibyte sequences.
1068 However, if the current buffer has enable-multibyte-characters = nil,
1069 we treat each byte as a character. */
1070
6ae1f27e
RS
1071int
1072chars_in_text (ptr, nbytes)
1073 unsigned char *ptr;
1074 int nbytes;
1075{
93bcb785 1076 unsigned char *endp, c;
6ae1f27e 1077 int chars;
046b1f03 1078
87b089ad
RS
1079 /* current_buffer is null at early stages of Emacs initialization. */
1080 if (current_buffer == 0
1081 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1082 return nbytes;
a8a35e61 1083
6ae1f27e
RS
1084 endp = ptr + nbytes;
1085 chars = 0;
046b1f03 1086
6ae1f27e
RS
1087 while (ptr < endp)
1088 {
93bcb785
KH
1089 c = *ptr++;
1090
1091 if (BASE_LEADING_CODE_P (c))
1092 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
046b1f03
RS
1093 chars++;
1094 }
1095
6ae1f27e 1096 return chars;
046b1f03
RS
1097}
1098
87b089ad
RS
1099/* Return the number of characters in the NBYTES bytes at PTR.
1100 This works by looking at the contents and checking for multibyte sequences.
1101 It ignores enable-multibyte-characters. */
1102
int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *scan = ptr;
  unsigned char *stop = ptr + nbytes;
  int total = 0;

  while (scan < stop)
    {
      unsigned char head = *scan;

      scan++;
      if (BASE_LEADING_CODE_P (head))
        {
          /* Skip the bytes belonging to this multibyte sequence.  */
          for (; scan < stop && ! CHAR_HEAD_P (*scan); scan++)
            ;
        }
      total++;
    }

  return total;
}
1125
1126DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
4ed46869 1127 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
1128 (n, args)
1129 int n;
4ed46869
KH
1130 Lisp_Object *args;
1131{
53316e55 1132 int i;
4ed46869 1133 unsigned char *buf
bd4c6dd0 1134 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
4ed46869
KH
1135 unsigned char *p = buf;
1136 Lisp_Object val;
1137
1138 for (i = 0; i < n; i++)
1139 {
1140 int c, len;
1141 unsigned char *str;
1142
1143 if (!INTEGERP (args[i]))
b0e3cf2b 1144 CHECK_NUMBER (args[i], 0);
4ed46869
KH
1145 c = XINT (args[i]);
1146 len = CHAR_STRING (c, p, str);
1147 if (p != str)
1148 /* C is a composite character. */
1149 bcopy (str, p, len);
1150 p += len;
1151 }
1152
27802600 1153 val = make_string_from_bytes (buf, n, p - buf);
4ed46869
KH
1154 return val;
1155}
1156
1157#endif /* emacs */
1158\f
/*** Composite characters stuff ***/
1160
1161/* Each composite character is identified by CMPCHAR-ID which is
1162 assigned when Emacs needs the character code of the composite
1163 character (e.g. when displaying it on the screen). See the
1164 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1165 composite character is represented in Emacs. */
1166
1167/* If `static' is defined, it means that it is defined to null string. */
1168#ifndef static
1169/* The following function is copied from lread.c. */
static int
hash_string (ptr, len)
     unsigned char *ptr;
     int len;
{
  register unsigned char *limit = ptr + len;
  register unsigned char *cursor;
  register int hash = 0;

  for (cursor = ptr; cursor != limit; cursor++)
    {
      register unsigned char c = *cursor;

      /* Bytes at or above octal 0140 are lowered by decimal 40.  */
      if (c >= 0140)
        c -= 40;
      /* Mix the byte in: shift the hash left by 3, add back its top
         bits, then add the byte.  */
      hash = ((hash << 3) + (hash >> 28) + c);
    }

  /* Keep only the low 30 bits.  */
  return hash & 07777777777;
}
1188#endif
1189
4ed46869
KH
/* Number of buckets in `cmpchar_hash_table'.  */
#define CMPCHAR_HASH_TABLE_SIZE 0xFFF

static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];

/* Each element of `cmpchar_hash_table' is a pointer to an array of
   integer, where the 1st element is the size of the array, the 2nd
   element is how many elements are actually used in the array, and
   the remaining elements are CMPCHAR-IDs of composite characters of
   the same hash value.

   The macro arguments are parenthesized so that any pointer or index
   expression can be passed.  Each macro still expands to an lvalue.  */
#define CMPCHAR_HASH_SIZE(table) ((table)[0])
#define CMPCHAR_HASH_USED(table) ((table)[1])
#define CMPCHAR_HASH_CMPCHAR_ID(table, i) ((table)[(i)])
1202
1203/* Return CMPCHAR-ID of the composite character in STR of the length
1204 LEN. If the composite character has not yet been registered,
1205 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1206 is the sole function for assigning CMPCHAR-ID. */
1207int
1208str_cmpchar_id (str, len)
8867de67 1209 const unsigned char *str;
4ed46869
KH
1210 int len;
1211{
1212 int hash_idx, *hashp;
1213 unsigned char *buf;
1214 int embedded_rule; /* 1 if composition rule is embedded. */
1215 int chars; /* number of components. */
1216 int i;
1217 struct cmpchar_info *cmpcharp;
1218
4ed46869
KH
1219 /* The second byte 0xFF means compostion rule is embedded. */
1220 embedded_rule = (str[1] == 0xFF);
1221
1222 /* At first, get the actual length of the composite character. */
1223 {
8867de67 1224 const unsigned char *p, *endp = str + 1, *lastp = str + len;
4ed46869
KH
1225 int bytes;
1226
6ae1f27e 1227 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
93bcb785
KH
1228 if (endp - str < 5)
1229 /* Any composite char have at least 5-byte length. */
1230 return -1;
1231
4ed46869 1232 chars = 0;
93bcb785 1233 p = str + 1;
4ed46869
KH
1234 while (p < endp)
1235 {
93bcb785 1236 if (embedded_rule) p++;
4ed46869 1237 /* No need of checking if *P is 0xA0 because
93bcb785
KH
1238 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1239 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
4ed46869
KH
1240 chars++;
1241 }
93bcb785
KH
1242 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1243 /* Invalid components. */
4ed46869 1244 return -1;
93bcb785 1245 len = p - str;
4ed46869
KH
1246 }
1247 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1248 hashp = cmpchar_hash_table[hash_idx];
1249
1250 /* Then, look into the hash table. */
1251 if (hashp != NULL)
1252 /* Find the correct one among composite characters of the same
1253 hash value. */
1254 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1255 {
1256 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1257 if (len == cmpcharp->len
1258 && ! bcmp (str, cmpcharp->data, len))
1259 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1260 }
1261
1262 /* We have to register the composite character in cmpchar_table. */
513ee442
KH
1263 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1264 /* No, we have no more room for a new composite character. */
1265 return -1;
1266
4ed46869
KH
1267 /* Make the entry in hash table. */
1268 if (hashp == NULL)
1269 {
1270 /* Make a table for 8 composite characters initially. */
1271 hashp = (cmpchar_hash_table[hash_idx]
1272 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1273 CMPCHAR_HASH_SIZE (hashp) = 10;
1274 CMPCHAR_HASH_USED (hashp) = 2;
1275 }
1276 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1277 {
1278 CMPCHAR_HASH_SIZE (hashp) += 8;
1279 hashp = (cmpchar_hash_table[hash_idx]
1280 = (int *) xrealloc (hashp,
1281 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1282 }
1283 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1284 CMPCHAR_HASH_USED (hashp)++;
1285
1286 /* Set information of the composite character in cmpchar_table. */
1287 if (cmpchar_table_size == 0)
1288 {
1289 /* This is the first composite character to be registered. */
1290 cmpchar_table_size = 256;
1291 cmpchar_table
1292 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1293 * cmpchar_table_size);
1294 }
1295 else if (cmpchar_table_size <= n_cmpchars)
1296 {
1297 cmpchar_table_size += 256;
1298 cmpchar_table
1299 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1300 sizeof (cmpchar_table[0])
1301 * cmpchar_table_size);
1302 }
1303
1304 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1305
1306 cmpcharp->len = len;
1307 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1308 bcopy (str, cmpcharp->data, len);
1309 cmpcharp->data[len] = 0;
1310 cmpcharp->glyph_len = chars;
1311 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1312 if (embedded_rule)
1313 {
1314 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1315 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1316 }
1317 else
1318 {
1319 cmpcharp->cmp_rule = NULL;
1320 cmpcharp->col_offset = NULL;
1321 }
1322
1323 /* Setup GLYPH data and composition rules (if any) so as not to make
1324 them every time on displaying. */
1325 {
1326 unsigned char *bufp;
1327 int width;
1328 float leftmost = 0.0, rightmost = 1.0;
1329
1330 if (embedded_rule)
1331 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1332 cmpcharp->col_offset[0] = 0;
1333
1334 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1335 {
1336 if (embedded_rule)
1337 cmpcharp->cmp_rule[i] = *bufp++;
1338
1339 if (*bufp == 0xA0) /* This is an ASCII character. */
1340 {
1341 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1342 width = 1;
1343 bufp++;
1344 }
1345 else /* Multibyte character. */
1346 {
1347 /* Make `bufp' point normal multi-byte form temporally. */
1348 *bufp -= 0x20;
1349 cmpcharp->glyph[i]
1350 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1351 width = WIDTH_BY_CHAR_HEAD (*bufp);
1352 *bufp += 0x20;
1353 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1354 }
1355
1356 if (embedded_rule && i > 0)
1357 {
1358 /* Reference points (global_ref and new_ref) are
1359 encoded as below:
1360
1361 0--1--2 -- ascent
1362 | |
1363 | |
1364 | 4 -+--- center
1365 -- 3 5 -- baseline
1366 | |
1367 6--7--8 -- descent
1368
1369 Now, we calculate the column offset of the new glyph
1370 from the left edge of the first glyph. This can avoid
1371 the same calculation everytime displaying this
1372 composite character. */
1373
1374 /* Reference points of global glyph and new glyph. */
1375 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1376 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1377 /* Column offset relative to the first glyph. */
1378 float left = (leftmost
1379 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1380 - (new_ref % 3) * width / 2.0);
1381
1382 cmpcharp->col_offset[i] = left;
1383 if (left < leftmost)
1384 leftmost = left;
1385 if (left + width > rightmost)
1386 rightmost = left + width;
1387 }
1388 else
1389 {
1390 if (width > rightmost)
1391 rightmost = width;
1392 }
1393 }
1394 if (embedded_rule)
1395 {
1396 /* Now col_offset[N] are relative to the left edge of the
1397 first component. Make them relative to the left edge of
1398 overall glyph. */
1399 for (i = 0; i < chars; i++)
1400 cmpcharp->col_offset[i] -= leftmost;
1401 /* Make rightmost holds width of overall glyph. */
1402 rightmost -= leftmost;
1403 }
1404
1405 cmpcharp->width = rightmost;
1406 if (cmpcharp->width < rightmost)
1407 /* To get a ceiling integer value. */
1408 cmpcharp->width++;
1409 }
1410
1411 cmpchar_table[n_cmpchars] = cmpcharp;
1412
1413 return n_cmpchars++;
1414}
1415
1416/* Return the Nth element of the composite character C. */
1417int
1418cmpchar_component (c, n)
1419 unsigned int c, n;
1420{
1421 int id = COMPOSITE_CHAR_ID (c);
1422
1423 if (id >= n_cmpchars /* C is not a valid composite character. */
1424 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1425 return -1;
1426 /* No face data is stored in glyph code. */
1427 return ((int) (cmpchar_table[id]->glyph[n]));
1428}
1429
1430DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1431 "T if CHAR is a composite character.")
1432 (ch)
1433 Lisp_Object ch;
1434{
1435 CHECK_NUMBER (ch, 0);
1436 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1437}
1438
1439DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1440 2, 2, 0,
1441 "Return the IDXth component character of composite character CHARACTER.")
1442 (character, idx)
1443 Lisp_Object character, idx;
1444{
1445 int c;
1446
1447 CHECK_NUMBER (character, 0);
1448 CHECK_NUMBER (idx, 1);
1449
1450 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1451 args_out_of_range (character, idx);
1452
1453 return make_number (c);
1454}
1455
1456DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1457 2, 2, 0,
55001746
KH
1458 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1459The returned rule is for composing the Nth component\n\
1460on the (N-1)th component. If N is 0, the returned value is always 255.")
1461 (character, n)
1462 Lisp_Object character, n;
4ed46869
KH
1463{
1464 int id, i;
1465
1466 CHECK_NUMBER (character, 0);
55001746 1467 CHECK_NUMBER (n, 1);
4ed46869
KH
1468
1469 id = COMPOSITE_CHAR_ID (XINT (character));
1470 if (id < 0 || id >= n_cmpchars)
1471 error ("Invalid composite character: %d", XINT (character));
55001746 1472 i = XINT (n);
4ed46869 1473 if (i > cmpchar_table[id]->glyph_len)
55001746 1474 args_out_of_range (character, n);
4ed46869
KH
1475
1476 return make_number (cmpchar_table[id]->cmp_rule[i]);
1477}
1478
1479DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1480 Scmpchar_cmp_rule_p, 1, 1, 0,
1481 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1482 (character)
1483 Lisp_Object character;
1484{
1485 int id;
1486
1487 CHECK_NUMBER (character, 0);
1488 id = COMPOSITE_CHAR_ID (XINT (character));
1489 if (id < 0 || id >= n_cmpchars)
1490 error ("Invalid composite character: %d", XINT (character));
1491
1492 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1493}
1494
1495DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1496 Scmpchar_cmp_count, 1, 1, 0,
1497 "Return number of compoents of composite character CHARACTER.")
1498 (character)
1499 Lisp_Object character;
1500{
1501 int id;
1502
1503 CHECK_NUMBER (character, 0);
1504 id = COMPOSITE_CHAR_ID (XINT (character));
1505 if (id < 0 || id >= n_cmpchars)
1506 error ("Invalid composite character: %d", XINT (character));
1507
1508 return (make_number (cmpchar_table[id]->glyph_len));
1509}
1510
1511DEFUN ("compose-string", Fcompose_string, Scompose_string,
1512 1, 1, 0,
1513 "Return one char string composed from all characters in STRING.")
1514 (str)
1515 Lisp_Object str;
1516{
1517 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1518 int len, i;
1519
1520 CHECK_STRING (str, 0);
1521
1522 buf[0] = LEADING_CODE_COMPOSITION;
1523 p = XSTRING (str)->data;
fc932ac6 1524 pend = p + STRING_BYTES (XSTRING (str));
4ed46869
KH
1525 i = 1;
1526 while (p < pend)
1527 {
1528 if (*p < 0x20 || *p == 127) /* control code */
1529 error ("Invalid component character: %d", *p);
1530 else if (*p < 0x80) /* ASCII */
1531 {
1532 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1533 error ("Too long string to be composed: %s", XSTRING (str)->data);
1534 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1535 code itself. */
1536 buf[i++] = 0xA0;
1537 buf[i++] = *p++ + 0x80;
1538 }
1539 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1540 {
1541 /* Already composed. Eliminate the heading
1542 LEADING_CODE_COMPOSITION, keep the remaining bytes
1543 unchanged. */
1544 p++;
1545 ptemp = p;
6ae1f27e 1546 while (! CHAR_HEAD_P (*p)) p++;
4ed46869
KH
1547 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1548 error ("Too long string to be composed: %s", XSTRING (str)->data);
1549 bcopy (ptemp, buf + i, p - ptemp);
1550 i += p - ptemp;
1551 }
1552 else /* multibyte char */
1553 {
1554 /* Add 0x20 to the base leading-code, keep the remaining
1555 bytes unchanged. */
1556 len = BYTES_BY_CHAR_HEAD (*p);
1557 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1558 error ("Too long string to be composed: %s", XSTRING (str)->data);
1559 bcopy (p, buf + i, len);
1560 buf[i] += 0x20;
1561 p += len, i += len;
1562 }
1563 }
1564
1565 if (i < 5)
1566 /* STR contains only one character, which can't be composed. */
1567 error ("Too short string to be composed: %s", XSTRING (str)->data);
1568
27802600 1569 return make_string_from_bytes (buf, 1, i);
4ed46869
KH
1570}
1571
1572\f
1573charset_id_internal (charset_name)
1574 char *charset_name;
1575{
1576 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1577
1578 if (!VECTORP (val))
1579 error ("Charset %s is not defined", charset_name);
1580
1581 return (XINT (XVECTOR (val)->contents[0]));
1582}
1583
1584DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1585 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1586 ()
1587{
1588 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1589 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1590 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1591 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1592 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1593 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1594 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1595 return Qnil;
1596}
1597
1598init_charset_once ()
1599{
1600 int i, j, k;
1601
1602 staticpro (&Vcharset_table);
1603 staticpro (&Vcharset_symbol_table);
1604
1605 /* This has to be done here, before we call Fmake_char_table. */
1606 Qcharset_table = intern ("charset-table");
1607 staticpro (&Qcharset_table);
1608
1609 /* Intern this now in case it isn't already done.
1610 Setting this variable twice is harmless.
1611 But don't staticpro it here--that is done in alloc.c. */
1612 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1613
1614 /* Now we are ready to set up this property, so we can
1615 create the charset table. */
1616 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1617 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1618
513ee442 1619 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
4ed46869
KH
1620
1621 /* Setup tables. */
1622 for (i = 0; i < 2; i++)
1623 for (j = 0; j < 2; j++)
1624 for (k = 0; k < 128; k++)
1625 iso_charset_table [i][j][k] = -1;
1626
1627 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1628 cmpchar_table_size = n_cmpchars = 0;
1629
1630 for (i = 0; i < 256; i++)
1631 BYTES_BY_CHAR_HEAD (i) = 1;
1632 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1633 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1634 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1635 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1636 /* The following doesn't reflect the actual bytes, but just to tell
1637 that it is a start of a multibyte character. */
1638 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1639
1640 for (i = 0; i < 128; i++)
1641 WIDTH_BY_CHAR_HEAD (i) = 1;
1642 for (; i < 256; i++)
1643 WIDTH_BY_CHAR_HEAD (i) = 4;
1644 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1645 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1646 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1647 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1648}
1649
1650#ifdef emacs
1651
1652syms_of_charset ()
1653{
1654 Qascii = intern ("ascii");
1655 staticpro (&Qascii);
1656
1657 Qcharset = intern ("charset");
1658 staticpro (&Qcharset);
1659
1660 /* Define ASCII charset now. */
1661 update_charset_table (make_number (CHARSET_ASCII),
1662 make_number (1), make_number (94),
1663 make_number (1),
1664 make_number (0),
1665 make_number ('B'),
1666 make_number (0),
1667 build_string ("ASCII"),
1668 build_string ("ASCII"),
1669 build_string ("ASCII (ISO646 IRV)"));
1670 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1671 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1672
1673 Qcomposition = intern ("composition");
1674 staticpro (&Qcomposition);
1675 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1676
1677 defsubr (&Sdefine_charset);
3fac5a51 1678 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1679 defsubr (&Sdeclare_equiv_charset);
1680 defsubr (&Sfind_charset_region);
1681 defsubr (&Sfind_charset_string);
1682 defsubr (&Smake_char_internal);
1683 defsubr (&Ssplit_char);
1684 defsubr (&Schar_charset);
1685 defsubr (&Siso_charset);
9d3d8cba 1686 defsubr (&Schar_valid_p);
d2665018 1687 defsubr (&Sunibyte_char_to_multibyte);
4ed46869
KH
1688 defsubr (&Schar_bytes);
1689 defsubr (&Schar_width);
1690 defsubr (&Sstring_width);
1691 defsubr (&Schar_direction);
af4fecb4 1692 defsubr (&Schars_in_region);
87b089ad 1693 defsubr (&Sstring);
4ed46869
KH
1694 defsubr (&Scmpcharp);
1695 defsubr (&Scmpchar_component);
1696 defsubr (&Scmpchar_cmp_rule);
1697 defsubr (&Scmpchar_cmp_rule_p);
1698 defsubr (&Scmpchar_cmp_count);
1699 defsubr (&Scompose_string);
1700 defsubr (&Ssetup_special_charsets);
1701
1702 DEFVAR_LISP ("charset-list", &Vcharset_list,
1703 "List of charsets ever defined.");
1704 Vcharset_list = Fcons (Qascii, Qnil);
1705
b0e3cf2b
KH
1706 DEFVAR_LISP ("character-unification-table-vector",
1707 &Vcharacter_unification_table_vector,
1708 "Vector of cons cell of a symbol and unification table ever defined.\n\
1709An ID of a unification table is an index of this vector.");
1710 Vcharacter_unification_table_vector = Fmake_vector (make_number (16), Qnil);
1711
4ed46869
KH
1712 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1713 "Leading-code of composite characters.");
1714 leading_code_composition = LEADING_CODE_COMPOSITION;
1715
1716 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1717 "Leading-code of private TYPE9N charset of column-width 1.");
1718 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1719
1720 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1721 "Leading-code of private TYPE9N charset of column-width 2.");
1722 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1723
1724 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1725 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1726 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1727
1728 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1729 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1730 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
1731
1732 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
d2665018 1733 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
4cf9710d
RS
1734This is used for converting unibyte text to multibyte,\n\
1735and for inserting character codes specified by number.\n\n\
1736Conversion is performed only when multibyte characters are enabled,\n\
1737and it serves to convert a Latin-1 or similar 8-bit character code\n\
1738to the corresponding Emacs character code.\n\
1739If `nonascii-translate-table' is non-nil, it overrides this variable.");
35e623fb 1740 nonascii_insert_offset = 0;
b0e3cf2b 1741
4cf9710d
RS
1742 DEFVAR_LISP ("nonascii-translate-table", &Vnonascii_translate_table,
1743 "Translate table for converting non-ASCII unibyte codes to multibyte.\n\
1744This is used for converting unibyte text to multibyte,\n\
1745and for inserting character codes specified by number.\n\n\
1746Conversion is performed only when multibyte characters are enabled,\n\
1747and it serves to convert a Latin-1 or similar 8-bit character code\n\
1748to the corresponding Emacs character code.\n\n\
1749If this is nil, `nonascii-insert-offset' is used instead.");
1750 Vnonascii_translate_table = Qnil;
1751
b0e3cf2b
KH
1752 DEFVAR_INT ("min-composite-char", &min_composite_char,
1753 "Minimum character code of a composite character.");
1754 min_composite_char = MIN_CHAR_COMPOSITION;
4ed46869
KH
1755}
1756
1757#endif /* emacs */