(printchar): Fix previous change.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
35e623fb 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
25#include <stdio.h>
26
27#ifdef emacs
28
29#include <sys/types.h>
30#include <config.h>
31#include "lisp.h"
32#include "buffer.h"
33#include "charset.h"
34#include "coding.h"
fc6b09bf 35#include "disptab.h"
4ed46869
KH
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
0282eb69 44Lisp_Object Qunknown;
4ed46869
KH
45
46/* Declaration of special leading-codes. */
47int leading_code_composition; /* for composite characters */
48int leading_code_private_11; /* for private DIMENSION1 of 1-column */
49int leading_code_private_12; /* for private DIMENSION1 of 2-column */
50int leading_code_private_21; /* for private DIMENSION2 of 1-column */
51int leading_code_private_22; /* for private DIMENSION2 of 2-column */
52
53/* Declaration of special charsets. */
54int charset_ascii; /* ASCII */
55int charset_composition; /* for a composite character */
56int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
57int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
58int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
59int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
60int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
61int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
62int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
63
b0e3cf2b
KH
64int min_composite_char;
65
4ed46869
KH
66Lisp_Object Qcharset_table;
67
68/* A char-table containing information of each character set. */
69Lisp_Object Vcharset_table;
70
71/* A vector of charset symbol indexed by charset-id. This is used
72 only for returning charset symbol from C functions. */
73Lisp_Object Vcharset_symbol_table;
74
75/* A list of charset symbols ever defined. */
76Lisp_Object Vcharset_list;
77
537efd8d
KH
78/* Vector of translation table ever defined.
79 ID of a translation table is used to index this vector. */
80Lisp_Object Vtranslation_table_vector;
b0e3cf2b 81
c1a08b4c
KH
82/* A char-table for characters which may invoke auto-filling. */
83Lisp_Object Vauto_fill_chars;
84
85Lisp_Object Qauto_fill_chars;
86
4ed46869
KH
87/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
88int bytes_by_char_head[256];
89int width_by_char_head[256];
90
91/* Mapping table from ISO2022's charset (specified by DIMENSION,
92 CHARS, and FINAL-CHAR) to Emacs' charset. */
93int iso_charset_table[2][2][128];
94
513ee442
KH
95/* Table of pointers to the structure `cmpchar_info' indexed by
96 CMPCHAR-ID. */
97struct cmpchar_info **cmpchar_table;
98/* The current size of `cmpchar_table'. */
99static int cmpchar_table_size;
100/* Number of the current composite characters. */
101int n_cmpchars;
102
4ed46869
KH
103/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
104unsigned char *_fetch_multibyte_char_p;
105int _fetch_multibyte_char_len;
106
35e623fb
RS
107/* Offset to add to a non-ASCII value when inserting it. */
108int nonascii_insert_offset;
109
4cf9710d
RS
110/* Translation table for converting non-ASCII unibyte characters
111 to multibyte codes, or nil. */
b4e9dd77 112Lisp_Object Vnonascii_translation_table;
4cf9710d 113
8a73a704
KH
114/* List of all possible generic characters. */
115Lisp_Object Vgeneric_character_list;
116
046b1f03
RS
117#define min(X, Y) ((X) < (Y) ? (X) : (Y))
118#define max(X, Y) ((X) > (Y) ? (X) : (Y))
119\f
93bcb785
KH
120void
121invalid_character (c)
122 int c;
123{
ba7434e5 124 error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
93bcb785
KH
125}
126
127
4ed46869
KH
128/* Set STR a pointer to the multi-byte form of the character C. If C
129 is not a composite character, the multi-byte form is set in WORKBUF
130 and STR points WORKBUF. The caller should allocate at least 4-byte
131 area at WORKBUF in advance. Returns the length of the multi-byte
bd4c6dd0
KH
132 form. If C is an invalid character to have a multi-byte form,
133 signal an error.
4ed46869
KH
134
135 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
136 function directly if C can be an ASCII character. */
137
138int
139non_ascii_char_to_string (c, workbuf, str)
140 int c;
141 unsigned char *workbuf, **str;
142{
6dc0722d 143 int charset, c1, c2;
4ed46869 144
0282eb69 145 if (c & ~GLYPH_MASK_CHAR) /* This includes the case C is negative. */
8ac5a9cc
KH
146 {
147 if (c & CHAR_META)
148 /* Move the meta bit to the right place for a string. */
149 c |= 0x80;
150 if (c & CHAR_CTL)
151 c &= 0x9F;
152 else if (c & CHAR_SHIFT && (c & 0x7F) >= 'a' && (c & 0x7F) <= 'z')
153 c -= 'a' - 'A';
154 *str = workbuf;
155 *workbuf = c;
156 return 1;
157 }
158
da63a5fe
KH
159 if (c < 0)
160 invalid_character (c);
161
4ed46869
KH
162 if (COMPOSITE_CHAR_P (c))
163 {
164 int cmpchar_id = COMPOSITE_CHAR_ID (c);
165
166 if (cmpchar_id < n_cmpchars)
167 {
168 *str = cmpchar_table[cmpchar_id]->data;
169 return cmpchar_table[cmpchar_id]->len;
170 }
171 else
172 {
93bcb785 173 invalid_character (c);
4ed46869
KH
174 }
175 }
176
177 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
bd4c6dd0
KH
178 if (!charset
179 || ! CHARSET_DEFINED_P (charset)
180 || c1 >= 0 && c1 < 32
181 || c2 >= 0 && c2 < 32)
93bcb785 182 invalid_character (c);
4ed46869
KH
183
184 *str = workbuf;
185 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
186 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
187 workbuf++;
188 *workbuf++ = c1 | 0x80;
6dc0722d 189 if (c2 >= 0)
4ed46869
KH
190 *workbuf++ = c2 | 0x80;
191
192 return (workbuf - *str);
193}
194
195/* Return a non-ASCII character of which multi-byte form is at STR of
196 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
537efd8d
KH
197 multibyte form is set to the address ACTUAL_LEN.
198
199 If exclude_tail_garbage is nonzero, ACTUAL_LEN excludes gabage
200 bytes following the non-ASCII character.
4ed46869
KH
201
202 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
203 directly if STR can hold an ASCII character. */
204
dfcf069d 205int
537efd8d 206string_to_non_ascii_char (str, len, actual_len, exclude_tail_garbage)
8867de67 207 const unsigned char *str;
537efd8d 208 int len, *actual_len, exclude_tail_garbage;
4ed46869
KH
209{
210 int charset;
211 unsigned char c1, c2;
bb63e573
KH
212 int c, bytes;
213 const unsigned char *begp = str;
4ed46869 214
bb63e573 215 c = *str++;
90d7b74e
KH
216 bytes = 1;
217
218 if (BASE_LEADING_CODE_P (c))
bb63e573
KH
219 do {
220 while (bytes < len && ! CHAR_HEAD_P (begp[bytes])) bytes++;
90d7b74e
KH
221
222 if (c == LEADING_CODE_COMPOSITION)
223 {
bb63e573 224 int cmpchar_id = str_cmpchar_id (begp, bytes);
90d7b74e
KH
225
226 if (cmpchar_id >= 0)
5d76bc89
KH
227 {
228 c = MAKE_COMPOSITE_CHAR (cmpchar_id);
1dca54f6 229 str += cmpchar_table[cmpchar_id]->len - 1;
5d76bc89 230 }
1dca54f6
KH
231 else
232 str += bytes - 1;
90d7b74e
KH
233 }
234 else
235 {
bb63e573 236 const unsigned char *endp = begp + bytes;
90d7b74e 237 int charset = c, c1, c2 = 0;
4ed46869 238
bb63e573 239 if (str >= endp) break;
a6c25326 240 if (c >= LEADING_CODE_PRIVATE_11 && c <= LEADING_CODE_PRIVATE_22)
90d7b74e 241 {
bb63e573
KH
242 charset = *str++;
243 if (str < endp)
244 c1 = *str++ & 0x7F;
245 else
246 c1 = charset, charset = c;
90d7b74e 247 }
bb63e573
KH
248 else
249 c1 = *str++ & 0x7f;
250 if (CHARSET_DEFINED_P (charset)
251 && CHARSET_DIMENSION (charset) == 2
252 && str < endp)
1dca54f6 253 c2 = *str++ & 0x7F;
bb63e573 254 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
90d7b74e 255 }
bb63e573 256 } while (0);
4ed46869
KH
257
258 if (actual_len)
bb63e573 259 *actual_len = exclude_tail_garbage ? str - begp : bytes;
4ed46869
KH
260 return c;
261}
262
263/* Return the length of the multi-byte form at string STR of length LEN. */
264int
265multibyte_form_length (str, len)
8867de67 266 const unsigned char *str;
4ed46869
KH
267 int len;
268{
90d7b74e 269 int bytes = 1;
4ed46869 270
90d7b74e
KH
271 if (BASE_LEADING_CODE_P (*str))
272 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
4ed46869 273
90d7b74e 274 return bytes;
4ed46869
KH
275}
276
277/* Check if string STR of length LEN contains valid multi-byte form of
278 a character. If valid, charset and position codes of the character
279 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
280 return -1. This should be used only in the macro SPLIT_STRING
281 which checks range of STR in advance. */
282
dfcf069d 283int
4ed46869 284split_non_ascii_string (str, len, charset, c1, c2)
8867de67
KH
285 register const unsigned char *str;
286 register unsigned char *c1, *c2;
4ed46869
KH
287 register int len, *charset;
288{
289 register unsigned int cs = *str++;
290
291 if (cs == LEADING_CODE_COMPOSITION)
292 {
293 int cmpchar_id = str_cmpchar_id (str - 1, len);
294
295 if (cmpchar_id < 0)
296 return -1;
297 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
298 }
299 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
300 && CHARSET_DEFINED_P (cs))
301 {
302 *charset = cs;
303 if (*str < 0xA0)
304 return -1;
305 *c1 = (*str++) & 0x7F;
306 if (CHARSET_DIMENSION (cs) == 2)
307 {
308 if (*str < 0xA0)
309 return -1;
310 *c2 = (*str++) & 0x7F;
311 }
312 }
313 else
314 return -1;
315 return 0;
316}
317
537efd8d 318/* Translate character C by translation table TABLE. If C
b4e9dd77
KH
319 is negative, translate a character specified by CHARSET, C1, and C2
320 (C1 and C2 are code points of the character). If no translation is
321 found in TABLE, return C. */
dfcf069d 322int
b4e9dd77 323translate_char (table, c, charset, c1, c2)
23d2a7f1
KH
324 Lisp_Object table;
325 int c, charset, c1, c2;
326{
327 Lisp_Object ch;
328 int alt_charset, alt_c1, alt_c2, dimension;
329
330 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
331 if (!CHAR_TABLE_P (table)
332 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
333 || XINT (ch) < 0)
334 return c;
335
336 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
337 dimension = CHARSET_DIMENSION (alt_charset);
338 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
339 /* CH is not a generic character, just return it. */
340 return XFASTINT (ch);
341
342 /* Since CH is a generic character, we must return a specific
343 charater which has the same position codes as C from CH. */
344 if (charset < 0)
345 SPLIT_CHAR (c, charset, c1, c2);
346 if (dimension != CHARSET_DIMENSION (charset))
347 /* We can't make such a character because of dimension mismatch. */
348 return c;
23d2a7f1
KH
349 return MAKE_CHAR (alt_charset, c1, c2);
350}
351
d2665018 352/* Convert the unibyte character C to multibyte based on
b4e9dd77 353 Vnonascii_translation_table or nonascii_insert_offset. If they can't
d2665018
KH
354 convert C to a valid multibyte character, convert it based on
355 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 356
dfcf069d 357int
35e623fb
RS
358unibyte_char_to_multibyte (c)
359 int c;
360{
543b4f61 361 if (c < 0400 && c >= 0200)
35e623fb 362 {
d2665018
KH
363 int c_save = c;
364
b4e9dd77 365 if (! NILP (Vnonascii_translation_table))
bbf12bb3
KH
366 {
367 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
368 if (c >= 0400 && ! VALID_MULTIBYTE_CHAR_P (c))
369 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
370 }
371 else if (c >= 0240 && nonascii_insert_offset > 0)
372 {
373 c += nonascii_insert_offset;
374 if (c < 0400 || ! VALID_MULTIBYTE_CHAR_P (c))
375 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
376 }
377 else if (c >= 0240)
d2665018 378 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
379 }
380 return c;
381}
76d7b829
KH
382
383
384/* Convert the multibyte character C to unibyte 8-bit character based
385 on Vnonascii_translation_table or nonascii_insert_offset. If
386 REV_TBL is non-nil, it should be a reverse table of
387 Vnonascii_translation_table, i.e. what given by:
388 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
389
390int
391multibyte_char_to_unibyte (c, rev_tbl)
392 int c;
393 Lisp_Object rev_tbl;
394{
395 if (!SINGLE_BYTE_CHAR_P (c))
396 {
397 int c_save = c;
398
399 if (! CHAR_TABLE_P (rev_tbl)
400 && CHAR_TABLE_P (Vnonascii_translation_table))
401 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
402 make_number (0));
403 if (CHAR_TABLE_P (rev_tbl))
404 {
405 Lisp_Object temp;
406 temp = Faref (rev_tbl, make_number (c));
407 if (INTEGERP (temp))
408 c = XINT (temp);
bbf12bb3
KH
409 if (c >= 256)
410 c = (c_save & 0177) + 0200;
411 }
412 else
413 {
414 if (nonascii_insert_offset > 0)
415 c -= nonascii_insert_offset;
416 if (c < 128 || c >= 256)
417 c = (c_save & 0177) + 0200;
76d7b829 418 }
76d7b829
KH
419 }
420
421 return c;
422}
423
35e623fb 424\f
4ed46869
KH
425/* Update the table Vcharset_table with the given arguments (see the
426 document of `define-charset' for the meaning of each argument).
427 Several other table contents are also updated. The caller should
428 check the validity of CHARSET-ID and the remaining arguments in
429 advance. */
430
431void
432update_charset_table (charset_id, dimension, chars, width, direction,
433 iso_final_char, iso_graphic_plane,
434 short_name, long_name, description)
435 Lisp_Object charset_id, dimension, chars, width, direction;
436 Lisp_Object iso_final_char, iso_graphic_plane;
437 Lisp_Object short_name, long_name, description;
438{
439 int charset = XINT (charset_id);
440 int bytes;
441 unsigned char leading_code_base, leading_code_ext;
442
6dc0722d
KH
443 if (NILP (CHARSET_TABLE_ENTRY (charset)))
444 CHARSET_TABLE_ENTRY (charset)
445 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869
KH
446
447 /* Get byte length of multibyte form, base leading-code, and
448 extended leading-code of the charset. See the comment under the
449 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
450 bytes = XINT (dimension);
451 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
452 {
453 /* Official charset, it doesn't have an extended leading-code. */
454 if (charset != CHARSET_ASCII)
455 bytes += 1; /* For a base leading-code. */
456 leading_code_base = charset;
457 leading_code_ext = 0;
458 }
459 else
460 {
461 /* Private charset. */
462 bytes += 2; /* For base and extended leading-codes. */
463 leading_code_base
464 = (charset < LEADING_CODE_EXT_12
465 ? LEADING_CODE_PRIVATE_11
466 : (charset < LEADING_CODE_EXT_21
467 ? LEADING_CODE_PRIVATE_12
468 : (charset < LEADING_CODE_EXT_22
469 ? LEADING_CODE_PRIVATE_21
470 : LEADING_CODE_PRIVATE_22)));
471 leading_code_ext = charset;
472 }
473
6ef23ebb
KH
474 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
475 error ("Invalid dimension for the charset-ID %d", charset);
476
4ed46869
KH
477 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
478 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
479 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
480 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
481 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
482 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
483 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
484 = make_number (leading_code_base);
485 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
486 = make_number (leading_code_ext);
487 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
488 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
489 = iso_graphic_plane;
490 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
491 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
492 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
493 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
494
495 {
496 /* If we have already defined a charset which has the same
497 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
498 DIRECTION, we must update the entry REVERSE-CHARSET of both
499 charsets. If there's no such charset, the value of the entry
500 is set to nil. */
501 int i;
502
513ee442 503 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
504 if (!NILP (CHARSET_TABLE_ENTRY (i)))
505 {
506 if (CHARSET_DIMENSION (i) == XINT (dimension)
507 && CHARSET_CHARS (i) == XINT (chars)
508 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
509 && CHARSET_DIRECTION (i) != XINT (direction))
510 {
511 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
512 = make_number (i);
513 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
514 break;
515 }
516 }
513ee442 517 if (i > MAX_CHARSET)
4ed46869
KH
518 /* No such a charset. */
519 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
520 = make_number (-1);
521 }
522
523 if (charset != CHARSET_ASCII
524 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
525 {
4ed46869
KH
526 width_by_char_head[leading_code_base] = XINT (width);
527
528 /* Update table emacs_code_class. */
529 emacs_code_class[charset] = (bytes == 2
530 ? EMACS_leading_code_2
531 : (bytes == 3
532 ? EMACS_leading_code_3
533 : EMACS_leading_code_4));
534 }
535
536 /* Update table iso_charset_table. */
537 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
538 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
539}
540
541#ifdef emacs
542
543/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
544 is invalid. */
545int
546get_charset_id (charset_symbol)
547 Lisp_Object charset_symbol;
548{
549 Lisp_Object val;
550 int charset;
551
552 return ((SYMBOLP (charset_symbol)
553 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
554 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
555 CHARSET_VALID_P (charset)))
556 ? charset : -1);
557}
558
559/* Return an identification number for a new private charset of
560 DIMENSION and WIDTH. If there's no more room for the new charset,
561 return 0. */
562Lisp_Object
563get_new_private_charset_id (dimension, width)
564 int dimension, width;
565{
566 int charset, from, to;
567
568 if (dimension == 1)
569 {
570 if (width == 1)
571 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
572 else
573 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
574 }
575 else
576 {
577 if (width == 1)
578 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
579 else
b0e3cf2b 580 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
581 }
582
583 for (charset = from; charset < to; charset++)
584 if (!CHARSET_DEFINED_P (charset)) break;
585
586 return make_number (charset < to ? charset : 0);
587}
588
589DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
590 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 591If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
592 treated as a private charset.\n\
593INFO-VECTOR is a vector of the format:\n\
594 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
595 SHORT-NAME LONG-NAME DESCRIPTION]\n\
596The meanings of each elements is as follows:\n\
597DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
598CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
599WIDTH (integer) is the number of columns a character in the charset\n\
600occupies on the screen: one of 0, 1, and 2.\n\
601\n\
602DIRECTION (integer) is the rendering direction of characters in the\n\
277576f6
KH
603charset when rendering. If 0, render from left to right, else\n\
604render from right to left.\n\
4ed46869
KH
605\n\
606ISO-FINAL-CHAR (character) is the final character of the\n\
607corresponding ISO 2022 charset.\n\
608\n\
609ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
610while encoding to variants of ISO 2022 coding system, one of the\n\
611following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
612\n\
613SHORT-NAME (string) is the short name to refer to the charset.\n\
614\n\
615LONG-NAME (string) is the long name to refer to the charset.\n\
616\n\
617DESCRIPTION (string) is the description string of the charset.")
618 (charset_id, charset_symbol, info_vector)
619 Lisp_Object charset_id, charset_symbol, info_vector;
620{
621 Lisp_Object *vec;
622
623 if (!NILP (charset_id))
624 CHECK_NUMBER (charset_id, 0);
625 CHECK_SYMBOL (charset_symbol, 1);
626 CHECK_VECTOR (info_vector, 2);
627
628 if (! NILP (charset_id))
629 {
630 if (! CHARSET_VALID_P (XINT (charset_id)))
631 error ("Invalid CHARSET: %d", XINT (charset_id));
632 else if (CHARSET_DEFINED_P (XINT (charset_id)))
633 error ("Already defined charset: %d", XINT (charset_id));
634 }
635
636 vec = XVECTOR (info_vector)->contents;
637 if (XVECTOR (info_vector)->size != 9
638 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
639 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
640 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
641 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
642 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
643 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
644 || !STRINGP (vec[6])
645 || !STRINGP (vec[7])
646 || !STRINGP (vec[8]))
647 error ("Invalid info-vector argument for defining charset %s",
648 XSYMBOL (charset_symbol)->name->data);
649
650 if (NILP (charset_id))
651 {
652 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
653 if (XINT (charset_id) == 0)
654 error ("There's no room for a new private charset %s",
655 XSYMBOL (charset_symbol)->name->data);
656 }
657
658 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
659 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 660 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
661 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
662 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
663 return Qnil;
664}
665
8a73a704
KH
666DEFUN ("generic-character-list", Fgeneric_character_list,
667 Sgeneric_character_list, 0, 0, 0,
668 "Return a list of all possible generic characters.\n\
669It includes a generic character for a charset not yet defined.")
670 ()
671{
672 return Vgeneric_character_list;
673}
674
3fac5a51
KH
675DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
676 Sget_unused_iso_final_char, 2, 2, 0,
677 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
678DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
679CHARS is the number of characters in a dimension: 94 or 96.\n\
680\n\
681This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
682If there's no unused final char for the specified kind of charset,\n\
683return nil.")
684 (dimension, chars)
685 Lisp_Object dimension, chars;
686{
687 int final_char;
688
689 CHECK_NUMBER (dimension, 0);
690 CHECK_NUMBER (chars, 1);
691 if (XINT (dimension) != 1 && XINT (dimension) != 2)
692 error ("Invalid charset dimension %d, it should be 1 or 2",
693 XINT (dimension));
694 if (XINT (chars) != 94 && XINT (chars) != 96)
695 error ("Invalid charset chars %d, it should be 94 or 96",
696 XINT (chars));
697 for (final_char = '0'; final_char <= '?'; final_char++)
698 {
699 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
700 break;
701 }
702 return (final_char <= '?' ? make_number (final_char) : Qnil);
703}
704
4ed46869
KH
705DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
706 4, 4, 0,
707 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
708CHARSET should be defined by `defined-charset' in advance.")
709 (dimension, chars, final_char, charset_symbol)
710 Lisp_Object dimension, chars, final_char, charset_symbol;
711{
712 int charset;
713
714 CHECK_NUMBER (dimension, 0);
715 CHECK_NUMBER (chars, 1);
716 CHECK_NUMBER (final_char, 2);
717 CHECK_SYMBOL (charset_symbol, 3);
718
719 if (XINT (dimension) != 1 && XINT (dimension) != 2)
720 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
721 if (XINT (chars) != 94 && XINT (chars) != 96)
722 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
723 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
724 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
725 if ((charset = get_charset_id (charset_symbol)) < 0)
726 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
727
728 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
729 return Qnil;
730}
731
732/* Return number of different charsets in STR of length LEN. In
733 addition, for each found charset N, CHARSETS[N] is set 1. The
a29e3b1b 734 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
1d67c29b
KH
735 It may lookup a translation table TABLE if supplied.
736
737 If CMPCHARP is nonzero and some composite character is found,
738 CHARSETS[128] is also set 1 and the returned number is incremented
0282eb69
KH
739 by 1.
740
741 If MULTIBYTE is zero, do not check multibyte characters, i.e. if
742 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any
743 8-bit codes are found CHARSET[1] is set to 1. */
4ed46869
KH
744
745int
0282eb69 746find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte)
028d516b
KH
747 unsigned char *str;
748 int len, *charsets;
23d2a7f1 749 Lisp_Object table;
1d67c29b 750 int cmpcharp;
0282eb69 751 int multibyte;
4ed46869 752{
733eafd8 753 register int num = 0, c;
4ed46869 754
0282eb69
KH
755 if (! multibyte)
756 {
757 unsigned char *endp = str + len;
758 int maskbits = 0;
759
760 while (str < endp && maskbits != 3)
761 maskbits |= (*str++ < 0x80 ? 1 : 2);
762 if (maskbits & 1)
763 {
764 charsets[0] = 1;
765 num++;
766 }
767 if (maskbits & 2)
768 {
769 charsets[1] = 1;
770 num++;
771 }
772 return num;
773 }
774
23d2a7f1
KH
775 if (! CHAR_TABLE_P (table))
776 table = Qnil;
777
4ed46869
KH
778 while (len > 0)
779 {
05505664 780 int bytes, charset;
733eafd8 781 c = *str;
23d2a7f1 782
733eafd8 783 if (c == LEADING_CODE_COMPOSITION)
05505664 784 {
733eafd8
KH
785 int cmpchar_id = str_cmpchar_id (str, len);
786 GLYPH *glyph;
05505664 787
1d67c29b 788 if (cmpchar_id >= 0)
05505664 789 {
020da460 790 struct cmpchar_info *cmp_p = cmpchar_table[cmpchar_id];
733eafd8
KH
791 int i;
792
020da460 793 for (i = 0; i < cmp_p->glyph_len; i++)
733eafd8 794 {
020da460 795 c = cmp_p->glyph[i];
733eafd8
KH
796 if (!NILP (table))
797 {
b4e9dd77 798 if ((c = translate_char (table, c, 0, 0, 0)) < 0)
020da460 799 c = cmp_p->glyph[i];
733eafd8
KH
800 }
801 if ((charset = CHAR_CHARSET (c)) < 0)
802 charset = CHARSET_ASCII;
803 if (!charsets[charset])
804 {
805 charsets[charset] = 1;
806 num += 1;
807 }
808 }
020da460
KH
809 str += cmp_p->len;
810 len -= cmp_p->len;
811 if (cmpcharp && !charsets[CHARSET_COMPOSITION])
812 {
813 charsets[CHARSET_COMPOSITION] = 1;
814 num += 1;
815 }
733eafd8 816 continue;
05505664 817 }
05505664 818
0282eb69 819 charset = 1; /* This leads to `unknown' charset. */
733eafd8
KH
820 bytes = 1;
821 }
23d2a7f1
KH
822 else
823 {
733eafd8
KH
824 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
825 if (! NILP (table))
826 {
b4e9dd77 827 int c1 = translate_char (table, c, 0, 0, 0);
733eafd8
KH
828 if (c1 >= 0)
829 c = c1;
830 }
831 charset = CHAR_CHARSET (c);
23d2a7f1 832 }
4ed46869
KH
833
834 if (!charsets[charset])
835 {
836 charsets[charset] = 1;
837 num += 1;
838 }
839 str += bytes;
840 len -= bytes;
841 }
842 return num;
843}
844
845DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 846 2, 3, 0,
4ed46869 847 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1 848BEG and END are buffer positions.\n\
020da460
KH
849If the region contains any composite character,\n\
850`composition' is included in the returned list.\n\
0282eb69
KH
851Optional arg TABLE if non-nil is a translation table to look up.\n\
852\n\
853If the region contains invalid multiybte characters,\n\
854`unknown' is included in the returned list.
855\n\
856If the current buffer is unibyte, the returned list contains\n\
857`ascii' if any 7-bit characters are found,\n\
858and `unknown' if any 8-bit characters are found.")
23d2a7f1
KH
859 (beg, end, table)
860 Lisp_Object beg, end, table;
4ed46869 861{
028d516b 862 int charsets[MAX_CHARSET + 1];
6ae1f27e 863 int from, from_byte, to, stop, stop_byte, i;
4ed46869 864 Lisp_Object val;
0282eb69
KH
865 int undefined;
866 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
867
868 validate_region (&beg, &end);
869 from = XFASTINT (beg);
870 stop = to = XFASTINT (end);
6ae1f27e 871
4ed46869 872 if (from < GPT && GPT < to)
6ae1f27e
RS
873 {
874 stop = GPT;
875 stop_byte = GPT_BYTE;
876 }
877 else
878 stop_byte = CHAR_TO_BYTE (stop);
879
880 from_byte = CHAR_TO_BYTE (from);
881
028d516b 882 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
883 while (1)
884 {
6ae1f27e 885 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
0282eb69 886 charsets, table, 1, multibyte);
4ed46869 887 if (stop < to)
6ae1f27e
RS
888 {
889 from = stop, from_byte = stop_byte;
890 stop = to, stop_byte = CHAR_TO_BYTE (stop);
891 }
4ed46869
KH
892 else
893 break;
894 }
6ae1f27e 895
4ed46869 896 val = Qnil;
0282eb69
KH
897 undefined = 0;
898 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
4ed46869 899 if (charsets[i])
0282eb69
KH
900 {
901 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION)
902 val = Fcons (CHARSET_SYMBOL (i), val);
903 else
904 undefined = 1;
905 }
906 if (undefined)
907 val = Fcons (Qunknown, val);
4ed46869
KH
908 return val;
909}
910
911DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
912 1, 2, 0,
913 "Return a list of charsets in STR.\n\
020da460
KH
914If the string contains any composite characters,\n\
915`composition' is included in the returned list.\n\
0282eb69
KH
916Optional arg TABLE if non-nil is a translation table to look up.\n\
917\n\
918If the region contains invalid multiybte characters,\n\
919`unknown' is included in the returned list.\n\
920\n\
921If STR is unibyte, the returned list contains\n\
922`ascii' if any 7-bit characters are found,\n\
923and `unknown' if any 8-bit characters are found.")
23d2a7f1
KH
924 (str, table)
925 Lisp_Object str, table;
4ed46869 926{
a29e3b1b 927 int charsets[MAX_CHARSET + 1];
4ed46869
KH
928 int i;
929 Lisp_Object val;
0282eb69
KH
930 int undefined;
931 int multibyte;
4ed46869
KH
932
933 CHECK_STRING (str, 0);
0282eb69 934 multibyte = STRING_MULTIBYTE (str);
87b089ad 935
a29e3b1b 936 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
fc932ac6 937 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
0282eb69 938 charsets, table, 1, multibyte);
4ed46869 939 val = Qnil;
0282eb69
KH
940 undefined = 0;
941 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
4ed46869 942 if (charsets[i])
0282eb69
KH
943 {
944 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION)
945 val = Fcons (CHARSET_SYMBOL (i), val);
946 else
947 undefined = 1;
948 }
949 if (undefined)
950 val = Fcons (Qunknown, val);
4ed46869
KH
951 return val;
952}
953\f
954DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 955 "")
4ed46869
KH
956 (charset, code1, code2)
957 Lisp_Object charset, code1, code2;
958{
959 CHECK_NUMBER (charset, 0);
960
961 if (NILP (code1))
962 XSETFASTINT (code1, 0);
963 else
964 CHECK_NUMBER (code1, 1);
965 if (NILP (code2))
966 XSETFASTINT (code2, 0);
967 else
968 CHECK_NUMBER (code2, 2);
969
970 if (!CHARSET_DEFINED_P (XINT (charset)))
971 error ("Invalid charset: %d", XINT (charset));
972
973 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
974}
975
976DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
0282eb69
KH
977 "Return list of charset and one or two position-codes of CHAR.\n\
978If CHAR is invalid as a character code,\n\
979return a list of symbol `unknown' and CHAR.")
4ed46869
KH
980 (ch)
981 Lisp_Object ch;
982{
983 Lisp_Object val;
0282eb69 984 int c, charset, c1, c2;
4ed46869
KH
985
986 CHECK_NUMBER (ch, 0);
0282eb69
KH
987 c = XFASTINT (ch);
988 if (!CHAR_VALID_P (c, 1))
989 return Fcons (Qunknown, Fcons (ch, Qnil));
4ed46869 990 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 991 return (c2 >= 0
4ed46869
KH
992 ? Fcons (CHARSET_SYMBOL (charset),
993 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
994 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
995}
996
997DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
998 "Return charset of CHAR.")
999 (ch)
1000 Lisp_Object ch;
1001{
1002 CHECK_NUMBER (ch, 0);
1003
1004 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1005}
1006
90d7b74e
KH
1007DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1008 "Return charset of a character in current buffer at position POS.\n\
1009If POS is nil, it defauls to the current point.")
1010 (pos)
1011 Lisp_Object pos;
1012{
1013 register int pos_byte, c, charset;
1014 register unsigned char *p;
1015
1016 if (NILP (pos))
1017 pos_byte = PT_BYTE;
1018 else if (MARKERP (pos))
1019 pos_byte = marker_byte_position (pos);
1020 else
1021 {
1022 CHECK_NUMBER (pos, 0);
1023 pos_byte = CHAR_TO_BYTE (XINT (pos));
1024 }
1025 p = BYTE_POS_ADDR (pos_byte);
1026 c = STRING_CHAR (p, Z_BYTE - pos_byte);
1027 charset = CHAR_CHARSET (c);
1028 return CHARSET_SYMBOL (charset);
1029}
1030
4ed46869 1031DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2b71bb78
KH
1032 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1033\n\
1034ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1035by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1036where as Emacs distinguishes them by charset symbol.\n\
1037See the documentation of the function `charset-info' for the meanings of\n\
1038DIMENSION, CHARS, and FINAL-CHAR.")
4ed46869
KH
1039 (dimension, chars, final_char)
1040 Lisp_Object dimension, chars, final_char;
1041{
1042 int charset;
1043
1044 CHECK_NUMBER (dimension, 0);
1045 CHECK_NUMBER (chars, 1);
1046 CHECK_NUMBER (final_char, 2);
1047
1048 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1049 return Qnil;
1050 return CHARSET_SYMBOL (charset);
1051}
1052
9d3d8cba
KH
1053/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1054 generic character. If GENERICP is zero, return nonzero iff C is a
1055 valid normal character. Do not call this function directly,
1056 instead use macro CHAR_VALID_P. */
1057int
1058char_valid_p (c, genericp)
1059 int c, genericp;
1060{
1061 int charset, c1, c2;
1062
1063 if (c < 0)
1064 return 0;
1065 if (SINGLE_BYTE_CHAR_P (c))
1066 return 1;
1067 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
32278fd5 1068 if (charset != CHARSET_COMPOSITION && !CHARSET_DEFINED_P (charset))
9d3d8cba
KH
1069 return 0;
1070 return (c < MIN_CHAR_COMPOSITION
1071 ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two. */
1072 ? (genericp && c1 == 0 && c2 == 0
1073 || c1 >= 32 && c2 >= 32)
1074 : (genericp && c1 == 0
1075 || c1 >= 32))
1076 : c < MIN_CHAR_COMPOSITION + n_cmpchars);
1077}
1078
1079DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
a9d02884
DL
1080 "Return t if OBJECT is a valid normal character.\n\
1081If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
9d3d8cba
KH
1082a valid generic character.")
1083 (object, genericp)
1084 Lisp_Object object, genericp;
1085{
1086 if (! NATNUMP (object))
1087 return Qnil;
1088 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1089}
1090
d2665018
KH
1091DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1092 Sunibyte_char_to_multibyte, 1, 1, 0,
1093 "Convert the unibyte character CH to multibyte character.\n\
537efd8d 1094The conversion is done based on `nonascii-translation-table' (which see)\n\
340b8d58 1095 or `nonascii-insert-offset' (which see).")
d2665018
KH
1096 (ch)
1097 Lisp_Object ch;
1098{
1099 int c;
1100
1101 CHECK_NUMBER (ch, 0);
1102 c = XINT (ch);
1103 if (c < 0 || c >= 0400)
1104 error ("Invalid unibyte character: %d", c);
1105 c = unibyte_char_to_multibyte (c);
1106 if (c < 0)
1107 error ("Can't convert to multibyte character: %d", XINT (ch));
1108 return make_number (c);
1109}
1110
1bcc1567
RS
1111DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1112 Smultibyte_char_to_unibyte, 1, 1, 0,
1113 "Convert the multibyte character CH to unibyte character.\n\
1114The conversion is done based on `nonascii-translation-table' (which see)\n\
1115 or `nonascii-insert-offset' (which see).")
1116 (ch)
1117 Lisp_Object ch;
1118{
1119 int c;
1120
1121 CHECK_NUMBER (ch, 0);
1122 c = XINT (ch);
1123 if (c < 0)
1124 error ("Invalid multibyte character: %d", c);
1125 c = multibyte_char_to_unibyte (c, Qnil);
1126 if (c < 0)
1127 error ("Can't convert to unibyte character: %d", XINT (ch));
1128 return make_number (c);
1129}
1130
4ed46869 1131DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
f78643ef 1132 "Return 1 regardless of the argument CHAR.\n\
60022cb7 1133This is now an obsolete function. We keep it just for backward compatibility.")
4ed46869
KH
1134 (ch)
1135 Lisp_Object ch;
1136{
1137 Lisp_Object val;
4ed46869
KH
1138
1139 CHECK_NUMBER (ch, 0);
9b6a601f
KH
1140 return make_number (1);
1141}
1142
1143/* Return how many bytes C will occupy in a multibyte buffer.
1144 Don't call this function directly, instead use macro CHAR_BYTES. */
1145int
1146char_bytes (c)
1147 int c;
1148{
1149 int bytes;
1150
8ac5a9cc
KH
1151 if (SINGLE_BYTE_CHAR_P (c) || (c & ~GLYPH_MASK_CHAR))
1152 return 1;
1153
9b6a601f 1154 if (COMPOSITE_CHAR_P (c))
4ed46869 1155 {
9b6a601f 1156 unsigned int id = COMPOSITE_CHAR_ID (c);
4ed46869
KH
1157
1158 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
1159 }
1160 else
1161 {
9b6a601f 1162 int charset = CHAR_CHARSET (c);
4ed46869
KH
1163
1164 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
1165 }
1166
60022cb7 1167 return bytes;
4ed46869
KH
1168}
1169
1170/* Return the width of character of which multi-byte form starts with
1171 C. The width is measured by how many columns occupied on the
1172 screen when displayed in the current buffer. */
1173
1174#define ONE_BYTE_CHAR_WIDTH(c) \
1175 (c < 0x20 \
1176 ? (c == '\t' \
53316e55 1177 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
1178 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1179 : (c < 0x7f \
1180 ? 1 \
1181 : (c == 0x7F \
1182 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1183 : ((! NILP (current_buffer->enable_multibyte_characters) \
1184 && BASE_LEADING_CODE_P (c)) \
1185 ? WIDTH_BY_CHAR_HEAD (c) \
b4e9dd77 1186 : 4))))
4ed46869
KH
1187
1188DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1189 "Return width of CHAR when displayed in the current buffer.\n\
1190The width is measured by how many columns it occupies on the screen.")
1191 (ch)
1192 Lisp_Object ch;
1193{
859f2b3c 1194 Lisp_Object val, disp;
4ed46869 1195 int c;
51c4025f 1196 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869
KH
1197
1198 CHECK_NUMBER (ch, 0);
1199
859f2b3c
RS
1200 c = XINT (ch);
1201
1202 /* Get the way the display table would display it. */
51c4025f 1203 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
1204
1205 if (VECTORP (disp))
1206 XSETINT (val, XVECTOR (disp)->size);
1207 else if (SINGLE_BYTE_CHAR_P (c))
1208 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
1209 else if (COMPOSITE_CHAR_P (c))
1210 {
1211 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
0282eb69 1212 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 1));
4ed46869
KH
1213 }
1214 else
1215 {
1216 int charset = CHAR_CHARSET (c);
1217
1218 XSETFASTINT (val, CHARSET_WIDTH (charset));
1219 }
1220 return val;
1221}
1222
1223/* Return width of string STR of length LEN when displayed in the
1224 current buffer. The width is measured by how many columns it
1225 occupies on the screen. */
859f2b3c 1226
4ed46869
KH
1227int
1228strwidth (str, len)
1229 unsigned char *str;
1230 int len;
1231{
1232 unsigned char *endp = str + len;
1233 int width = 0;
c4a4e28f 1234 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1235
859f2b3c
RS
1236 while (str < endp)
1237 {
1238 if (*str == LEADING_CODE_COMPOSITION)
1239 {
1240 int id = str_cmpchar_id (str, endp - str);
1241
1242 if (id < 0)
1243 {
1244 width += 4;
1245 str++;
1246 }
1247 else
1248 {
1249 width += cmpchar_table[id]->width;
1250 str += cmpchar_table[id]->len;
1251 }
1252 }
1253 else
1254 {
1255 Lisp_Object disp;
e515b0a9
KH
1256 int thislen;
1257 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
859f2b3c
RS
1258
1259 /* Get the way the display table would display it. */
acc35c36
RS
1260 if (dp)
1261 disp = DISP_CHAR_VECTOR (dp, c);
1262 else
1263 disp = Qnil;
859f2b3c
RS
1264
1265 if (VECTORP (disp))
e515b0a9 1266 width += XVECTOR (disp)->size;
859f2b3c 1267 else
e515b0a9 1268 width += ONE_BYTE_CHAR_WIDTH (*str);
859f2b3c 1269
e515b0a9 1270 str += thislen;
859f2b3c
RS
1271 }
1272 }
4ed46869
KH
1273 return width;
1274}
1275
1276DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1277 "Return width of STRING when displayed in the current buffer.\n\
1278Width is measured by how many columns it occupies on the screen.\n\
046b1f03
RS
1279When calculating width of a multibyte character in STRING,\n\
1280only the base leading-code is considered; the validity of\n\
1281the following bytes is not checked.")
4ed46869
KH
1282 (str)
1283 Lisp_Object str;
1284{
1285 Lisp_Object val;
1286
1287 CHECK_STRING (str, 0);
fc932ac6
RS
1288 XSETFASTINT (val, strwidth (XSTRING (str)->data,
1289 STRING_BYTES (XSTRING (str))));
4ed46869
KH
1290 return val;
1291}
1292
1293DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1294 "Return the direction of CHAR.\n\
1295The returned value is 0 for left-to-right and 1 for right-to-left.")
1296 (ch)
1297 Lisp_Object ch;
1298{
1299 int charset;
1300
1301 CHECK_NUMBER (ch, 0);
1302 charset = CHAR_CHARSET (XFASTINT (ch));
1303 if (!CHARSET_DEFINED_P (charset))
93bcb785 1304 invalid_character (XINT (ch));
4ed46869
KH
1305 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1306}
1307
af4fecb4 1308DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
6ae1f27e 1309 "Return number of characters between BEG and END.")
046b1f03
RS
1310 (beg, end)
1311 Lisp_Object beg, end;
1312{
6ae1f27e 1313 int from, to;
046b1f03 1314
17e7ef1b
RS
1315 CHECK_NUMBER_COERCE_MARKER (beg, 0);
1316 CHECK_NUMBER_COERCE_MARKER (end, 1);
1317
046b1f03 1318 from = min (XFASTINT (beg), XFASTINT (end));
a8a35e61 1319 to = max (XFASTINT (beg), XFASTINT (end));
046b1f03 1320
a8c21066 1321 return make_number (to - from);
6ae1f27e 1322}
9036eb45 1323
87b089ad
RS
1324/* Return the number of characters in the NBYTES bytes at PTR.
1325 This works by looking at the contents and checking for multibyte sequences.
1326 However, if the current buffer has enable-multibyte-characters = nil,
1327 we treat each byte as a character. */
1328
6ae1f27e
RS
1329int
1330chars_in_text (ptr, nbytes)
1331 unsigned char *ptr;
1332 int nbytes;
1333{
93bcb785 1334 unsigned char *endp, c;
6ae1f27e 1335 int chars;
046b1f03 1336
87b089ad
RS
1337 /* current_buffer is null at early stages of Emacs initialization. */
1338 if (current_buffer == 0
1339 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1340 return nbytes;
a8a35e61 1341
6ae1f27e
RS
1342 endp = ptr + nbytes;
1343 chars = 0;
046b1f03 1344
6ae1f27e
RS
1345 while (ptr < endp)
1346 {
93bcb785
KH
1347 c = *ptr++;
1348
1349 if (BASE_LEADING_CODE_P (c))
1350 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
046b1f03
RS
1351 chars++;
1352 }
1353
6ae1f27e 1354 return chars;
046b1f03
RS
1355}
1356
87b089ad
RS
1357/* Return the number of characters in the NBYTES bytes at PTR.
1358 This works by looking at the contents and checking for multibyte sequences.
1359 It ignores enable-multibyte-characters. */
1360
1361int
1362multibyte_chars_in_text (ptr, nbytes)
1363 unsigned char *ptr;
1364 int nbytes;
1365{
93bcb785 1366 unsigned char *endp, c;
87b089ad
RS
1367 int chars;
1368
1369 endp = ptr + nbytes;
1370 chars = 0;
1371
1372 while (ptr < endp)
1373 {
93bcb785
KH
1374 c = *ptr++;
1375
1376 if (BASE_LEADING_CODE_P (c))
1377 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
87b089ad
RS
1378 chars++;
1379 }
1380
1381 return chars;
1382}
1383
1384DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
4ed46869 1385 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
1386 (n, args)
1387 int n;
4ed46869
KH
1388 Lisp_Object *args;
1389{
53316e55 1390 int i;
4ed46869 1391 unsigned char *buf
bd4c6dd0 1392 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
4ed46869
KH
1393 unsigned char *p = buf;
1394 Lisp_Object val;
1395
1396 for (i = 0; i < n; i++)
1397 {
1398 int c, len;
1399 unsigned char *str;
1400
1401 if (!INTEGERP (args[i]))
b0e3cf2b 1402 CHECK_NUMBER (args[i], 0);
4ed46869
KH
1403 c = XINT (args[i]);
1404 len = CHAR_STRING (c, p, str);
1405 if (p != str)
1406 /* C is a composite character. */
1407 bcopy (str, p, len);
1408 p += len;
1409 }
1410
020da460
KH
1411 /* Here, we can't use make_string_from_bytes because of byte
1412 combining problem. */
1413 val = make_string (buf, p - buf);
4ed46869
KH
1414 return val;
1415}
1416
1417#endif /* emacs */
1418\f
1419/*** Composite characters staffs ***/
1420
1421/* Each composite character is identified by CMPCHAR-ID which is
1422 assigned when Emacs needs the character code of the composite
1423 character (e.g. when displaying it on the screen). See the
1424 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1425 composite character is represented in Emacs. */
1426
1427/* If `static' is defined, it means that it is defined to null string. */
1428#ifndef static
1429/* The following function is copied from lread.c. */
1430static int
1431hash_string (ptr, len)
1432 unsigned char *ptr;
1433 int len;
1434{
1435 register unsigned char *p = ptr;
1436 register unsigned char *end = p + len;
1437 register unsigned char c;
1438 register int hash = 0;
1439
1440 while (p != end)
1441 {
1442 c = *p++;
1443 if (c >= 0140) c -= 40;
1444 hash = ((hash<<3) + (hash>>28) + c);
1445 }
1446 return hash & 07777777777;
1447}
1448#endif
1449
4ed46869
KH
1450#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1451
1452static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1453
1454/* Each element of `cmpchar_hash_table' is a pointer to an array of
1455 integer, where the 1st element is the size of the array, the 2nd
1456 element is how many elements are actually used in the array, and
1457 the remaining elements are CMPCHAR-IDs of composite characters of
1458 the same hash value. */
1459#define CMPCHAR_HASH_SIZE(table) table[0]
1460#define CMPCHAR_HASH_USED(table) table[1]
1461#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1462
1463/* Return CMPCHAR-ID of the composite character in STR of the length
1464 LEN. If the composite character has not yet been registered,
1465 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1466 is the sole function for assigning CMPCHAR-ID. */
1467int
1468str_cmpchar_id (str, len)
8867de67 1469 const unsigned char *str;
4ed46869
KH
1470 int len;
1471{
1472 int hash_idx, *hashp;
1473 unsigned char *buf;
1474 int embedded_rule; /* 1 if composition rule is embedded. */
1475 int chars; /* number of components. */
1476 int i;
1477 struct cmpchar_info *cmpcharp;
1478
4ed46869
KH
1479 /* The second byte 0xFF means compostion rule is embedded. */
1480 embedded_rule = (str[1] == 0xFF);
1481
1482 /* At first, get the actual length of the composite character. */
1483 {
8867de67 1484 const unsigned char *p, *endp = str + 1, *lastp = str + len;
4ed46869
KH
1485 int bytes;
1486
6ae1f27e 1487 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
93bcb785
KH
1488 if (endp - str < 5)
1489 /* Any composite char have at least 5-byte length. */
1490 return -1;
1491
4ed46869 1492 chars = 0;
93bcb785 1493 p = str + 1;
4ed46869
KH
1494 while (p < endp)
1495 {
9b4d1fe6
KH
1496 if (embedded_rule)
1497 {
1498 p++;
1499 if (p >= endp)
1500 return -1;
1501 }
4ed46869 1502 /* No need of checking if *P is 0xA0 because
93bcb785
KH
1503 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1504 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
4ed46869
KH
1505 chars++;
1506 }
93bcb785
KH
1507 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1508 /* Invalid components. */
4ed46869 1509 return -1;
93bcb785 1510 len = p - str;
4ed46869
KH
1511 }
1512 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1513 hashp = cmpchar_hash_table[hash_idx];
1514
1515 /* Then, look into the hash table. */
1516 if (hashp != NULL)
1517 /* Find the correct one among composite characters of the same
1518 hash value. */
1519 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1520 {
1521 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1522 if (len == cmpcharp->len
1523 && ! bcmp (str, cmpcharp->data, len))
1524 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1525 }
1526
1527 /* We have to register the composite character in cmpchar_table. */
0282eb69 1528 if (n_cmpchars >= (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
513ee442
KH
1529 /* No, we have no more room for a new composite character. */
1530 return -1;
1531
4ed46869
KH
1532 /* Make the entry in hash table. */
1533 if (hashp == NULL)
1534 {
1535 /* Make a table for 8 composite characters initially. */
1536 hashp = (cmpchar_hash_table[hash_idx]
1537 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1538 CMPCHAR_HASH_SIZE (hashp) = 10;
1539 CMPCHAR_HASH_USED (hashp) = 2;
1540 }
1541 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1542 {
1543 CMPCHAR_HASH_SIZE (hashp) += 8;
1544 hashp = (cmpchar_hash_table[hash_idx]
1545 = (int *) xrealloc (hashp,
1546 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1547 }
1548 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1549 CMPCHAR_HASH_USED (hashp)++;
1550
1551 /* Set information of the composite character in cmpchar_table. */
1552 if (cmpchar_table_size == 0)
1553 {
1554 /* This is the first composite character to be registered. */
1555 cmpchar_table_size = 256;
1556 cmpchar_table
1557 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1558 * cmpchar_table_size);
1559 }
1560 else if (cmpchar_table_size <= n_cmpchars)
1561 {
1562 cmpchar_table_size += 256;
1563 cmpchar_table
1564 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1565 sizeof (cmpchar_table[0])
1566 * cmpchar_table_size);
1567 }
1568
1569 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1570
1571 cmpcharp->len = len;
1572 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1573 bcopy (str, cmpcharp->data, len);
1574 cmpcharp->data[len] = 0;
1575 cmpcharp->glyph_len = chars;
1576 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1577 if (embedded_rule)
1578 {
1579 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1580 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1581 }
1582 else
1583 {
1584 cmpcharp->cmp_rule = NULL;
1585 cmpcharp->col_offset = NULL;
1586 }
1587
1588 /* Setup GLYPH data and composition rules (if any) so as not to make
1589 them every time on displaying. */
1590 {
1591 unsigned char *bufp;
1592 int width;
1593 float leftmost = 0.0, rightmost = 1.0;
1594
1595 if (embedded_rule)
1596 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1597 cmpcharp->col_offset[0] = 0;
1598
1599 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1600 {
1601 if (embedded_rule)
1602 cmpcharp->cmp_rule[i] = *bufp++;
1603
1604 if (*bufp == 0xA0) /* This is an ASCII character. */
1605 {
1606 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1607 width = 1;
1608 bufp++;
1609 }
1610 else /* Multibyte character. */
1611 {
1612 /* Make `bufp' point normal multi-byte form temporally. */
1613 *bufp -= 0x20;
1614 cmpcharp->glyph[i]
537efd8d 1615 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0, 0), 0);
4ed46869
KH
1616 width = WIDTH_BY_CHAR_HEAD (*bufp);
1617 *bufp += 0x20;
1618 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1619 }
1620
1621 if (embedded_rule && i > 0)
1622 {
1623 /* Reference points (global_ref and new_ref) are
1624 encoded as below:
1625
1626 0--1--2 -- ascent
1627 | |
1628 | |
1629 | 4 -+--- center
1630 -- 3 5 -- baseline
1631 | |
1632 6--7--8 -- descent
1633
1634 Now, we calculate the column offset of the new glyph
1635 from the left edge of the first glyph. This can avoid
1636 the same calculation everytime displaying this
1637 composite character. */
1638
1639 /* Reference points of global glyph and new glyph. */
1640 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1641 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1642 /* Column offset relative to the first glyph. */
1643 float left = (leftmost
1644 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1645 - (new_ref % 3) * width / 2.0);
1646
1647 cmpcharp->col_offset[i] = left;
1648 if (left < leftmost)
1649 leftmost = left;
1650 if (left + width > rightmost)
1651 rightmost = left + width;
1652 }
1653 else
1654 {
1655 if (width > rightmost)
1656 rightmost = width;
1657 }
1658 }
1659 if (embedded_rule)
1660 {
1661 /* Now col_offset[N] are relative to the left edge of the
1662 first component. Make them relative to the left edge of
1663 overall glyph. */
1664 for (i = 0; i < chars; i++)
1665 cmpcharp->col_offset[i] -= leftmost;
1666 /* Make rightmost holds width of overall glyph. */
1667 rightmost -= leftmost;
1668 }
1669
1670 cmpcharp->width = rightmost;
1671 if (cmpcharp->width < rightmost)
1672 /* To get a ceiling integer value. */
1673 cmpcharp->width++;
1674 }
1675
1676 cmpchar_table[n_cmpchars] = cmpcharp;
1677
1678 return n_cmpchars++;
1679}
1680
de54b0d5
KH
1681/* Return the Nth element of the composite character C. If NOERROR is
1682 nonzero, return 0 on error condition (C is an invalid composite
1683 charcter, or N is out of range). */
4ed46869 1684int
de54b0d5
KH
1685cmpchar_component (c, n, noerror)
1686 int c, n, noerror;
4ed46869
KH
1687{
1688 int id = COMPOSITE_CHAR_ID (c);
1689
de54b0d5
KH
1690 if (id < 0 || id >= n_cmpchars)
1691 {
1692 /* C is not a valid composite character. */
1693 if (noerror) return 0;
1694 error ("Invalid composite character: %d", c) ;
1695 }
1696 if (n >= cmpchar_table[id]->glyph_len)
1697 {
1698 /* No such component. */
1699 if (noerror) return 0;
1700 args_out_of_range (make_number (c), make_number (n));
1701 }
4ed46869
KH
1702 /* No face data is stored in glyph code. */
1703 return ((int) (cmpchar_table[id]->glyph[n]));
1704}
1705
1706DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1707 "T if CHAR is a composite character.")
1708 (ch)
1709 Lisp_Object ch;
1710{
1711 CHECK_NUMBER (ch, 0);
1712 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1713}
1714
1715DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1716 2, 2, 0,
de54b0d5
KH
1717 "Return the Nth component character of composite character CHARACTER.")
1718 (character, n)
1719 Lisp_Object character, n;
4ed46869 1720{
de54b0d5 1721 int id;
4ed46869
KH
1722
1723 CHECK_NUMBER (character, 0);
de54b0d5 1724 CHECK_NUMBER (n, 1);
4ed46869 1725
de54b0d5 1726 return (make_number (cmpchar_component (XINT (character), XINT (n), 0)));
4ed46869
KH
1727}
1728
1729DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1730 2, 2, 0,
de54b0d5 1731 "Return the Nth composition rule of composite character CHARACTER.\n\
55001746 1732The returned rule is for composing the Nth component\n\
de54b0d5
KH
1733on the (N-1)th component.\n\
1734If CHARACTER should be composed relatively or N is 0, return 255.")
55001746
KH
1735 (character, n)
1736 Lisp_Object character, n;
4ed46869 1737{
de54b0d5 1738 int id;
4ed46869
KH
1739
1740 CHECK_NUMBER (character, 0);
55001746 1741 CHECK_NUMBER (n, 1);
4ed46869
KH
1742
1743 id = COMPOSITE_CHAR_ID (XINT (character));
1744 if (id < 0 || id >= n_cmpchars)
1745 error ("Invalid composite character: %d", XINT (character));
de54b0d5 1746 if (XINT (n) < 0 || XINT (n) >= cmpchar_table[id]->glyph_len)
55001746 1747 args_out_of_range (character, n);
4ed46869 1748
de54b0d5
KH
1749 return make_number (cmpchar_table[id]->cmp_rule
1750 ? cmpchar_table[id]->cmp_rule[XINT (n)]
1751 : 255);
4ed46869
KH
1752}
1753
1754DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1755 Scmpchar_cmp_rule_p, 1, 1, 0,
1756 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1757 (character)
1758 Lisp_Object character;
1759{
1760 int id;
1761
1762 CHECK_NUMBER (character, 0);
1763 id = COMPOSITE_CHAR_ID (XINT (character));
1764 if (id < 0 || id >= n_cmpchars)
1765 error ("Invalid composite character: %d", XINT (character));
1766
1767 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1768}
1769
1770DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1771 Scmpchar_cmp_count, 1, 1, 0,
1772 "Return number of compoents of composite character CHARACTER.")
1773 (character)
1774 Lisp_Object character;
1775{
1776 int id;
1777
1778 CHECK_NUMBER (character, 0);
1779 id = COMPOSITE_CHAR_ID (XINT (character));
1780 if (id < 0 || id >= n_cmpchars)
1781 error ("Invalid composite character: %d", XINT (character));
1782
1783 return (make_number (cmpchar_table[id]->glyph_len));
1784}
1785
1786DEFUN ("compose-string", Fcompose_string, Scompose_string,
1787 1, 1, 0,
1788 "Return one char string composed from all characters in STRING.")
1789 (str)
1790 Lisp_Object str;
1791{
1792 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1793 int len, i;
1794
1795 CHECK_STRING (str, 0);
1796
1797 buf[0] = LEADING_CODE_COMPOSITION;
1798 p = XSTRING (str)->data;
fc932ac6 1799 pend = p + STRING_BYTES (XSTRING (str));
4ed46869
KH
1800 i = 1;
1801 while (p < pend)
1802 {
9b4d1fe6 1803 if (*p < 0x20) /* control code */
4ed46869
KH
1804 error ("Invalid component character: %d", *p);
1805 else if (*p < 0x80) /* ASCII */
1806 {
1807 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1808 error ("Too long string to be composed: %s", XSTRING (str)->data);
1809 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1810 code itself. */
1811 buf[i++] = 0xA0;
1812 buf[i++] = *p++ + 0x80;
1813 }
1814 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1815 {
1816 /* Already composed. Eliminate the heading
1817 LEADING_CODE_COMPOSITION, keep the remaining bytes
1818 unchanged. */
1819 p++;
de54b0d5
KH
1820 if (*p == 255)
1821 error ("Can't compose a rule-based composition character");
4ed46869 1822 ptemp = p;
6ae1f27e 1823 while (! CHAR_HEAD_P (*p)) p++;
9b4d1fe6
KH
1824 if (str_cmpchar_id (ptemp - 1, p - ptemp + 1) < 0)
1825 error ("Can't compose an invalid composition character");
4ed46869
KH
1826 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1827 error ("Too long string to be composed: %s", XSTRING (str)->data);
1828 bcopy (ptemp, buf + i, p - ptemp);
1829 i += p - ptemp;
1830 }
1831 else /* multibyte char */
1832 {
1833 /* Add 0x20 to the base leading-code, keep the remaining
1834 bytes unchanged. */
9b4d1fe6
KH
1835 int c = STRING_CHAR_AND_CHAR_LENGTH (p, pend - p, len);
1836
1837 if (len <= 1 || ! CHAR_VALID_P (c, 0))
1838 error ("Can't compose an invalid character");
4ed46869
KH
1839 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1840 error ("Too long string to be composed: %s", XSTRING (str)->data);
1841 bcopy (p, buf + i, len);
1842 buf[i] += 0x20;
1843 p += len, i += len;
1844 }
1845 }
1846
1847 if (i < 5)
1848 /* STR contains only one character, which can't be composed. */
1849 error ("Too short string to be composed: %s", XSTRING (str)->data);
1850
27802600 1851 return make_string_from_bytes (buf, 1, i);
4ed46869
KH
1852}
1853
1854\f
dfcf069d 1855int
4ed46869
KH
1856charset_id_internal (charset_name)
1857 char *charset_name;
1858{
76d7b829 1859 Lisp_Object val;
4ed46869 1860
76d7b829 1861 val= Fget (intern (charset_name), Qcharset);
4ed46869
KH
1862 if (!VECTORP (val))
1863 error ("Charset %s is not defined", charset_name);
1864
1865 return (XINT (XVECTOR (val)->contents[0]));
1866}
1867
1868DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1869 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1870 ()
1871{
1872 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1873 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1874 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1875 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1876 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1877 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1878 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1879 return Qnil;
1880}
1881
dfcf069d 1882void
4ed46869
KH
1883init_charset_once ()
1884{
1885 int i, j, k;
1886
1887 staticpro (&Vcharset_table);
1888 staticpro (&Vcharset_symbol_table);
8a73a704 1889 staticpro (&Vgeneric_character_list);
4ed46869
KH
1890
1891 /* This has to be done here, before we call Fmake_char_table. */
1892 Qcharset_table = intern ("charset-table");
1893 staticpro (&Qcharset_table);
1894
1895 /* Intern this now in case it isn't already done.
1896 Setting this variable twice is harmless.
1897 But don't staticpro it here--that is done in alloc.c. */
1898 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1899
1900 /* Now we are ready to set up this property, so we can
1901 create the charset table. */
1902 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1903 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1904
0282eb69
KH
1905 Qunknown = intern ("unknown");
1906 staticpro (&Qunknown);
1907 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1908 Qunknown);
4ed46869
KH
1909
1910 /* Setup tables. */
1911 for (i = 0; i < 2; i++)
1912 for (j = 0; j < 2; j++)
1913 for (k = 0; k < 128; k++)
1914 iso_charset_table [i][j][k] = -1;
1915
1916 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1917 cmpchar_table_size = n_cmpchars = 0;
1918
60383934 1919 for (i = 0; i < 256; i++)
4ed46869 1920 BYTES_BY_CHAR_HEAD (i) = 1;
6ef23ebb
KH
1921 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1;
1922 i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++)
1923 BYTES_BY_CHAR_HEAD (i) = 2;
1924 for (i = MIN_CHARSET_OFFICIAL_DIMENSION2;
1925 i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++)
1926 BYTES_BY_CHAR_HEAD (i) = 3;
4ed46869
KH
1927 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1928 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1929 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1930 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
6ef23ebb 1931 /* The followings don't reflect the actual bytes, but just to tell
4ed46869
KH
1932 that it is a start of a multibyte character. */
1933 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
6ef23ebb
KH
1934 BYTES_BY_CHAR_HEAD (0x9E) = 2;
1935 BYTES_BY_CHAR_HEAD (0x9F) = 2;
4ed46869
KH
1936
1937 for (i = 0; i < 128; i++)
1938 WIDTH_BY_CHAR_HEAD (i) = 1;
1939 for (; i < 256; i++)
1940 WIDTH_BY_CHAR_HEAD (i) = 4;
1941 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1942 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1943 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1944 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
8a73a704
KH
1945
1946 {
76d7b829 1947 Lisp_Object val;
8a73a704 1948
76d7b829 1949 val = Qnil;
8a73a704
KH
1950 for (i = 0x81; i < 0x90; i++)
1951 val = Fcons (make_number ((i - 0x70) << 7), val);
1952 for (; i < 0x9A; i++)
1953 val = Fcons (make_number ((i - 0x8F) << 14), val);
1954 for (i = 0xA0; i < 0xF0; i++)
1955 val = Fcons (make_number ((i - 0x70) << 7), val);
1956 for (; i < 0xFF; i++)
1957 val = Fcons (make_number ((i - 0xE0) << 14), val);
1958 val = Fcons (make_number (GENERIC_COMPOSITION_CHAR), val);
1959 Vgeneric_character_list = Fnreverse (val);
1960 }
bbf12bb3
KH
1961
1962 nonascii_insert_offset = 0;
1963 Vnonascii_translation_table = Qnil;
4ed46869
KH
1964}
1965
1966#ifdef emacs
1967
dfcf069d 1968void
4ed46869
KH
1969syms_of_charset ()
1970{
1971 Qascii = intern ("ascii");
1972 staticpro (&Qascii);
1973
1974 Qcharset = intern ("charset");
1975 staticpro (&Qcharset);
1976
1977 /* Define ASCII charset now. */
1978 update_charset_table (make_number (CHARSET_ASCII),
1979 make_number (1), make_number (94),
1980 make_number (1),
1981 make_number (0),
1982 make_number ('B'),
1983 make_number (0),
1984 build_string ("ASCII"),
1985 build_string ("ASCII"),
1986 build_string ("ASCII (ISO646 IRV)"));
1987 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1988 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1989
1990 Qcomposition = intern ("composition");
1991 staticpro (&Qcomposition);
1992 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1993
c1a08b4c
KH
1994 Qauto_fill_chars = intern ("auto-fill-chars");
1995 staticpro (&Qauto_fill_chars);
1996 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1997
4ed46869 1998 defsubr (&Sdefine_charset);
8a73a704 1999 defsubr (&Sgeneric_character_list);
3fac5a51 2000 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2001 defsubr (&Sdeclare_equiv_charset);
2002 defsubr (&Sfind_charset_region);
2003 defsubr (&Sfind_charset_string);
2004 defsubr (&Smake_char_internal);
2005 defsubr (&Ssplit_char);
2006 defsubr (&Schar_charset);
90d7b74e 2007 defsubr (&Scharset_after);
4ed46869 2008 defsubr (&Siso_charset);
9d3d8cba 2009 defsubr (&Schar_valid_p);
d2665018 2010 defsubr (&Sunibyte_char_to_multibyte);
1bcc1567 2011 defsubr (&Smultibyte_char_to_unibyte);
4ed46869
KH
2012 defsubr (&Schar_bytes);
2013 defsubr (&Schar_width);
2014 defsubr (&Sstring_width);
2015 defsubr (&Schar_direction);
af4fecb4 2016 defsubr (&Schars_in_region);
87b089ad 2017 defsubr (&Sstring);
4ed46869
KH
2018 defsubr (&Scmpcharp);
2019 defsubr (&Scmpchar_component);
2020 defsubr (&Scmpchar_cmp_rule);
2021 defsubr (&Scmpchar_cmp_rule_p);
2022 defsubr (&Scmpchar_cmp_count);
2023 defsubr (&Scompose_string);
2024 defsubr (&Ssetup_special_charsets);
2025
2026 DEFVAR_LISP ("charset-list", &Vcharset_list,
2027 "List of charsets ever defined.");
2028 Vcharset_list = Fcons (Qascii, Qnil);
2029
537efd8d 2030 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
b4e9dd77
KH
2031 "Vector of cons cell of a symbol and translation table ever defined.\n\
2032An ID of a translation table is an index of this vector.");
537efd8d 2033 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
b0e3cf2b 2034
4ed46869
KH
2035 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
2036 "Leading-code of composite characters.");
2037 leading_code_composition = LEADING_CODE_COMPOSITION;
2038
2039 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
2040 "Leading-code of private TYPE9N charset of column-width 1.");
2041 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
2042
2043 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
2044 "Leading-code of private TYPE9N charset of column-width 2.");
2045 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
2046
2047 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
2048 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
2049 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
2050
2051 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
2052 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
2053 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
2054
2055 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
d2665018 2056 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
4cf9710d
RS
2057This is used for converting unibyte text to multibyte,\n\
2058and for inserting character codes specified by number.\n\n\
3e8ceaac
RS
2059This serves to convert a Latin-1 or similar 8-bit character code\n\
2060to the corresponding Emacs multibyte character code.\n\
2061Typically the value should be (- (make-char CHARSET 0) 128),\n\
2062for your choice of character set.\n\
537efd8d 2063If `nonascii-translation-table' is non-nil, it overrides this variable.");
35e623fb 2064 nonascii_insert_offset = 0;
b0e3cf2b 2065
b4e9dd77 2066 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
537efd8d 2067 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
4cf9710d
RS
2068This is used for converting unibyte text to multibyte,\n\
2069and for inserting character codes specified by number.\n\n\
2070Conversion is performed only when multibyte characters are enabled,\n\
2071and it serves to convert a Latin-1 or similar 8-bit character code\n\
2072to the corresponding Emacs character code.\n\n\
da4d65af 2073If this is nil, `nonascii-insert-offset' is used instead.\n\
b4e9dd77
KH
2074See also the docstring of `make-translation-table'.");
2075 Vnonascii_translation_table = Qnil;
4cf9710d 2076
b0e3cf2b
KH
2077 DEFVAR_INT ("min-composite-char", &min_composite_char,
2078 "Minimum character code of a composite character.");
2079 min_composite_char = MIN_CHAR_COMPOSITION;
c1a08b4c
KH
2080
2081 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
2082 "A char-table for characters which invoke auto-filling.\n\
2083Such characters has value t in this table.");
2084 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
60022cb7
AS
2085 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
2086 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
4ed46869
KH
2087}
2088
2089#endif /* emacs */