(struct iso2022_spec): Change the declaration of
[bpt/emacs.git] / src / charset.c
CommitLineData
4ed46869
KH
1/* Multilingual characters handler.
2 Ver.1.0
4ed46869
KH
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
369314dc
KH
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
4ed46869 12
369314dc
KH
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
4ed46869 17
369314dc
KH
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
20the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA. */
4ed46869
KH
22
23/* At first, see the document in `charset.h' to understand the code in
24 this file. */
25
26#include <stdio.h>
27
28#ifdef emacs
29
30#include <sys/types.h>
31#include <config.h>
32#include "lisp.h"
33#include "buffer.h"
34#include "charset.h"
35#include "coding.h"
fc6b09bf 36#include "disptab.h"
4ed46869
KH
37
38#else /* not emacs */
39
40#include "mulelib.h"
41
42#endif /* emacs */
43
44Lisp_Object Qcharset, Qascii, Qcomposition;
45
46/* Declaration of special leading-codes. */
47int leading_code_composition; /* for composite characters */
48int leading_code_private_11; /* for private DIMENSION1 of 1-column */
49int leading_code_private_12; /* for private DIMENSION1 of 2-column */
50int leading_code_private_21; /* for private DIMENSION2 of 1-column */
51int leading_code_private_22; /* for private DIMENSION2 of 2-column */
52
53/* Declaration of special charsets. */
54int charset_ascii; /* ASCII */
55int charset_composition; /* for a composite character */
56int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
57int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
58int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
59int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
60int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
61int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
62int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
63
64Lisp_Object Qcharset_table;
65
66/* A char-table containing information of each character set. */
67Lisp_Object Vcharset_table;
68
69/* A vector of charset symbol indexed by charset-id. This is used
70 only for returning charset symbol from C functions. */
71Lisp_Object Vcharset_symbol_table;
72
73/* A list of charset symbols ever defined. */
74Lisp_Object Vcharset_list;
75
76/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
77int bytes_by_char_head[256];
78int width_by_char_head[256];
79
80/* Mapping table from ISO2022's charset (specified by DIMENSION,
81 CHARS, and FINAL-CHAR) to Emacs' charset. */
82int iso_charset_table[2][2][128];
83
513ee442
KH
84/* Table of pointers to the structure `cmpchar_info' indexed by
85 CMPCHAR-ID. */
86struct cmpchar_info **cmpchar_table;
87/* The current size of `cmpchar_table'. */
88static int cmpchar_table_size;
89/* Number of the current composite characters. */
90int n_cmpchars;
91
4ed46869
KH
92/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
93unsigned char *_fetch_multibyte_char_p;
94int _fetch_multibyte_char_len;
95
96/* Set STR a pointer to the multi-byte form of the character C. If C
97 is not a composite character, the multi-byte form is set in WORKBUF
98 and STR points WORKBUF. The caller should allocate at least 4-byte
99 area at WORKBUF in advance. Returns the length of the multi-byte
bd4c6dd0
KH
100 form. If C is an invalid character to have a multi-byte form,
101 signal an error.
4ed46869
KH
102
103 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
104 function directly if C can be an ASCII character. */
105
106int
107non_ascii_char_to_string (c, workbuf, str)
108 int c;
109 unsigned char *workbuf, **str;
110{
6dc0722d 111 int charset, c1, c2;
4ed46869
KH
112
113 if (COMPOSITE_CHAR_P (c))
114 {
115 int cmpchar_id = COMPOSITE_CHAR_ID (c);
116
117 if (cmpchar_id < n_cmpchars)
118 {
119 *str = cmpchar_table[cmpchar_id]->data;
120 return cmpchar_table[cmpchar_id]->len;
121 }
122 else
123 {
bd4c6dd0 124 error ("Invalid characer: %d", c);
4ed46869
KH
125 }
126 }
127
128 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
bd4c6dd0
KH
129 if (!charset
130 || ! CHARSET_DEFINED_P (charset)
131 || c1 >= 0 && c1 < 32
132 || c2 >= 0 && c2 < 32)
133 error ("Invalid characer: %d", c);
4ed46869
KH
134
135 *str = workbuf;
136 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
137 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
138 workbuf++;
139 *workbuf++ = c1 | 0x80;
6dc0722d 140 if (c2 >= 0)
4ed46869
KH
141 *workbuf++ = c2 | 0x80;
142
143 return (workbuf - *str);
144}
145
146/* Return a non-ASCII character of which multi-byte form is at STR of
147 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
148 character is set to the address ACTUAL_LEN.
149
150 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
151 directly if STR can hold an ASCII character. */
152
153string_to_non_ascii_char (str, len, actual_len)
154 unsigned char *str;
155 int len, *actual_len;
156{
157 int charset;
158 unsigned char c1, c2;
159 register int c;
160
161 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
162 {
163 if (actual_len)
164 *actual_len = 1;
165 return (int) *str;
166 }
167
168 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
169
170 if (actual_len)
171 *actual_len = (charset == CHARSET_COMPOSITION
172 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
173 : BYTES_BY_CHAR_HEAD (*str));
174 return c;
175}
176
177/* Return the length of the multi-byte form at string STR of length LEN. */
178int
179multibyte_form_length (str, len)
180 unsigned char *str;
181 int len;
182{
183 int charset;
184 unsigned char c1, c2;
185 register int c;
186
187 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
188 return 1;
189
190 return (charset == CHARSET_COMPOSITION
191 ? cmpchar_table[(c1 << 7) | c2]->len
192 : BYTES_BY_CHAR_HEAD (*str));
193}
194
195/* Check if string STR of length LEN contains valid multi-byte form of
196 a character. If valid, charset and position codes of the character
197 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
198 return -1. This should be used only in the macro SPLIT_STRING
199 which checks range of STR in advance. */
200
201split_non_ascii_string (str, len, charset, c1, c2)
202 register unsigned char *str, *c1, *c2;
203 register int len, *charset;
204{
205 register unsigned int cs = *str++;
206
207 if (cs == LEADING_CODE_COMPOSITION)
208 {
209 int cmpchar_id = str_cmpchar_id (str - 1, len);
210
211 if (cmpchar_id < 0)
212 return -1;
213 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
214 }
215 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
216 && CHARSET_DEFINED_P (cs))
217 {
218 *charset = cs;
219 if (*str < 0xA0)
220 return -1;
221 *c1 = (*str++) & 0x7F;
222 if (CHARSET_DIMENSION (cs) == 2)
223 {
224 if (*str < 0xA0)
225 return -1;
226 *c2 = (*str++) & 0x7F;
227 }
228 }
229 else
230 return -1;
231 return 0;
232}
233
23d2a7f1
KH
234/* Return a character unified with C (or a character made of CHARSET,
235 C1, and C2) in unification table TABLE. If no unification is found
236 in TABLE, return C. */
237unify_char (table, c, charset, c1, c2)
238 Lisp_Object table;
239 int c, charset, c1, c2;
240{
241 Lisp_Object ch;
242 int alt_charset, alt_c1, alt_c2, dimension;
243
244 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
245 if (!CHAR_TABLE_P (table)
246 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
247 || XINT (ch) < 0)
248 return c;
249
250 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
251 dimension = CHARSET_DIMENSION (alt_charset);
252 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
253 /* CH is not a generic character, just return it. */
254 return XFASTINT (ch);
255
256 /* Since CH is a generic character, we must return a specific
257 charater which has the same position codes as C from CH. */
258 if (charset < 0)
259 SPLIT_CHAR (c, charset, c1, c2);
260 if (dimension != CHARSET_DIMENSION (charset))
261 /* We can't make such a character because of dimension mismatch. */
262 return c;
263 if (!alt_c1) alt_c1 = c1;
264 if (!alt_c2) alt_c2 = c2;
265 return MAKE_CHAR (alt_charset, c1, c2);
266}
267
4ed46869
KH
268/* Update the table Vcharset_table with the given arguments (see the
269 document of `define-charset' for the meaning of each argument).
270 Several other table contents are also updated. The caller should
271 check the validity of CHARSET-ID and the remaining arguments in
272 advance. */
273
274void
275update_charset_table (charset_id, dimension, chars, width, direction,
276 iso_final_char, iso_graphic_plane,
277 short_name, long_name, description)
278 Lisp_Object charset_id, dimension, chars, width, direction;
279 Lisp_Object iso_final_char, iso_graphic_plane;
280 Lisp_Object short_name, long_name, description;
281{
282 int charset = XINT (charset_id);
283 int bytes;
284 unsigned char leading_code_base, leading_code_ext;
285
6dc0722d
KH
286 if (NILP (CHARSET_TABLE_ENTRY (charset)))
287 CHARSET_TABLE_ENTRY (charset)
288 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869
KH
289
290 /* Get byte length of multibyte form, base leading-code, and
291 extended leading-code of the charset. See the comment under the
292 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
293 bytes = XINT (dimension);
294 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
295 {
296 /* Official charset, it doesn't have an extended leading-code. */
297 if (charset != CHARSET_ASCII)
298 bytes += 1; /* For a base leading-code. */
299 leading_code_base = charset;
300 leading_code_ext = 0;
301 }
302 else
303 {
304 /* Private charset. */
305 bytes += 2; /* For base and extended leading-codes. */
306 leading_code_base
307 = (charset < LEADING_CODE_EXT_12
308 ? LEADING_CODE_PRIVATE_11
309 : (charset < LEADING_CODE_EXT_21
310 ? LEADING_CODE_PRIVATE_12
311 : (charset < LEADING_CODE_EXT_22
312 ? LEADING_CODE_PRIVATE_21
313 : LEADING_CODE_PRIVATE_22)));
314 leading_code_ext = charset;
315 }
316
317 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
318 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
319 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
320 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
321 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
322 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
323 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
324 = make_number (leading_code_base);
325 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
326 = make_number (leading_code_ext);
327 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
328 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
329 = iso_graphic_plane;
330 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
331 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
332 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
333 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
334
335 {
336 /* If we have already defined a charset which has the same
337 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
338 DIRECTION, we must update the entry REVERSE-CHARSET of both
339 charsets. If there's no such charset, the value of the entry
340 is set to nil. */
341 int i;
342
513ee442 343 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
344 if (!NILP (CHARSET_TABLE_ENTRY (i)))
345 {
346 if (CHARSET_DIMENSION (i) == XINT (dimension)
347 && CHARSET_CHARS (i) == XINT (chars)
348 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
349 && CHARSET_DIRECTION (i) != XINT (direction))
350 {
351 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
352 = make_number (i);
353 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
354 break;
355 }
356 }
513ee442 357 if (i > MAX_CHARSET)
4ed46869
KH
358 /* No such a charset. */
359 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
360 = make_number (-1);
361 }
362
363 if (charset != CHARSET_ASCII
364 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
365 {
366 /* Update tables bytes_by_char_head and width_by_char_head. */
367 bytes_by_char_head[leading_code_base] = bytes;
368 width_by_char_head[leading_code_base] = XINT (width);
369
370 /* Update table emacs_code_class. */
371 emacs_code_class[charset] = (bytes == 2
372 ? EMACS_leading_code_2
373 : (bytes == 3
374 ? EMACS_leading_code_3
375 : EMACS_leading_code_4));
376 }
377
378 /* Update table iso_charset_table. */
379 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
380 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
381}
382
383#ifdef emacs
384
385/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
386 is invalid. */
387int
388get_charset_id (charset_symbol)
389 Lisp_Object charset_symbol;
390{
391 Lisp_Object val;
392 int charset;
393
394 return ((SYMBOLP (charset_symbol)
395 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
396 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
397 CHARSET_VALID_P (charset)))
398 ? charset : -1);
399}
400
401/* Return an identification number for a new private charset of
402 DIMENSION and WIDTH. If there's no more room for the new charset,
403 return 0. */
404Lisp_Object
405get_new_private_charset_id (dimension, width)
406 int dimension, width;
407{
408 int charset, from, to;
409
410 if (dimension == 1)
411 {
412 if (width == 1)
413 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
414 else
415 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
416 }
417 else
418 {
419 if (width == 1)
420 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
421 else
422 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
423 }
424
425 for (charset = from; charset < to; charset++)
426 if (!CHARSET_DEFINED_P (charset)) break;
427
428 return make_number (charset < to ? charset : 0);
429}
430
431DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
432 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 433If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
434 treated as a private charset.\n\
435INFO-VECTOR is a vector of the format:\n\
436 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
437 SHORT-NAME LONG-NAME DESCRIPTION]\n\
438The meanings of each elements is as follows:\n\
439DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
440CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
441WIDTH (integer) is the number of columns a character in the charset\n\
442occupies on the screen: one of 0, 1, and 2.\n\
443\n\
444DIRECTION (integer) is the rendering direction of characters in the\n\
445charset when rendering. If 0, render from right to left, else\n\
446render from left to right.\n\
447\n\
448ISO-FINAL-CHAR (character) is the final character of the\n\
449corresponding ISO 2022 charset.\n\
450\n\
451ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
452while encoding to variants of ISO 2022 coding system, one of the\n\
453following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
454\n\
455SHORT-NAME (string) is the short name to refer to the charset.\n\
456\n\
457LONG-NAME (string) is the long name to refer to the charset.\n\
458\n\
459DESCRIPTION (string) is the description string of the charset.")
460 (charset_id, charset_symbol, info_vector)
461 Lisp_Object charset_id, charset_symbol, info_vector;
462{
463 Lisp_Object *vec;
464
465 if (!NILP (charset_id))
466 CHECK_NUMBER (charset_id, 0);
467 CHECK_SYMBOL (charset_symbol, 1);
468 CHECK_VECTOR (info_vector, 2);
469
470 if (! NILP (charset_id))
471 {
472 if (! CHARSET_VALID_P (XINT (charset_id)))
473 error ("Invalid CHARSET: %d", XINT (charset_id));
474 else if (CHARSET_DEFINED_P (XINT (charset_id)))
475 error ("Already defined charset: %d", XINT (charset_id));
476 }
477
478 vec = XVECTOR (info_vector)->contents;
479 if (XVECTOR (info_vector)->size != 9
480 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
481 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
482 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
483 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
484 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
485 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
486 || !STRINGP (vec[6])
487 || !STRINGP (vec[7])
488 || !STRINGP (vec[8]))
489 error ("Invalid info-vector argument for defining charset %s",
490 XSYMBOL (charset_symbol)->name->data);
491
492 if (NILP (charset_id))
493 {
494 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
495 if (XINT (charset_id) == 0)
496 error ("There's no room for a new private charset %s",
497 XSYMBOL (charset_symbol)->name->data);
498 }
499
500 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
501 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 502 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
503 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
504 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
505 return Qnil;
506}
507
508DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
509 4, 4, 0,
510 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
511CHARSET should be defined by `defined-charset' in advance.")
512 (dimension, chars, final_char, charset_symbol)
513 Lisp_Object dimension, chars, final_char, charset_symbol;
514{
515 int charset;
516
517 CHECK_NUMBER (dimension, 0);
518 CHECK_NUMBER (chars, 1);
519 CHECK_NUMBER (final_char, 2);
520 CHECK_SYMBOL (charset_symbol, 3);
521
522 if (XINT (dimension) != 1 && XINT (dimension) != 2)
523 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
524 if (XINT (chars) != 94 && XINT (chars) != 96)
525 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
526 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
527 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
528 if ((charset = get_charset_id (charset_symbol)) < 0)
529 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
530
531 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
532 return Qnil;
533}
534
535/* Return number of different charsets in STR of length LEN. In
536 addition, for each found charset N, CHARSETS[N] is set 1. The
23d2a7f1
KH
537 caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance.
538 It may lookup a unification table TABLE if supplied. */
4ed46869
KH
539
540int
23d2a7f1 541find_charset_in_str (str, len, charsets, table)
4ed46869
KH
542 unsigned char *str, *charsets;
543 int len;
23d2a7f1 544 Lisp_Object table;
4ed46869
KH
545{
546 int num = 0;
547
23d2a7f1
KH
548 if (! CHAR_TABLE_P (table))
549 table = Qnil;
550
4ed46869
KH
551 while (len > 0)
552 {
553 int bytes = BYTES_BY_CHAR_HEAD (*str);
23d2a7f1
KH
554 int charset;
555
556 if (NILP (table))
557 charset = CHARSET_AT (str);
558 else
559 {
560 int c, charset;
561 unsigned char c1, c2;
562
563 SPLIT_STRING(str, bytes, charset, c1, c2);
564 if ((c = unify_char (table, -1, charset, c1, c2)) >= 0)
565 charset = CHAR_CHARSET (c);
566 }
4ed46869
KH
567
568 if (!charsets[charset])
569 {
570 charsets[charset] = 1;
571 num += 1;
572 }
573 str += bytes;
574 len -= bytes;
575 }
576 return num;
577}
578
579DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 580 2, 3, 0,
4ed46869 581 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1
KH
582BEG and END are buffer positions.\n\
583Optional arg TABLE if non-nil is a unification table to look up.")
584 (beg, end, table)
585 Lisp_Object beg, end, table;
4ed46869 586{
513ee442 587 char charsets[MAX_CHARSET + 1];
4ed46869
KH
588 int from, to, stop, i;
589 Lisp_Object val;
590
591 validate_region (&beg, &end);
592 from = XFASTINT (beg);
593 stop = to = XFASTINT (end);
594 if (from < GPT && GPT < to)
595 stop = GPT;
513ee442 596 bzero (charsets, MAX_CHARSET + 1);
4ed46869
KH
597 while (1)
598 {
23d2a7f1 599 find_charset_in_str (POS_ADDR (from), stop - from, charsets, table);
4ed46869
KH
600 if (stop < to)
601 from = stop, stop = to;
602 else
603 break;
604 }
605 val = Qnil;
513ee442 606 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
607 if (charsets[i])
608 val = Fcons (CHARSET_SYMBOL (i), val);
609 return val;
610}
611
612DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
613 1, 2, 0,
614 "Return a list of charsets in STR.\n\
615Optional arg TABLE if non-nil is a unification table to look up.")
616 (str, table)
617 Lisp_Object str, table;
4ed46869 618{
513ee442 619 char charsets[MAX_CHARSET + 1];
4ed46869
KH
620 int i;
621 Lisp_Object val;
622
623 CHECK_STRING (str, 0);
513ee442 624 bzero (charsets, MAX_CHARSET + 1);
23d2a7f1
KH
625 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size,
626 charsets, table);
4ed46869 627 val = Qnil;
513ee442 628 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
629 if (charsets[i])
630 val = Fcons (CHARSET_SYMBOL (i), val);
631 return val;
632}
633\f
634DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 635 "")
4ed46869
KH
636 (charset, code1, code2)
637 Lisp_Object charset, code1, code2;
638{
639 CHECK_NUMBER (charset, 0);
640
641 if (NILP (code1))
642 XSETFASTINT (code1, 0);
643 else
644 CHECK_NUMBER (code1, 1);
645 if (NILP (code2))
646 XSETFASTINT (code2, 0);
647 else
648 CHECK_NUMBER (code2, 2);
649
650 if (!CHARSET_DEFINED_P (XINT (charset)))
651 error ("Invalid charset: %d", XINT (charset));
652
653 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
654}
655
656DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
657 "Return list of charset and one or two position-codes of CHAR.")
658 (ch)
659 Lisp_Object ch;
660{
661 Lisp_Object val;
6dc0722d 662 int charset, c1, c2;
4ed46869
KH
663
664 CHECK_NUMBER (ch, 0);
665 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 666 return (c2 >= 0
4ed46869
KH
667 ? Fcons (CHARSET_SYMBOL (charset),
668 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
669 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
670}
671
672DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
673 "Return charset of CHAR.")
674 (ch)
675 Lisp_Object ch;
676{
677 CHECK_NUMBER (ch, 0);
678
679 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
680}
681
682DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
683 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
684 (dimension, chars, final_char)
685 Lisp_Object dimension, chars, final_char;
686{
687 int charset;
688
689 CHECK_NUMBER (dimension, 0);
690 CHECK_NUMBER (chars, 1);
691 CHECK_NUMBER (final_char, 2);
692
693 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
694 return Qnil;
695 return CHARSET_SYMBOL (charset);
696}
697
698DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
699 "Return byte length of multi-byte form of CHAR.")
700 (ch)
701 Lisp_Object ch;
702{
703 Lisp_Object val;
704 int bytes;
705
706 CHECK_NUMBER (ch, 0);
707 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
708 {
709 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
710
711 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
712 }
713 else
714 {
715 int charset = CHAR_CHARSET (XFASTINT (ch));
716
717 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
718 }
719
720 XSETFASTINT (val, bytes);
721 return val;
722}
723
724/* Return the width of character of which multi-byte form starts with
725 C. The width is measured by how many columns occupied on the
726 screen when displayed in the current buffer. */
727
728#define ONE_BYTE_CHAR_WIDTH(c) \
729 (c < 0x20 \
730 ? (c == '\t' \
53316e55 731 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
732 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
733 : (c < 0x7f \
734 ? 1 \
735 : (c == 0x7F \
736 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
737 : ((! NILP (current_buffer->enable_multibyte_characters) \
738 && BASE_LEADING_CODE_P (c)) \
739 ? WIDTH_BY_CHAR_HEAD (c) \
740 : 4)))) \
741
742
743DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
744 "Return width of CHAR when displayed in the current buffer.\n\
745The width is measured by how many columns it occupies on the screen.")
746 (ch)
747 Lisp_Object ch;
748{
859f2b3c 749 Lisp_Object val, disp;
4ed46869
KH
750 int c;
751
752 CHECK_NUMBER (ch, 0);
753
859f2b3c
RS
754 c = XINT (ch);
755
756 /* Get the way the display table would display it. */
757 disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c));
758
759 if (VECTORP (disp))
760 XSETINT (val, XVECTOR (disp)->size);
761 else if (SINGLE_BYTE_CHAR_P (c))
762 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
763 else if (COMPOSITE_CHAR_P (c))
764 {
765 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
766 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
767 }
768 else
769 {
770 int charset = CHAR_CHARSET (c);
771
772 XSETFASTINT (val, CHARSET_WIDTH (charset));
773 }
774 return val;
775}
776
777/* Return width of string STR of length LEN when displayed in the
778 current buffer. The width is measured by how many columns it
779 occupies on the screen. */
859f2b3c 780
4ed46869
KH
781int
782strwidth (str, len)
783 unsigned char *str;
784 int len;
785{
786 unsigned char *endp = str + len;
787 int width = 0;
859f2b3c 788 struct Lisp_Char_Table *dp = buffer_display_table (current_buffer);
4ed46869 789
859f2b3c
RS
790 while (str < endp)
791 {
792 if (*str == LEADING_CODE_COMPOSITION)
793 {
794 int id = str_cmpchar_id (str, endp - str);
795
796 if (id < 0)
797 {
798 width += 4;
799 str++;
800 }
801 else
802 {
803 width += cmpchar_table[id]->width;
804 str += cmpchar_table[id]->len;
805 }
806 }
807 else
808 {
809 Lisp_Object disp;
810 int thiswidth;
811 int c = STRING_CHAR (str, endp - str);
812
813 /* Get the way the display table would display it. */
acc35c36
RS
814 if (dp)
815 disp = DISP_CHAR_VECTOR (dp, c);
816 else
817 disp = Qnil;
859f2b3c
RS
818
819 if (VECTORP (disp))
820 thiswidth = XVECTOR (disp)->size;
821 else
822 thiswidth = ONE_BYTE_CHAR_WIDTH (*str);
823
824 width += thiswidth;
825 str += BYTES_BY_CHAR_HEAD (*str);
826 }
827 }
4ed46869
KH
828 return width;
829}
830
831DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
832 "Return width of STRING when displayed in the current buffer.\n\
833Width is measured by how many columns it occupies on the screen.\n\
834When calculating width of a multi-byte character in STRING,\n\
835 only the base leading-code is considered and the validity of\n\
836 the following bytes are not checked.")
837 (str)
838 Lisp_Object str;
839{
840 Lisp_Object val;
841
842 CHECK_STRING (str, 0);
843 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
844 return val;
845}
846
847DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
848 "Return the direction of CHAR.\n\
849The returned value is 0 for left-to-right and 1 for right-to-left.")
850 (ch)
851 Lisp_Object ch;
852{
853 int charset;
854
855 CHECK_NUMBER (ch, 0);
856 charset = CHAR_CHARSET (XFASTINT (ch));
857 if (!CHARSET_DEFINED_P (charset))
858 error ("Invalid character: %d", XINT (ch));
859 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
860}
861
862DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
863 "Return number of characters in STRING.")
864 (str)
865 Lisp_Object str;
866{
867 Lisp_Object val;
868 unsigned char *p, *endp;
869 int chars;
870
871 CHECK_STRING (str, 0);
872
873 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
874 chars = 0;
875 while (p < endp)
876 {
877 if (*p == LEADING_CODE_COMPOSITION)
878 {
879 p++;
880 while (p < endp && ! CHAR_HEAD_P (p)) p++;
881 }
882 else
883 p += BYTES_BY_CHAR_HEAD (*p);
884 chars++;
885 }
886
887 XSETFASTINT (val, chars);
888 return val;
889}
890
891DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
892 "Return non-nil value if POS is at character boundary of multibyte form.\n\
893The return value is:\n\
894 0 if POS is at an ASCII character or at the end of range,\n\
895 1 if POS is at a head of 2-byte length multi-byte form,\n\
896 2 if POS is at a head of 3-byte length multi-byte form,\n\
897 3 if POS is at a head of 4-byte length multi-byte form,\n\
898 4 if POS is at a head of multi-byte form of a composite character.\n\
899If POS is out of range or not at character boundary, return NIL.")
900 (pos)
901 Lisp_Object pos;
902{
903 Lisp_Object val;
904 int n;
905
906 CHECK_NUMBER_COERCE_MARKER (pos, 0);
907
908 n = XINT (pos);
909 if (n < BEGV || n > ZV)
910 return Qnil;
911
912 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
913 XSETFASTINT (val, 0);
914 else
915 {
916 unsigned char *p = POS_ADDR (n);
917
918 if (SINGLE_BYTE_CHAR_P (*p))
919 XSETFASTINT (val, 0);
920 else if (*p == LEADING_CODE_COMPOSITION)
921 XSETFASTINT (val, 4);
922 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
923 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
924 else
925 val = Qnil;
926 }
927 return val;
928}
929
930DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
931 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
932 (n, args)
933 int n;
4ed46869
KH
934 Lisp_Object *args;
935{
53316e55 936 int i;
4ed46869 937 unsigned char *buf
bd4c6dd0 938 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
4ed46869
KH
939 unsigned char *p = buf;
940 Lisp_Object val;
941
942 for (i = 0; i < n; i++)
943 {
944 int c, len;
945 unsigned char *str;
946
947 if (!INTEGERP (args[i]))
948 {
949 free (buf);
950 CHECK_NUMBER (args[i], 0);
951 }
952 c = XINT (args[i]);
953 len = CHAR_STRING (c, p, str);
954 if (p != str)
955 /* C is a composite character. */
956 bcopy (str, p, len);
957 p += len;
958 }
959
960 val = make_string (buf, p - buf);
4ed46869
KH
961 return val;
962}
963
964#endif /* emacs */
965\f
966/*** Composite characters staffs ***/
967
968/* Each composite character is identified by CMPCHAR-ID which is
969 assigned when Emacs needs the character code of the composite
970 character (e.g. when displaying it on the screen). See the
971 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
972 composite character is represented in Emacs. */
973
974/* If `static' is defined, it means that it is defined to null string. */
975#ifndef static
976/* The following function is copied from lread.c. */
977static int
978hash_string (ptr, len)
979 unsigned char *ptr;
980 int len;
981{
982 register unsigned char *p = ptr;
983 register unsigned char *end = p + len;
984 register unsigned char c;
985 register int hash = 0;
986
987 while (p != end)
988 {
989 c = *p++;
990 if (c >= 0140) c -= 40;
991 hash = ((hash<<3) + (hash>>28) + c);
992 }
993 return hash & 07777777777;
994}
995#endif
996
4ed46869
KH
997#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
998
999static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1000
1001/* Each element of `cmpchar_hash_table' is a pointer to an array of
1002 integer, where the 1st element is the size of the array, the 2nd
1003 element is how many elements are actually used in the array, and
1004 the remaining elements are CMPCHAR-IDs of composite characters of
1005 the same hash value. */
1006#define CMPCHAR_HASH_SIZE(table) table[0]
1007#define CMPCHAR_HASH_USED(table) table[1]
1008#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1009
1010/* Return CMPCHAR-ID of the composite character in STR of the length
1011 LEN. If the composite character has not yet been registered,
1012 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1013 is the sole function for assigning CMPCHAR-ID. */
1014int
1015str_cmpchar_id (str, len)
1016 unsigned char *str;
1017 int len;
1018{
1019 int hash_idx, *hashp;
1020 unsigned char *buf;
1021 int embedded_rule; /* 1 if composition rule is embedded. */
1022 int chars; /* number of components. */
1023 int i;
1024 struct cmpchar_info *cmpcharp;
1025
1026 if (len < 5)
1027 /* Any composite char have at least 3-byte length. */
1028 return -1;
1029
1030 /* The second byte 0xFF means compostion rule is embedded. */
1031 embedded_rule = (str[1] == 0xFF);
1032
1033 /* At first, get the actual length of the composite character. */
1034 {
1035 unsigned char *p, *endp = str + 1, *lastp = str + len;
1036 int bytes;
1037
1038 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
1039 chars = 0;
1040 p = str + 1 + embedded_rule;
1041 while (p < endp)
1042 {
1043 /* No need of checking if *P is 0xA0 because
1044 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1045 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
1046 chars++;
1047 }
1048 len = (p -= embedded_rule) - str;
1049 if (p > endp)
1050 len -= - bytes, chars--;
1051
1052 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
1053 /* Invalid number of components. */
1054 return -1;
1055 }
1056 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1057 hashp = cmpchar_hash_table[hash_idx];
1058
1059 /* Then, look into the hash table. */
1060 if (hashp != NULL)
1061 /* Find the correct one among composite characters of the same
1062 hash value. */
1063 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1064 {
1065 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1066 if (len == cmpcharp->len
1067 && ! bcmp (str, cmpcharp->data, len))
1068 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1069 }
1070
1071 /* We have to register the composite character in cmpchar_table. */
513ee442
KH
1072 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1073 /* No, we have no more room for a new composite character. */
1074 return -1;
1075
4ed46869
KH
1076 /* Make the entry in hash table. */
1077 if (hashp == NULL)
1078 {
1079 /* Make a table for 8 composite characters initially. */
1080 hashp = (cmpchar_hash_table[hash_idx]
1081 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1082 CMPCHAR_HASH_SIZE (hashp) = 10;
1083 CMPCHAR_HASH_USED (hashp) = 2;
1084 }
1085 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1086 {
1087 CMPCHAR_HASH_SIZE (hashp) += 8;
1088 hashp = (cmpchar_hash_table[hash_idx]
1089 = (int *) xrealloc (hashp,
1090 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1091 }
1092 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1093 CMPCHAR_HASH_USED (hashp)++;
1094
1095 /* Set information of the composite character in cmpchar_table. */
1096 if (cmpchar_table_size == 0)
1097 {
1098 /* This is the first composite character to be registered. */
1099 cmpchar_table_size = 256;
1100 cmpchar_table
1101 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1102 * cmpchar_table_size);
1103 }
1104 else if (cmpchar_table_size <= n_cmpchars)
1105 {
1106 cmpchar_table_size += 256;
1107 cmpchar_table
1108 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1109 sizeof (cmpchar_table[0])
1110 * cmpchar_table_size);
1111 }
1112
1113 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1114
1115 cmpcharp->len = len;
1116 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1117 bcopy (str, cmpcharp->data, len);
1118 cmpcharp->data[len] = 0;
1119 cmpcharp->glyph_len = chars;
1120 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1121 if (embedded_rule)
1122 {
1123 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1124 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1125 }
1126 else
1127 {
1128 cmpcharp->cmp_rule = NULL;
1129 cmpcharp->col_offset = NULL;
1130 }
1131
1132 /* Setup GLYPH data and composition rules (if any) so as not to make
1133 them every time on displaying. */
1134 {
1135 unsigned char *bufp;
1136 int width;
1137 float leftmost = 0.0, rightmost = 1.0;
1138
1139 if (embedded_rule)
1140 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1141 cmpcharp->col_offset[0] = 0;
1142
1143 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1144 {
1145 if (embedded_rule)
1146 cmpcharp->cmp_rule[i] = *bufp++;
1147
1148 if (*bufp == 0xA0) /* This is an ASCII character. */
1149 {
1150 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1151 width = 1;
1152 bufp++;
1153 }
1154 else /* Multibyte character. */
1155 {
1156 /* Make `bufp' point normal multi-byte form temporally. */
1157 *bufp -= 0x20;
1158 cmpcharp->glyph[i]
1159 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1160 width = WIDTH_BY_CHAR_HEAD (*bufp);
1161 *bufp += 0x20;
1162 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1163 }
1164
1165 if (embedded_rule && i > 0)
1166 {
1167 /* Reference points (global_ref and new_ref) are
1168 encoded as below:
1169
1170 0--1--2 -- ascent
1171 | |
1172 | |
1173 | 4 -+--- center
1174 -- 3 5 -- baseline
1175 | |
1176 6--7--8 -- descent
1177
1178 Now, we calculate the column offset of the new glyph
1179 from the left edge of the first glyph. This can avoid
1180 the same calculation everytime displaying this
1181 composite character. */
1182
1183 /* Reference points of global glyph and new glyph. */
1184 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1185 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1186 /* Column offset relative to the first glyph. */
1187 float left = (leftmost
1188 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1189 - (new_ref % 3) * width / 2.0);
1190
1191 cmpcharp->col_offset[i] = left;
1192 if (left < leftmost)
1193 leftmost = left;
1194 if (left + width > rightmost)
1195 rightmost = left + width;
1196 }
1197 else
1198 {
1199 if (width > rightmost)
1200 rightmost = width;
1201 }
1202 }
1203 if (embedded_rule)
1204 {
1205 /* Now col_offset[N] are relative to the left edge of the
1206 first component. Make them relative to the left edge of
1207 overall glyph. */
1208 for (i = 0; i < chars; i++)
1209 cmpcharp->col_offset[i] -= leftmost;
1210 /* Make rightmost holds width of overall glyph. */
1211 rightmost -= leftmost;
1212 }
1213
1214 cmpcharp->width = rightmost;
1215 if (cmpcharp->width < rightmost)
1216 /* To get a ceiling integer value. */
1217 cmpcharp->width++;
1218 }
1219
1220 cmpchar_table[n_cmpchars] = cmpcharp;
1221
1222 return n_cmpchars++;
1223}
1224
1225/* Return the Nth element of the composite character C. */
1226int
1227cmpchar_component (c, n)
1228 unsigned int c, n;
1229{
1230 int id = COMPOSITE_CHAR_ID (c);
1231
1232 if (id >= n_cmpchars /* C is not a valid composite character. */
1233 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1234 return -1;
1235 /* No face data is stored in glyph code. */
1236 return ((int) (cmpchar_table[id]->glyph[n]));
1237}
1238
1239DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1240 "T if CHAR is a composite character.")
1241 (ch)
1242 Lisp_Object ch;
1243{
1244 CHECK_NUMBER (ch, 0);
1245 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1246}
1247
1248DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1249 2, 2, 0,
1250 "Return the IDXth component character of composite character CHARACTER.")
1251 (character, idx)
1252 Lisp_Object character, idx;
1253{
1254 int c;
1255
1256 CHECK_NUMBER (character, 0);
1257 CHECK_NUMBER (idx, 1);
1258
1259 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1260 args_out_of_range (character, idx);
1261
1262 return make_number (c);
1263}
1264
1265DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1266 2, 2, 0,
55001746
KH
1267 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1268The returned rule is for composing the Nth component\n\
1269on the (N-1)th component. If N is 0, the returned value is always 255.")
1270 (character, n)
1271 Lisp_Object character, n;
4ed46869
KH
1272{
1273 int id, i;
1274
1275 CHECK_NUMBER (character, 0);
55001746 1276 CHECK_NUMBER (n, 1);
4ed46869
KH
1277
1278 id = COMPOSITE_CHAR_ID (XINT (character));
1279 if (id < 0 || id >= n_cmpchars)
1280 error ("Invalid composite character: %d", XINT (character));
55001746 1281 i = XINT (n);
4ed46869 1282 if (i > cmpchar_table[id]->glyph_len)
55001746 1283 args_out_of_range (character, n);
4ed46869
KH
1284
1285 return make_number (cmpchar_table[id]->cmp_rule[i]);
1286}
1287
1288DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1289 Scmpchar_cmp_rule_p, 1, 1, 0,
1290 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1291 (character)
1292 Lisp_Object character;
1293{
1294 int id;
1295
1296 CHECK_NUMBER (character, 0);
1297 id = COMPOSITE_CHAR_ID (XINT (character));
1298 if (id < 0 || id >= n_cmpchars)
1299 error ("Invalid composite character: %d", XINT (character));
1300
1301 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1302}
1303
1304DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1305 Scmpchar_cmp_count, 1, 1, 0,
1306 "Return number of compoents of composite character CHARACTER.")
1307 (character)
1308 Lisp_Object character;
1309{
1310 int id;
1311
1312 CHECK_NUMBER (character, 0);
1313 id = COMPOSITE_CHAR_ID (XINT (character));
1314 if (id < 0 || id >= n_cmpchars)
1315 error ("Invalid composite character: %d", XINT (character));
1316
1317 return (make_number (cmpchar_table[id]->glyph_len));
1318}
1319
1320DEFUN ("compose-string", Fcompose_string, Scompose_string,
1321 1, 1, 0,
1322 "Return one char string composed from all characters in STRING.")
1323 (str)
1324 Lisp_Object str;
1325{
1326 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1327 int len, i;
1328
1329 CHECK_STRING (str, 0);
1330
1331 buf[0] = LEADING_CODE_COMPOSITION;
1332 p = XSTRING (str)->data;
1333 pend = p + XSTRING (str)->size;
1334 i = 1;
1335 while (p < pend)
1336 {
1337 if (*p < 0x20 || *p == 127) /* control code */
1338 error ("Invalid component character: %d", *p);
1339 else if (*p < 0x80) /* ASCII */
1340 {
1341 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1342 error ("Too long string to be composed: %s", XSTRING (str)->data);
1343 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1344 code itself. */
1345 buf[i++] = 0xA0;
1346 buf[i++] = *p++ + 0x80;
1347 }
1348 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1349 {
1350 /* Already composed. Eliminate the heading
1351 LEADING_CODE_COMPOSITION, keep the remaining bytes
1352 unchanged. */
1353 p++;
1354 ptemp = p;
1355 while (! CHAR_HEAD_P (p)) p++;
1356 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1357 error ("Too long string to be composed: %s", XSTRING (str)->data);
1358 bcopy (ptemp, buf + i, p - ptemp);
1359 i += p - ptemp;
1360 }
1361 else /* multibyte char */
1362 {
1363 /* Add 0x20 to the base leading-code, keep the remaining
1364 bytes unchanged. */
1365 len = BYTES_BY_CHAR_HEAD (*p);
1366 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1367 error ("Too long string to be composed: %s", XSTRING (str)->data);
1368 bcopy (p, buf + i, len);
1369 buf[i] += 0x20;
1370 p += len, i += len;
1371 }
1372 }
1373
1374 if (i < 5)
1375 /* STR contains only one character, which can't be composed. */
1376 error ("Too short string to be composed: %s", XSTRING (str)->data);
1377
1378 return make_string (buf, i);
1379}
1380
1381\f
1382charset_id_internal (charset_name)
1383 char *charset_name;
1384{
1385 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1386
1387 if (!VECTORP (val))
1388 error ("Charset %s is not defined", charset_name);
1389
1390 return (XINT (XVECTOR (val)->contents[0]));
1391}
1392
1393DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1394 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1395 ()
1396{
1397 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1398 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1399 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1400 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1401 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1402 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1403 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1404 return Qnil;
1405}
1406
1407init_charset_once ()
1408{
1409 int i, j, k;
1410
1411 staticpro (&Vcharset_table);
1412 staticpro (&Vcharset_symbol_table);
1413
1414 /* This has to be done here, before we call Fmake_char_table. */
1415 Qcharset_table = intern ("charset-table");
1416 staticpro (&Qcharset_table);
1417
1418 /* Intern this now in case it isn't already done.
1419 Setting this variable twice is harmless.
1420 But don't staticpro it here--that is done in alloc.c. */
1421 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1422
1423 /* Now we are ready to set up this property, so we can
1424 create the charset table. */
1425 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1426 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1427
513ee442 1428 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
4ed46869
KH
1429
1430 /* Setup tables. */
1431 for (i = 0; i < 2; i++)
1432 for (j = 0; j < 2; j++)
1433 for (k = 0; k < 128; k++)
1434 iso_charset_table [i][j][k] = -1;
1435
1436 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1437 cmpchar_table_size = n_cmpchars = 0;
1438
1439 for (i = 0; i < 256; i++)
1440 BYTES_BY_CHAR_HEAD (i) = 1;
1441 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1442 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1443 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1444 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1445 /* The following doesn't reflect the actual bytes, but just to tell
1446 that it is a start of a multibyte character. */
1447 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1448
1449 for (i = 0; i < 128; i++)
1450 WIDTH_BY_CHAR_HEAD (i) = 1;
1451 for (; i < 256; i++)
1452 WIDTH_BY_CHAR_HEAD (i) = 4;
1453 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1454 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1455 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1456 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1457}
1458
1459#ifdef emacs
1460
1461syms_of_charset ()
1462{
1463 Qascii = intern ("ascii");
1464 staticpro (&Qascii);
1465
1466 Qcharset = intern ("charset");
1467 staticpro (&Qcharset);
1468
1469 /* Define ASCII charset now. */
1470 update_charset_table (make_number (CHARSET_ASCII),
1471 make_number (1), make_number (94),
1472 make_number (1),
1473 make_number (0),
1474 make_number ('B'),
1475 make_number (0),
1476 build_string ("ASCII"),
1477 build_string ("ASCII"),
1478 build_string ("ASCII (ISO646 IRV)"));
1479 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1480 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1481
1482 Qcomposition = intern ("composition");
1483 staticpro (&Qcomposition);
1484 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1485
1486 defsubr (&Sdefine_charset);
1487 defsubr (&Sdeclare_equiv_charset);
1488 defsubr (&Sfind_charset_region);
1489 defsubr (&Sfind_charset_string);
1490 defsubr (&Smake_char_internal);
1491 defsubr (&Ssplit_char);
1492 defsubr (&Schar_charset);
1493 defsubr (&Siso_charset);
1494 defsubr (&Schar_bytes);
1495 defsubr (&Schar_width);
1496 defsubr (&Sstring_width);
1497 defsubr (&Schar_direction);
1498 defsubr (&Schars_in_string);
1499 defsubr (&Schar_boundary_p);
1500 defsubr (&Sconcat_chars);
1501 defsubr (&Scmpcharp);
1502 defsubr (&Scmpchar_component);
1503 defsubr (&Scmpchar_cmp_rule);
1504 defsubr (&Scmpchar_cmp_rule_p);
1505 defsubr (&Scmpchar_cmp_count);
1506 defsubr (&Scompose_string);
1507 defsubr (&Ssetup_special_charsets);
1508
1509 DEFVAR_LISP ("charset-list", &Vcharset_list,
1510 "List of charsets ever defined.");
1511 Vcharset_list = Fcons (Qascii, Qnil);
1512
1513 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1514 "Leading-code of composite characters.");
1515 leading_code_composition = LEADING_CODE_COMPOSITION;
1516
1517 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1518 "Leading-code of private TYPE9N charset of column-width 1.");
1519 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1520
1521 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1522 "Leading-code of private TYPE9N charset of column-width 2.");
1523 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1524
1525 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1526 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1527 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1528
1529 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1530 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1531 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1532}
1533
1534#endif /* emacs */