Include disptab.h.
[bpt/emacs.git] / src / charset.c
1 /* Multilingual characters handler.
2 Ver.1.0
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 /* At first, see the document in `charset.h' to understand the code in
24 this file. */
25
26 #include <stdio.h>
27
28 #ifdef emacs
29
30 #include <sys/types.h>
31 #include <config.h>
32 #include "lisp.h"
33 #include "buffer.h"
34 #include "charset.h"
35 #include "coding.h"
36 #include "disptab.h"
37
38 #else /* not emacs */
39
40 #include "mulelib.h"
41
42 #endif /* emacs */
43
44 Lisp_Object Qcharset, Qascii, Qcomposition;
45
46 /* Declaration of special leading-codes. */
47 int leading_code_composition; /* for composite characters */
48 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
49 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
50 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
51 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
52
53 /* Declaration of special charsets. */
54 int charset_ascii; /* ASCII */
55 int charset_composition; /* for a composite character */
56 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
57 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
58 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
59 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
60 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
61 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
62 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
63
64 Lisp_Object Qcharset_table;
65
66 /* A char-table containing information of each character set. */
67 Lisp_Object Vcharset_table;
68
69 /* A vector of charset symbol indexed by charset-id. This is used
70 only for returning charset symbol from C functions. */
71 Lisp_Object Vcharset_symbol_table;
72
73 /* A list of charset symbols ever defined. */
74 Lisp_Object Vcharset_list;
75
76 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
77 int bytes_by_char_head[256];
78 int width_by_char_head[256];
79
80 /* Mapping table from ISO2022's charset (specified by DIMENSION,
81 CHARS, and FINAL-CHAR) to Emacs' charset. */
82 int iso_charset_table[2][2][128];
83
84 /* Table of pointers to the structure `cmpchar_info' indexed by
85 CMPCHAR-ID. */
86 struct cmpchar_info **cmpchar_table;
87 /* The current size of `cmpchar_table'. */
88 static int cmpchar_table_size;
89 /* Number of the current composite characters. */
90 int n_cmpchars;
91
92 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
93 unsigned char *_fetch_multibyte_char_p;
94 int _fetch_multibyte_char_len;
95
96 /* Set STR a pointer to the multi-byte form of the character C. If C
97 is not a composite character, the multi-byte form is set in WORKBUF
98 and STR points WORKBUF. The caller should allocate at least 4-byte
99 area at WORKBUF in advance. Returns the length of the multi-byte
100 form. If C is an invalid character to have a multi-byte form,
101 signal an error.
102
103 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
104 function directly if C can be an ASCII character. */
105
106 int
107 non_ascii_char_to_string (c, workbuf, str)
108 int c;
109 unsigned char *workbuf, **str;
110 {
111 int charset, c1, c2;
112
113 if (COMPOSITE_CHAR_P (c))
114 {
115 int cmpchar_id = COMPOSITE_CHAR_ID (c);
116
117 if (cmpchar_id < n_cmpchars)
118 {
119 *str = cmpchar_table[cmpchar_id]->data;
120 return cmpchar_table[cmpchar_id]->len;
121 }
122 else
123 {
124 error ("Invalid characer: %d", c);
125 }
126 }
127
128 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
129 if (!charset
130 || ! CHARSET_DEFINED_P (charset)
131 || c1 >= 0 && c1 < 32
132 || c2 >= 0 && c2 < 32)
133 error ("Invalid characer: %d", c);
134
135 *str = workbuf;
136 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
137 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
138 workbuf++;
139 *workbuf++ = c1 | 0x80;
140 if (c2 >= 0)
141 *workbuf++ = c2 | 0x80;
142
143 return (workbuf - *str);
144 }
145
146 /* Return a non-ASCII character of which multi-byte form is at STR of
147 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
148 character is set to the address ACTUAL_LEN.
149
150 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
151 directly if STR can hold an ASCII character. */
152
153 string_to_non_ascii_char (str, len, actual_len)
154 unsigned char *str;
155 int len, *actual_len;
156 {
157 int charset;
158 unsigned char c1, c2;
159 register int c;
160
161 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
162 {
163 if (actual_len)
164 *actual_len = 1;
165 return (int) *str;
166 }
167
168 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
169
170 if (actual_len)
171 *actual_len = (charset == CHARSET_COMPOSITION
172 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
173 : BYTES_BY_CHAR_HEAD (*str));
174 return c;
175 }
176
177 /* Return the length of the multi-byte form at string STR of length LEN. */
178 int
179 multibyte_form_length (str, len)
180 unsigned char *str;
181 int len;
182 {
183 int charset;
184 unsigned char c1, c2;
185 register int c;
186
187 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
188 return 1;
189
190 return (charset == CHARSET_COMPOSITION
191 ? cmpchar_table[(c1 << 7) | c2]->len
192 : BYTES_BY_CHAR_HEAD (*str));
193 }
194
195 /* Check if string STR of length LEN contains valid multi-byte form of
196 a character. If valid, charset and position codes of the character
197 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
198 return -1. This should be used only in the macro SPLIT_STRING
199 which checks range of STR in advance. */
200
201 split_non_ascii_string (str, len, charset, c1, c2)
202 register unsigned char *str, *c1, *c2;
203 register int len, *charset;
204 {
205 register unsigned int cs = *str++;
206
207 if (cs == LEADING_CODE_COMPOSITION)
208 {
209 int cmpchar_id = str_cmpchar_id (str - 1, len);
210
211 if (cmpchar_id < 0)
212 return -1;
213 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
214 }
215 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
216 && CHARSET_DEFINED_P (cs))
217 {
218 *charset = cs;
219 if (*str < 0xA0)
220 return -1;
221 *c1 = (*str++) & 0x7F;
222 if (CHARSET_DIMENSION (cs) == 2)
223 {
224 if (*str < 0xA0)
225 return -1;
226 *c2 = (*str++) & 0x7F;
227 }
228 }
229 else
230 return -1;
231 return 0;
232 }
233
234 /* Return a character unified with C (or a character made of CHARSET,
235 C1, and C2) in unification table TABLE. If no unification is found
236 in TABLE, return C. */
237 unify_char (table, c, charset, c1, c2)
238 Lisp_Object table;
239 int c, charset, c1, c2;
240 {
241 Lisp_Object ch;
242 int alt_charset, alt_c1, alt_c2, dimension;
243
244 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
245 if (!CHAR_TABLE_P (table)
246 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
247 || XINT (ch) < 0)
248 return c;
249
250 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
251 dimension = CHARSET_DIMENSION (alt_charset);
252 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
253 /* CH is not a generic character, just return it. */
254 return XFASTINT (ch);
255
256 /* Since CH is a generic character, we must return a specific
257 charater which has the same position codes as C from CH. */
258 if (charset < 0)
259 SPLIT_CHAR (c, charset, c1, c2);
260 if (dimension != CHARSET_DIMENSION (charset))
261 /* We can't make such a character because of dimension mismatch. */
262 return c;
263 if (!alt_c1) alt_c1 = c1;
264 if (!alt_c2) alt_c2 = c2;
265 return MAKE_CHAR (alt_charset, c1, c2);
266 }
267
268 /* Update the table Vcharset_table with the given arguments (see the
269 document of `define-charset' for the meaning of each argument).
270 Several other table contents are also updated. The caller should
271 check the validity of CHARSET-ID and the remaining arguments in
272 advance. */
273
274 void
275 update_charset_table (charset_id, dimension, chars, width, direction,
276 iso_final_char, iso_graphic_plane,
277 short_name, long_name, description)
278 Lisp_Object charset_id, dimension, chars, width, direction;
279 Lisp_Object iso_final_char, iso_graphic_plane;
280 Lisp_Object short_name, long_name, description;
281 {
282 int charset = XINT (charset_id);
283 int bytes;
284 unsigned char leading_code_base, leading_code_ext;
285
286 if (NILP (CHARSET_TABLE_ENTRY (charset)))
287 CHARSET_TABLE_ENTRY (charset)
288 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
289
290 /* Get byte length of multibyte form, base leading-code, and
291 extended leading-code of the charset. See the comment under the
292 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
293 bytes = XINT (dimension);
294 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
295 {
296 /* Official charset, it doesn't have an extended leading-code. */
297 if (charset != CHARSET_ASCII)
298 bytes += 1; /* For a base leading-code. */
299 leading_code_base = charset;
300 leading_code_ext = 0;
301 }
302 else
303 {
304 /* Private charset. */
305 bytes += 2; /* For base and extended leading-codes. */
306 leading_code_base
307 = (charset < LEADING_CODE_EXT_12
308 ? LEADING_CODE_PRIVATE_11
309 : (charset < LEADING_CODE_EXT_21
310 ? LEADING_CODE_PRIVATE_12
311 : (charset < LEADING_CODE_EXT_22
312 ? LEADING_CODE_PRIVATE_21
313 : LEADING_CODE_PRIVATE_22)));
314 leading_code_ext = charset;
315 }
316
317 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
318 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
319 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
320 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
321 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
322 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
323 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
324 = make_number (leading_code_base);
325 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
326 = make_number (leading_code_ext);
327 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
328 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
329 = iso_graphic_plane;
330 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
331 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
332 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
333 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
334
335 {
336 /* If we have already defined a charset which has the same
337 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
338 DIRECTION, we must update the entry REVERSE-CHARSET of both
339 charsets. If there's no such charset, the value of the entry
340 is set to nil. */
341 int i;
342
343 for (i = 0; i <= MAX_CHARSET; i++)
344 if (!NILP (CHARSET_TABLE_ENTRY (i)))
345 {
346 if (CHARSET_DIMENSION (i) == XINT (dimension)
347 && CHARSET_CHARS (i) == XINT (chars)
348 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
349 && CHARSET_DIRECTION (i) != XINT (direction))
350 {
351 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
352 = make_number (i);
353 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
354 break;
355 }
356 }
357 if (i > MAX_CHARSET)
358 /* No such a charset. */
359 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
360 = make_number (-1);
361 }
362
363 if (charset != CHARSET_ASCII
364 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
365 {
366 /* Update tables bytes_by_char_head and width_by_char_head. */
367 bytes_by_char_head[leading_code_base] = bytes;
368 width_by_char_head[leading_code_base] = XINT (width);
369
370 /* Update table emacs_code_class. */
371 emacs_code_class[charset] = (bytes == 2
372 ? EMACS_leading_code_2
373 : (bytes == 3
374 ? EMACS_leading_code_3
375 : EMACS_leading_code_4));
376 }
377
378 /* Update table iso_charset_table. */
379 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
380 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
381 }
382
383 #ifdef emacs
384
385 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
386 is invalid. */
387 int
388 get_charset_id (charset_symbol)
389 Lisp_Object charset_symbol;
390 {
391 Lisp_Object val;
392 int charset;
393
394 return ((SYMBOLP (charset_symbol)
395 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
396 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
397 CHARSET_VALID_P (charset)))
398 ? charset : -1);
399 }
400
401 /* Return an identification number for a new private charset of
402 DIMENSION and WIDTH. If there's no more room for the new charset,
403 return 0. */
404 Lisp_Object
405 get_new_private_charset_id (dimension, width)
406 int dimension, width;
407 {
408 int charset, from, to;
409
410 if (dimension == 1)
411 {
412 if (width == 1)
413 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
414 else
415 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
416 }
417 else
418 {
419 if (width == 1)
420 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
421 else
422 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
423 }
424
425 for (charset = from; charset < to; charset++)
426 if (!CHARSET_DEFINED_P (charset)) break;
427
428 return make_number (charset < to ? charset : 0);
429 }
430
431 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
432 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
433 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
434 treated as a private charset.\n\
435 INFO-VECTOR is a vector of the format:\n\
436 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
437 SHORT-NAME LONG-NAME DESCRIPTION]\n\
438 The meanings of each elements is as follows:\n\
439 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
440 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
441 WIDTH (integer) is the number of columns a character in the charset\n\
442 occupies on the screen: one of 0, 1, and 2.\n\
443 \n\
444 DIRECTION (integer) is the rendering direction of characters in the\n\
445 charset when rendering. If 0, render from right to left, else\n\
446 render from left to right.\n\
447 \n\
448 ISO-FINAL-CHAR (character) is the final character of the\n\
449 corresponding ISO 2022 charset.\n\
450 \n\
451 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
452 while encoding to variants of ISO 2022 coding system, one of the\n\
453 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
454 \n\
455 SHORT-NAME (string) is the short name to refer to the charset.\n\
456 \n\
457 LONG-NAME (string) is the long name to refer to the charset.\n\
458 \n\
459 DESCRIPTION (string) is the description string of the charset.")
460 (charset_id, charset_symbol, info_vector)
461 Lisp_Object charset_id, charset_symbol, info_vector;
462 {
463 Lisp_Object *vec;
464
465 if (!NILP (charset_id))
466 CHECK_NUMBER (charset_id, 0);
467 CHECK_SYMBOL (charset_symbol, 1);
468 CHECK_VECTOR (info_vector, 2);
469
470 if (! NILP (charset_id))
471 {
472 if (! CHARSET_VALID_P (XINT (charset_id)))
473 error ("Invalid CHARSET: %d", XINT (charset_id));
474 else if (CHARSET_DEFINED_P (XINT (charset_id)))
475 error ("Already defined charset: %d", XINT (charset_id));
476 }
477
478 vec = XVECTOR (info_vector)->contents;
479 if (XVECTOR (info_vector)->size != 9
480 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
481 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
482 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
483 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
484 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
485 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
486 || !STRINGP (vec[6])
487 || !STRINGP (vec[7])
488 || !STRINGP (vec[8]))
489 error ("Invalid info-vector argument for defining charset %s",
490 XSYMBOL (charset_symbol)->name->data);
491
492 if (NILP (charset_id))
493 {
494 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
495 if (XINT (charset_id) == 0)
496 error ("There's no room for a new private charset %s",
497 XSYMBOL (charset_symbol)->name->data);
498 }
499
500 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
501 vec[4], vec[5], vec[6], vec[7], vec[8]);
502 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
503 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
504 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
505 return Qnil;
506 }
507
508 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
509 4, 4, 0,
510 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
511 CHARSET should be defined by `defined-charset' in advance.")
512 (dimension, chars, final_char, charset_symbol)
513 Lisp_Object dimension, chars, final_char, charset_symbol;
514 {
515 int charset;
516
517 CHECK_NUMBER (dimension, 0);
518 CHECK_NUMBER (chars, 1);
519 CHECK_NUMBER (final_char, 2);
520 CHECK_SYMBOL (charset_symbol, 3);
521
522 if (XINT (dimension) != 1 && XINT (dimension) != 2)
523 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
524 if (XINT (chars) != 94 && XINT (chars) != 96)
525 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
526 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
527 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
528 if ((charset = get_charset_id (charset_symbol)) < 0)
529 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
530
531 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
532 return Qnil;
533 }
534
535 /* Return number of different charsets in STR of length LEN. In
536 addition, for each found charset N, CHARSETS[N] is set 1. The
537 caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance.
538 It may lookup a unification table TABLE if supplied. */
539
540 int
541 find_charset_in_str (str, len, charsets, table)
542 unsigned char *str, *charsets;
543 int len;
544 Lisp_Object table;
545 {
546 int num = 0;
547
548 if (! CHAR_TABLE_P (table))
549 table = Qnil;
550
551 while (len > 0)
552 {
553 int bytes = BYTES_BY_CHAR_HEAD (*str);
554 int charset;
555
556 if (NILP (table))
557 charset = CHARSET_AT (str);
558 else
559 {
560 int c, charset;
561 unsigned char c1, c2;
562
563 SPLIT_STRING(str, bytes, charset, c1, c2);
564 if ((c = unify_char (table, -1, charset, c1, c2)) >= 0)
565 charset = CHAR_CHARSET (c);
566 }
567
568 if (!charsets[charset])
569 {
570 charsets[charset] = 1;
571 num += 1;
572 }
573 str += bytes;
574 len -= bytes;
575 }
576 return num;
577 }
578
579 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
580 2, 3, 0,
581 "Return a list of charsets in the region between BEG and END.\n\
582 BEG and END are buffer positions.\n\
583 Optional arg TABLE if non-nil is a unification table to look up.")
584 (beg, end, table)
585 Lisp_Object beg, end, table;
586 {
587 char charsets[MAX_CHARSET + 1];
588 int from, to, stop, i;
589 Lisp_Object val;
590
591 validate_region (&beg, &end);
592 from = XFASTINT (beg);
593 stop = to = XFASTINT (end);
594 if (from < GPT && GPT < to)
595 stop = GPT;
596 bzero (charsets, MAX_CHARSET + 1);
597 while (1)
598 {
599 find_charset_in_str (POS_ADDR (from), stop - from, charsets, table);
600 if (stop < to)
601 from = stop, stop = to;
602 else
603 break;
604 }
605 val = Qnil;
606 for (i = MAX_CHARSET; i >= 0; i--)
607 if (charsets[i])
608 val = Fcons (CHARSET_SYMBOL (i), val);
609 return val;
610 }
611
612 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
613 1, 2, 0,
614 "Return a list of charsets in STR.\n\
615 Optional arg TABLE if non-nil is a unification table to look up.")
616 (str, table)
617 Lisp_Object str, table;
618 {
619 char charsets[MAX_CHARSET + 1];
620 int i;
621 Lisp_Object val;
622
623 CHECK_STRING (str, 0);
624 bzero (charsets, MAX_CHARSET + 1);
625 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size,
626 charsets, table);
627 val = Qnil;
628 for (i = MAX_CHARSET; i >= 0; i--)
629 if (charsets[i])
630 val = Fcons (CHARSET_SYMBOL (i), val);
631 return val;
632 }
633 \f
634 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
635 "")
636 (charset, code1, code2)
637 Lisp_Object charset, code1, code2;
638 {
639 CHECK_NUMBER (charset, 0);
640
641 if (NILP (code1))
642 XSETFASTINT (code1, 0);
643 else
644 CHECK_NUMBER (code1, 1);
645 if (NILP (code2))
646 XSETFASTINT (code2, 0);
647 else
648 CHECK_NUMBER (code2, 2);
649
650 if (!CHARSET_DEFINED_P (XINT (charset)))
651 error ("Invalid charset: %d", XINT (charset));
652
653 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
654 }
655
656 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
657 "Return list of charset and one or two position-codes of CHAR.")
658 (ch)
659 Lisp_Object ch;
660 {
661 Lisp_Object val;
662 int charset, c1, c2;
663
664 CHECK_NUMBER (ch, 0);
665 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
666 return (c2 >= 0
667 ? Fcons (CHARSET_SYMBOL (charset),
668 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
669 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
670 }
671
672 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
673 "Return charset of CHAR.")
674 (ch)
675 Lisp_Object ch;
676 {
677 CHECK_NUMBER (ch, 0);
678
679 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
680 }
681
682 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
683 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
684 (dimension, chars, final_char)
685 Lisp_Object dimension, chars, final_char;
686 {
687 int charset;
688
689 CHECK_NUMBER (dimension, 0);
690 CHECK_NUMBER (chars, 1);
691 CHECK_NUMBER (final_char, 2);
692
693 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
694 return Qnil;
695 return CHARSET_SYMBOL (charset);
696 }
697
698 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
699 "Return byte length of multi-byte form of CHAR.")
700 (ch)
701 Lisp_Object ch;
702 {
703 Lisp_Object val;
704 int bytes;
705
706 CHECK_NUMBER (ch, 0);
707 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
708 {
709 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
710
711 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
712 }
713 else
714 {
715 int charset = CHAR_CHARSET (XFASTINT (ch));
716
717 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
718 }
719
720 XSETFASTINT (val, bytes);
721 return val;
722 }
723
724 /* Return the width of character of which multi-byte form starts with
725 C. The width is measured by how many columns occupied on the
726 screen when displayed in the current buffer. */
727
728 #define ONE_BYTE_CHAR_WIDTH(c) \
729 (c < 0x20 \
730 ? (c == '\t' \
731 ? XFASTINT (current_buffer->tab_width) \
732 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
733 : (c < 0x7f \
734 ? 1 \
735 : (c == 0x7F \
736 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
737 : ((! NILP (current_buffer->enable_multibyte_characters) \
738 && BASE_LEADING_CODE_P (c)) \
739 ? WIDTH_BY_CHAR_HEAD (c) \
740 : 4)))) \
741
742
743 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
744 "Return width of CHAR when displayed in the current buffer.\n\
745 The width is measured by how many columns it occupies on the screen.")
746 (ch)
747 Lisp_Object ch;
748 {
749 Lisp_Object val, disp;
750 int c;
751
752 CHECK_NUMBER (ch, 0);
753
754 c = XINT (ch);
755
756 /* Get the way the display table would display it. */
757 disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c));
758
759 if (VECTORP (disp))
760 XSETINT (val, XVECTOR (disp)->size);
761 else if (SINGLE_BYTE_CHAR_P (c))
762 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
763 else if (COMPOSITE_CHAR_P (c))
764 {
765 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
766 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
767 }
768 else
769 {
770 int charset = CHAR_CHARSET (c);
771
772 XSETFASTINT (val, CHARSET_WIDTH (charset));
773 }
774 return val;
775 }
776
777 /* Return width of string STR of length LEN when displayed in the
778 current buffer. The width is measured by how many columns it
779 occupies on the screen. */
780
781 int
782 strwidth (str, len)
783 unsigned char *str;
784 int len;
785 {
786 unsigned char *endp = str + len;
787 int width = 0;
788 struct Lisp_Char_Table *dp = buffer_display_table (current_buffer);
789
790 while (str < endp)
791 {
792 if (*str == LEADING_CODE_COMPOSITION)
793 {
794 int id = str_cmpchar_id (str, endp - str);
795
796 if (id < 0)
797 {
798 width += 4;
799 str++;
800 }
801 else
802 {
803 width += cmpchar_table[id]->width;
804 str += cmpchar_table[id]->len;
805 }
806 }
807 else
808 {
809 Lisp_Object disp;
810 int thiswidth;
811 int c = STRING_CHAR (str, endp - str);
812
813 /* Get the way the display table would display it. */
814 disp = DISP_CHAR_VECTOR (dp, c);
815
816 if (VECTORP (disp))
817 thiswidth = XVECTOR (disp)->size;
818 else
819 thiswidth = ONE_BYTE_CHAR_WIDTH (*str);
820
821 width += thiswidth;
822 str += BYTES_BY_CHAR_HEAD (*str);
823 }
824 }
825 return width;
826 }
827
828 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
829 "Return width of STRING when displayed in the current buffer.\n\
830 Width is measured by how many columns it occupies on the screen.\n\
831 When calculating width of a multi-byte character in STRING,\n\
832 only the base leading-code is considered and the validity of\n\
833 the following bytes are not checked.")
834 (str)
835 Lisp_Object str;
836 {
837 Lisp_Object val;
838
839 CHECK_STRING (str, 0);
840 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
841 return val;
842 }
843
844 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
845 "Return the direction of CHAR.\n\
846 The returned value is 0 for left-to-right and 1 for right-to-left.")
847 (ch)
848 Lisp_Object ch;
849 {
850 int charset;
851
852 CHECK_NUMBER (ch, 0);
853 charset = CHAR_CHARSET (XFASTINT (ch));
854 if (!CHARSET_DEFINED_P (charset))
855 error ("Invalid character: %d", XINT (ch));
856 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
857 }
858
859 DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
860 "Return number of characters in STRING.")
861 (str)
862 Lisp_Object str;
863 {
864 Lisp_Object val;
865 unsigned char *p, *endp;
866 int chars;
867
868 CHECK_STRING (str, 0);
869
870 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
871 chars = 0;
872 while (p < endp)
873 {
874 if (*p == LEADING_CODE_COMPOSITION)
875 {
876 p++;
877 while (p < endp && ! CHAR_HEAD_P (p)) p++;
878 }
879 else
880 p += BYTES_BY_CHAR_HEAD (*p);
881 chars++;
882 }
883
884 XSETFASTINT (val, chars);
885 return val;
886 }
887
888 DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
889 "Return non-nil value if POS is at character boundary of multibyte form.\n\
890 The return value is:\n\
891 0 if POS is at an ASCII character or at the end of range,\n\
892 1 if POS is at a head of 2-byte length multi-byte form,\n\
893 2 if POS is at a head of 3-byte length multi-byte form,\n\
894 3 if POS is at a head of 4-byte length multi-byte form,\n\
895 4 if POS is at a head of multi-byte form of a composite character.\n\
896 If POS is out of range or not at character boundary, return NIL.")
897 (pos)
898 Lisp_Object pos;
899 {
900 Lisp_Object val;
901 int n;
902
903 CHECK_NUMBER_COERCE_MARKER (pos, 0);
904
905 n = XINT (pos);
906 if (n < BEGV || n > ZV)
907 return Qnil;
908
909 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
910 XSETFASTINT (val, 0);
911 else
912 {
913 unsigned char *p = POS_ADDR (n);
914
915 if (SINGLE_BYTE_CHAR_P (*p))
916 XSETFASTINT (val, 0);
917 else if (*p == LEADING_CODE_COMPOSITION)
918 XSETFASTINT (val, 4);
919 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
920 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
921 else
922 val = Qnil;
923 }
924 return val;
925 }
926
927 DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
928 "Concatenate all the argument characters and make the result a string.")
929 (n, args)
930 int n;
931 Lisp_Object *args;
932 {
933 int i;
934 unsigned char *buf
935 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
936 unsigned char *p = buf;
937 Lisp_Object val;
938
939 for (i = 0; i < n; i++)
940 {
941 int c, len;
942 unsigned char *str;
943
944 if (!INTEGERP (args[i]))
945 {
946 free (buf);
947 CHECK_NUMBER (args[i], 0);
948 }
949 c = XINT (args[i]);
950 len = CHAR_STRING (c, p, str);
951 if (p != str)
952 /* C is a composite character. */
953 bcopy (str, p, len);
954 p += len;
955 }
956
957 val = make_string (buf, p - buf);
958 return val;
959 }
960
961 #endif /* emacs */
962 \f
963 /*** Composite characters staffs ***/
964
965 /* Each composite character is identified by CMPCHAR-ID which is
966 assigned when Emacs needs the character code of the composite
967 character (e.g. when displaying it on the screen). See the
968 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
969 composite character is represented in Emacs. */
970
971 /* If `static' is defined, it means that it is defined to null string. */
972 #ifndef static
973 /* The following function is copied from lread.c. */
974 static int
975 hash_string (ptr, len)
976 unsigned char *ptr;
977 int len;
978 {
979 register unsigned char *p = ptr;
980 register unsigned char *end = p + len;
981 register unsigned char c;
982 register int hash = 0;
983
984 while (p != end)
985 {
986 c = *p++;
987 if (c >= 0140) c -= 40;
988 hash = ((hash<<3) + (hash>>28) + c);
989 }
990 return hash & 07777777777;
991 }
992 #endif
993
994 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
995
996 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
997
998 /* Each element of `cmpchar_hash_table' is a pointer to an array of
999 integer, where the 1st element is the size of the array, the 2nd
1000 element is how many elements are actually used in the array, and
1001 the remaining elements are CMPCHAR-IDs of composite characters of
1002 the same hash value. */
1003 #define CMPCHAR_HASH_SIZE(table) table[0]
1004 #define CMPCHAR_HASH_USED(table) table[1]
1005 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1006
1007 /* Return CMPCHAR-ID of the composite character in STR of the length
1008 LEN. If the composite character has not yet been registered,
1009 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1010 is the sole function for assigning CMPCHAR-ID. */
1011 int
1012 str_cmpchar_id (str, len)
1013 unsigned char *str;
1014 int len;
1015 {
1016 int hash_idx, *hashp;
1017 unsigned char *buf;
1018 int embedded_rule; /* 1 if composition rule is embedded. */
1019 int chars; /* number of components. */
1020 int i;
1021 struct cmpchar_info *cmpcharp;
1022
1023 if (len < 5)
1024 /* Any composite char have at least 3-byte length. */
1025 return -1;
1026
1027 /* The second byte 0xFF means compostion rule is embedded. */
1028 embedded_rule = (str[1] == 0xFF);
1029
1030 /* At first, get the actual length of the composite character. */
1031 {
1032 unsigned char *p, *endp = str + 1, *lastp = str + len;
1033 int bytes;
1034
1035 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
1036 chars = 0;
1037 p = str + 1 + embedded_rule;
1038 while (p < endp)
1039 {
1040 /* No need of checking if *P is 0xA0 because
1041 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1042 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
1043 chars++;
1044 }
1045 len = (p -= embedded_rule) - str;
1046 if (p > endp)
1047 len -= - bytes, chars--;
1048
1049 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
1050 /* Invalid number of components. */
1051 return -1;
1052 }
1053 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1054 hashp = cmpchar_hash_table[hash_idx];
1055
1056 /* Then, look into the hash table. */
1057 if (hashp != NULL)
1058 /* Find the correct one among composite characters of the same
1059 hash value. */
1060 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1061 {
1062 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1063 if (len == cmpcharp->len
1064 && ! bcmp (str, cmpcharp->data, len))
1065 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1066 }
1067
1068 /* We have to register the composite character in cmpchar_table. */
1069 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1070 /* No, we have no more room for a new composite character. */
1071 return -1;
1072
1073 /* Make the entry in hash table. */
1074 if (hashp == NULL)
1075 {
1076 /* Make a table for 8 composite characters initially. */
1077 hashp = (cmpchar_hash_table[hash_idx]
1078 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1079 CMPCHAR_HASH_SIZE (hashp) = 10;
1080 CMPCHAR_HASH_USED (hashp) = 2;
1081 }
1082 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1083 {
1084 CMPCHAR_HASH_SIZE (hashp) += 8;
1085 hashp = (cmpchar_hash_table[hash_idx]
1086 = (int *) xrealloc (hashp,
1087 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1088 }
1089 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1090 CMPCHAR_HASH_USED (hashp)++;
1091
1092 /* Set information of the composite character in cmpchar_table. */
1093 if (cmpchar_table_size == 0)
1094 {
1095 /* This is the first composite character to be registered. */
1096 cmpchar_table_size = 256;
1097 cmpchar_table
1098 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1099 * cmpchar_table_size);
1100 }
1101 else if (cmpchar_table_size <= n_cmpchars)
1102 {
1103 cmpchar_table_size += 256;
1104 cmpchar_table
1105 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1106 sizeof (cmpchar_table[0])
1107 * cmpchar_table_size);
1108 }
1109
1110 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1111
1112 cmpcharp->len = len;
1113 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1114 bcopy (str, cmpcharp->data, len);
1115 cmpcharp->data[len] = 0;
1116 cmpcharp->glyph_len = chars;
1117 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1118 if (embedded_rule)
1119 {
1120 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1121 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1122 }
1123 else
1124 {
1125 cmpcharp->cmp_rule = NULL;
1126 cmpcharp->col_offset = NULL;
1127 }
1128
1129 /* Setup GLYPH data and composition rules (if any) so as not to make
1130 them every time on displaying. */
1131 {
1132 unsigned char *bufp;
1133 int width;
1134 float leftmost = 0.0, rightmost = 1.0;
1135
1136 if (embedded_rule)
1137 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1138 cmpcharp->col_offset[0] = 0;
1139
1140 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1141 {
1142 if (embedded_rule)
1143 cmpcharp->cmp_rule[i] = *bufp++;
1144
1145 if (*bufp == 0xA0) /* This is an ASCII character. */
1146 {
1147 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1148 width = 1;
1149 bufp++;
1150 }
1151 else /* Multibyte character. */
1152 {
1153 /* Make `bufp' point normal multi-byte form temporally. */
1154 *bufp -= 0x20;
1155 cmpcharp->glyph[i]
1156 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1157 width = WIDTH_BY_CHAR_HEAD (*bufp);
1158 *bufp += 0x20;
1159 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1160 }
1161
1162 if (embedded_rule && i > 0)
1163 {
1164 /* Reference points (global_ref and new_ref) are
1165 encoded as below:
1166
1167 0--1--2 -- ascent
1168 | |
1169 | |
1170 | 4 -+--- center
1171 -- 3 5 -- baseline
1172 | |
1173 6--7--8 -- descent
1174
1175 Now, we calculate the column offset of the new glyph
1176 from the left edge of the first glyph. This can avoid
1177 the same calculation everytime displaying this
1178 composite character. */
1179
1180 /* Reference points of global glyph and new glyph. */
1181 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1182 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1183 /* Column offset relative to the first glyph. */
1184 float left = (leftmost
1185 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1186 - (new_ref % 3) * width / 2.0);
1187
1188 cmpcharp->col_offset[i] = left;
1189 if (left < leftmost)
1190 leftmost = left;
1191 if (left + width > rightmost)
1192 rightmost = left + width;
1193 }
1194 else
1195 {
1196 if (width > rightmost)
1197 rightmost = width;
1198 }
1199 }
1200 if (embedded_rule)
1201 {
1202 /* Now col_offset[N] are relative to the left edge of the
1203 first component. Make them relative to the left edge of
1204 overall glyph. */
1205 for (i = 0; i < chars; i++)
1206 cmpcharp->col_offset[i] -= leftmost;
1207 /* Make rightmost holds width of overall glyph. */
1208 rightmost -= leftmost;
1209 }
1210
1211 cmpcharp->width = rightmost;
1212 if (cmpcharp->width < rightmost)
1213 /* To get a ceiling integer value. */
1214 cmpcharp->width++;
1215 }
1216
1217 cmpchar_table[n_cmpchars] = cmpcharp;
1218
1219 return n_cmpchars++;
1220 }
1221
1222 /* Return the Nth element of the composite character C. */
1223 int
1224 cmpchar_component (c, n)
1225 unsigned int c, n;
1226 {
1227 int id = COMPOSITE_CHAR_ID (c);
1228
1229 if (id >= n_cmpchars /* C is not a valid composite character. */
1230 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1231 return -1;
1232 /* No face data is stored in glyph code. */
1233 return ((int) (cmpchar_table[id]->glyph[n]));
1234 }
1235
1236 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1237 "T if CHAR is a composite character.")
1238 (ch)
1239 Lisp_Object ch;
1240 {
1241 CHECK_NUMBER (ch, 0);
1242 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1243 }
1244
1245 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1246 2, 2, 0,
1247 "Return the IDXth component character of composite character CHARACTER.")
1248 (character, idx)
1249 Lisp_Object character, idx;
1250 {
1251 int c;
1252
1253 CHECK_NUMBER (character, 0);
1254 CHECK_NUMBER (idx, 1);
1255
1256 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1257 args_out_of_range (character, idx);
1258
1259 return make_number (c);
1260 }
1261
1262 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1263 2, 2, 0,
1264 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1265 The returned rule is for composing the Nth component\n\
1266 on the (N-1)th component. If N is 0, the returned value is always 255.")
1267 (character, n)
1268 Lisp_Object character, n;
1269 {
1270 int id, i;
1271
1272 CHECK_NUMBER (character, 0);
1273 CHECK_NUMBER (n, 1);
1274
1275 id = COMPOSITE_CHAR_ID (XINT (character));
1276 if (id < 0 || id >= n_cmpchars)
1277 error ("Invalid composite character: %d", XINT (character));
1278 i = XINT (n);
1279 if (i > cmpchar_table[id]->glyph_len)
1280 args_out_of_range (character, n);
1281
1282 return make_number (cmpchar_table[id]->cmp_rule[i]);
1283 }
1284
1285 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1286 Scmpchar_cmp_rule_p, 1, 1, 0,
1287 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1288 (character)
1289 Lisp_Object character;
1290 {
1291 int id;
1292
1293 CHECK_NUMBER (character, 0);
1294 id = COMPOSITE_CHAR_ID (XINT (character));
1295 if (id < 0 || id >= n_cmpchars)
1296 error ("Invalid composite character: %d", XINT (character));
1297
1298 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1299 }
1300
1301 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1302 Scmpchar_cmp_count, 1, 1, 0,
1303 "Return number of compoents of composite character CHARACTER.")
1304 (character)
1305 Lisp_Object character;
1306 {
1307 int id;
1308
1309 CHECK_NUMBER (character, 0);
1310 id = COMPOSITE_CHAR_ID (XINT (character));
1311 if (id < 0 || id >= n_cmpchars)
1312 error ("Invalid composite character: %d", XINT (character));
1313
1314 return (make_number (cmpchar_table[id]->glyph_len));
1315 }
1316
1317 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1318 1, 1, 0,
1319 "Return one char string composed from all characters in STRING.")
1320 (str)
1321 Lisp_Object str;
1322 {
1323 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1324 int len, i;
1325
1326 CHECK_STRING (str, 0);
1327
1328 buf[0] = LEADING_CODE_COMPOSITION;
1329 p = XSTRING (str)->data;
1330 pend = p + XSTRING (str)->size;
1331 i = 1;
1332 while (p < pend)
1333 {
1334 if (*p < 0x20 || *p == 127) /* control code */
1335 error ("Invalid component character: %d", *p);
1336 else if (*p < 0x80) /* ASCII */
1337 {
1338 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1339 error ("Too long string to be composed: %s", XSTRING (str)->data);
1340 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1341 code itself. */
1342 buf[i++] = 0xA0;
1343 buf[i++] = *p++ + 0x80;
1344 }
1345 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1346 {
1347 /* Already composed. Eliminate the heading
1348 LEADING_CODE_COMPOSITION, keep the remaining bytes
1349 unchanged. */
1350 p++;
1351 ptemp = p;
1352 while (! CHAR_HEAD_P (p)) p++;
1353 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1354 error ("Too long string to be composed: %s", XSTRING (str)->data);
1355 bcopy (ptemp, buf + i, p - ptemp);
1356 i += p - ptemp;
1357 }
1358 else /* multibyte char */
1359 {
1360 /* Add 0x20 to the base leading-code, keep the remaining
1361 bytes unchanged. */
1362 len = BYTES_BY_CHAR_HEAD (*p);
1363 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1364 error ("Too long string to be composed: %s", XSTRING (str)->data);
1365 bcopy (p, buf + i, len);
1366 buf[i] += 0x20;
1367 p += len, i += len;
1368 }
1369 }
1370
1371 if (i < 5)
1372 /* STR contains only one character, which can't be composed. */
1373 error ("Too short string to be composed: %s", XSTRING (str)->data);
1374
1375 return make_string (buf, i);
1376 }
1377
1378 \f
1379 charset_id_internal (charset_name)
1380 char *charset_name;
1381 {
1382 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1383
1384 if (!VECTORP (val))
1385 error ("Charset %s is not defined", charset_name);
1386
1387 return (XINT (XVECTOR (val)->contents[0]));
1388 }
1389
1390 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1391 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1392 ()
1393 {
1394 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1395 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1396 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1397 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1398 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1399 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1400 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1401 return Qnil;
1402 }
1403
1404 init_charset_once ()
1405 {
1406 int i, j, k;
1407
1408 staticpro (&Vcharset_table);
1409 staticpro (&Vcharset_symbol_table);
1410
1411 /* This has to be done here, before we call Fmake_char_table. */
1412 Qcharset_table = intern ("charset-table");
1413 staticpro (&Qcharset_table);
1414
1415 /* Intern this now in case it isn't already done.
1416 Setting this variable twice is harmless.
1417 But don't staticpro it here--that is done in alloc.c. */
1418 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1419
1420 /* Now we are ready to set up this property, so we can
1421 create the charset table. */
1422 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1423 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1424
1425 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
1426
1427 /* Setup tables. */
1428 for (i = 0; i < 2; i++)
1429 for (j = 0; j < 2; j++)
1430 for (k = 0; k < 128; k++)
1431 iso_charset_table [i][j][k] = -1;
1432
1433 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1434 cmpchar_table_size = n_cmpchars = 0;
1435
1436 for (i = 0; i < 256; i++)
1437 BYTES_BY_CHAR_HEAD (i) = 1;
1438 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1439 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1440 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1441 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1442 /* The following doesn't reflect the actual bytes, but just to tell
1443 that it is a start of a multibyte character. */
1444 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1445
1446 for (i = 0; i < 128; i++)
1447 WIDTH_BY_CHAR_HEAD (i) = 1;
1448 for (; i < 256; i++)
1449 WIDTH_BY_CHAR_HEAD (i) = 4;
1450 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1451 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1452 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1453 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1454 }
1455
1456 #ifdef emacs
1457
1458 syms_of_charset ()
1459 {
1460 Qascii = intern ("ascii");
1461 staticpro (&Qascii);
1462
1463 Qcharset = intern ("charset");
1464 staticpro (&Qcharset);
1465
1466 /* Define ASCII charset now. */
1467 update_charset_table (make_number (CHARSET_ASCII),
1468 make_number (1), make_number (94),
1469 make_number (1),
1470 make_number (0),
1471 make_number ('B'),
1472 make_number (0),
1473 build_string ("ASCII"),
1474 build_string ("ASCII"),
1475 build_string ("ASCII (ISO646 IRV)"));
1476 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1477 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1478
1479 Qcomposition = intern ("composition");
1480 staticpro (&Qcomposition);
1481 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1482
1483 defsubr (&Sdefine_charset);
1484 defsubr (&Sdeclare_equiv_charset);
1485 defsubr (&Sfind_charset_region);
1486 defsubr (&Sfind_charset_string);
1487 defsubr (&Smake_char_internal);
1488 defsubr (&Ssplit_char);
1489 defsubr (&Schar_charset);
1490 defsubr (&Siso_charset);
1491 defsubr (&Schar_bytes);
1492 defsubr (&Schar_width);
1493 defsubr (&Sstring_width);
1494 defsubr (&Schar_direction);
1495 defsubr (&Schars_in_string);
1496 defsubr (&Schar_boundary_p);
1497 defsubr (&Sconcat_chars);
1498 defsubr (&Scmpcharp);
1499 defsubr (&Scmpchar_component);
1500 defsubr (&Scmpchar_cmp_rule);
1501 defsubr (&Scmpchar_cmp_rule_p);
1502 defsubr (&Scmpchar_cmp_count);
1503 defsubr (&Scompose_string);
1504 defsubr (&Ssetup_special_charsets);
1505
1506 DEFVAR_LISP ("charset-list", &Vcharset_list,
1507 "List of charsets ever defined.");
1508 Vcharset_list = Fcons (Qascii, Qnil);
1509
1510 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1511 "Leading-code of composite characters.");
1512 leading_code_composition = LEADING_CODE_COMPOSITION;
1513
1514 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1515 "Leading-code of private TYPE9N charset of column-width 1.");
1516 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1517
1518 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1519 "Leading-code of private TYPE9N charset of column-width 2.");
1520 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1521
1522 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1523 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1524 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1525
1526 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1527 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1528 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1529 }
1530
1531 #endif /* emacs */