Change copyright notices.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592
RS
1/* Basic multilingual character support.
2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
25#include <stdio.h>
26
27#ifdef emacs
28
29#include <sys/types.h>
30#include <config.h>
31#include "lisp.h"
32#include "buffer.h"
33#include "charset.h"
34#include "coding.h"
fc6b09bf 35#include "disptab.h"
4ed46869
KH
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
44
45/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */
47int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52/* Declaration of special charsets. */
53int charset_ascii; /* ASCII */
54int charset_composition; /* for a composite character */
55int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63Lisp_Object Qcharset_table;
64
65/* A char-table containing information of each character set. */
66Lisp_Object Vcharset_table;
67
68/* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70Lisp_Object Vcharset_symbol_table;
71
72/* A list of charset symbols ever defined. */
73Lisp_Object Vcharset_list;
74
75/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76int bytes_by_char_head[256];
77int width_by_char_head[256];
78
79/* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81int iso_charset_table[2][2][128];
82
513ee442
KH
83/* Table of pointers to the structure `cmpchar_info' indexed by
84 CMPCHAR-ID. */
85struct cmpchar_info **cmpchar_table;
86/* The current size of `cmpchar_table'. */
87static int cmpchar_table_size;
88/* Number of the current composite characters. */
89int n_cmpchars;
90
4ed46869
KH
91/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
92unsigned char *_fetch_multibyte_char_p;
93int _fetch_multibyte_char_len;
94
95/* Set STR a pointer to the multi-byte form of the character C. If C
96 is not a composite character, the multi-byte form is set in WORKBUF
97 and STR points WORKBUF. The caller should allocate at least 4-byte
98 area at WORKBUF in advance. Returns the length of the multi-byte
bd4c6dd0
KH
99 form. If C is an invalid character to have a multi-byte form,
100 signal an error.
4ed46869
KH
101
102 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
103 function directly if C can be an ASCII character. */
104
105int
106non_ascii_char_to_string (c, workbuf, str)
107 int c;
108 unsigned char *workbuf, **str;
109{
6dc0722d 110 int charset, c1, c2;
4ed46869
KH
111
112 if (COMPOSITE_CHAR_P (c))
113 {
114 int cmpchar_id = COMPOSITE_CHAR_ID (c);
115
116 if (cmpchar_id < n_cmpchars)
117 {
118 *str = cmpchar_table[cmpchar_id]->data;
119 return cmpchar_table[cmpchar_id]->len;
120 }
121 else
122 {
bd4c6dd0 123 error ("Invalid characer: %d", c);
4ed46869
KH
124 }
125 }
126
127 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
bd4c6dd0
KH
128 if (!charset
129 || ! CHARSET_DEFINED_P (charset)
130 || c1 >= 0 && c1 < 32
131 || c2 >= 0 && c2 < 32)
132 error ("Invalid characer: %d", c);
4ed46869
KH
133
134 *str = workbuf;
135 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
136 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
137 workbuf++;
138 *workbuf++ = c1 | 0x80;
6dc0722d 139 if (c2 >= 0)
4ed46869
KH
140 *workbuf++ = c2 | 0x80;
141
142 return (workbuf - *str);
143}
144
145/* Return a non-ASCII character of which multi-byte form is at STR of
146 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
147 character is set to the address ACTUAL_LEN.
148
149 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
150 directly if STR can hold an ASCII character. */
151
152string_to_non_ascii_char (str, len, actual_len)
153 unsigned char *str;
154 int len, *actual_len;
155{
156 int charset;
157 unsigned char c1, c2;
158 register int c;
159
160 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
161 {
162 if (actual_len)
163 *actual_len = 1;
164 return (int) *str;
165 }
166
167 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
168
169 if (actual_len)
170 *actual_len = (charset == CHARSET_COMPOSITION
171 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
172 : BYTES_BY_CHAR_HEAD (*str));
173 return c;
174}
175
176/* Return the length of the multi-byte form at string STR of length LEN. */
177int
178multibyte_form_length (str, len)
179 unsigned char *str;
180 int len;
181{
182 int charset;
183 unsigned char c1, c2;
184 register int c;
185
186 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
187 return 1;
188
189 return (charset == CHARSET_COMPOSITION
190 ? cmpchar_table[(c1 << 7) | c2]->len
191 : BYTES_BY_CHAR_HEAD (*str));
192}
193
194/* Check if string STR of length LEN contains valid multi-byte form of
195 a character. If valid, charset and position codes of the character
196 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
197 return -1. This should be used only in the macro SPLIT_STRING
198 which checks range of STR in advance. */
199
200split_non_ascii_string (str, len, charset, c1, c2)
201 register unsigned char *str, *c1, *c2;
202 register int len, *charset;
203{
204 register unsigned int cs = *str++;
205
206 if (cs == LEADING_CODE_COMPOSITION)
207 {
208 int cmpchar_id = str_cmpchar_id (str - 1, len);
209
210 if (cmpchar_id < 0)
211 return -1;
212 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
213 }
214 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
215 && CHARSET_DEFINED_P (cs))
216 {
217 *charset = cs;
218 if (*str < 0xA0)
219 return -1;
220 *c1 = (*str++) & 0x7F;
221 if (CHARSET_DIMENSION (cs) == 2)
222 {
223 if (*str < 0xA0)
224 return -1;
225 *c2 = (*str++) & 0x7F;
226 }
227 }
228 else
229 return -1;
230 return 0;
231}
232
23d2a7f1
KH
233/* Return a character unified with C (or a character made of CHARSET,
234 C1, and C2) in unification table TABLE. If no unification is found
235 in TABLE, return C. */
236unify_char (table, c, charset, c1, c2)
237 Lisp_Object table;
238 int c, charset, c1, c2;
239{
240 Lisp_Object ch;
241 int alt_charset, alt_c1, alt_c2, dimension;
242
243 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
244 if (!CHAR_TABLE_P (table)
245 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
246 || XINT (ch) < 0)
247 return c;
248
249 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
250 dimension = CHARSET_DIMENSION (alt_charset);
251 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
252 /* CH is not a generic character, just return it. */
253 return XFASTINT (ch);
254
255 /* Since CH is a generic character, we must return a specific
256 charater which has the same position codes as C from CH. */
257 if (charset < 0)
258 SPLIT_CHAR (c, charset, c1, c2);
259 if (dimension != CHARSET_DIMENSION (charset))
260 /* We can't make such a character because of dimension mismatch. */
261 return c;
262 if (!alt_c1) alt_c1 = c1;
263 if (!alt_c2) alt_c2 = c2;
264 return MAKE_CHAR (alt_charset, c1, c2);
265}
266
4ed46869
KH
267/* Update the table Vcharset_table with the given arguments (see the
268 document of `define-charset' for the meaning of each argument).
269 Several other table contents are also updated. The caller should
270 check the validity of CHARSET-ID and the remaining arguments in
271 advance. */
272
273void
274update_charset_table (charset_id, dimension, chars, width, direction,
275 iso_final_char, iso_graphic_plane,
276 short_name, long_name, description)
277 Lisp_Object charset_id, dimension, chars, width, direction;
278 Lisp_Object iso_final_char, iso_graphic_plane;
279 Lisp_Object short_name, long_name, description;
280{
281 int charset = XINT (charset_id);
282 int bytes;
283 unsigned char leading_code_base, leading_code_ext;
284
6dc0722d
KH
285 if (NILP (CHARSET_TABLE_ENTRY (charset)))
286 CHARSET_TABLE_ENTRY (charset)
287 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869
KH
288
289 /* Get byte length of multibyte form, base leading-code, and
290 extended leading-code of the charset. See the comment under the
291 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
292 bytes = XINT (dimension);
293 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
294 {
295 /* Official charset, it doesn't have an extended leading-code. */
296 if (charset != CHARSET_ASCII)
297 bytes += 1; /* For a base leading-code. */
298 leading_code_base = charset;
299 leading_code_ext = 0;
300 }
301 else
302 {
303 /* Private charset. */
304 bytes += 2; /* For base and extended leading-codes. */
305 leading_code_base
306 = (charset < LEADING_CODE_EXT_12
307 ? LEADING_CODE_PRIVATE_11
308 : (charset < LEADING_CODE_EXT_21
309 ? LEADING_CODE_PRIVATE_12
310 : (charset < LEADING_CODE_EXT_22
311 ? LEADING_CODE_PRIVATE_21
312 : LEADING_CODE_PRIVATE_22)));
313 leading_code_ext = charset;
314 }
315
316 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
317 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
318 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
319 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
320 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
321 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
322 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
323 = make_number (leading_code_base);
324 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
325 = make_number (leading_code_ext);
326 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
327 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
328 = iso_graphic_plane;
329 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
330 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
331 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
332 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
333
334 {
335 /* If we have already defined a charset which has the same
336 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
337 DIRECTION, we must update the entry REVERSE-CHARSET of both
338 charsets. If there's no such charset, the value of the entry
339 is set to nil. */
340 int i;
341
513ee442 342 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
343 if (!NILP (CHARSET_TABLE_ENTRY (i)))
344 {
345 if (CHARSET_DIMENSION (i) == XINT (dimension)
346 && CHARSET_CHARS (i) == XINT (chars)
347 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
348 && CHARSET_DIRECTION (i) != XINT (direction))
349 {
350 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
351 = make_number (i);
352 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
353 break;
354 }
355 }
513ee442 356 if (i > MAX_CHARSET)
4ed46869
KH
357 /* No such a charset. */
358 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
359 = make_number (-1);
360 }
361
362 if (charset != CHARSET_ASCII
363 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
364 {
365 /* Update tables bytes_by_char_head and width_by_char_head. */
366 bytes_by_char_head[leading_code_base] = bytes;
367 width_by_char_head[leading_code_base] = XINT (width);
368
369 /* Update table emacs_code_class. */
370 emacs_code_class[charset] = (bytes == 2
371 ? EMACS_leading_code_2
372 : (bytes == 3
373 ? EMACS_leading_code_3
374 : EMACS_leading_code_4));
375 }
376
377 /* Update table iso_charset_table. */
378 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
379 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
380}
381
382#ifdef emacs
383
384/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
385 is invalid. */
386int
387get_charset_id (charset_symbol)
388 Lisp_Object charset_symbol;
389{
390 Lisp_Object val;
391 int charset;
392
393 return ((SYMBOLP (charset_symbol)
394 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
395 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
396 CHARSET_VALID_P (charset)))
397 ? charset : -1);
398}
399
400/* Return an identification number for a new private charset of
401 DIMENSION and WIDTH. If there's no more room for the new charset,
402 return 0. */
403Lisp_Object
404get_new_private_charset_id (dimension, width)
405 int dimension, width;
406{
407 int charset, from, to;
408
409 if (dimension == 1)
410 {
411 if (width == 1)
412 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
413 else
414 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
415 }
416 else
417 {
418 if (width == 1)
419 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
420 else
421 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
422 }
423
424 for (charset = from; charset < to; charset++)
425 if (!CHARSET_DEFINED_P (charset)) break;
426
427 return make_number (charset < to ? charset : 0);
428}
429
430DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
431 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 432If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
433 treated as a private charset.\n\
434INFO-VECTOR is a vector of the format:\n\
435 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
436 SHORT-NAME LONG-NAME DESCRIPTION]\n\
437The meanings of each elements is as follows:\n\
438DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
439CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
440WIDTH (integer) is the number of columns a character in the charset\n\
441occupies on the screen: one of 0, 1, and 2.\n\
442\n\
443DIRECTION (integer) is the rendering direction of characters in the\n\
444charset when rendering. If 0, render from right to left, else\n\
445render from left to right.\n\
446\n\
447ISO-FINAL-CHAR (character) is the final character of the\n\
448corresponding ISO 2022 charset.\n\
449\n\
450ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
451while encoding to variants of ISO 2022 coding system, one of the\n\
452following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
453\n\
454SHORT-NAME (string) is the short name to refer to the charset.\n\
455\n\
456LONG-NAME (string) is the long name to refer to the charset.\n\
457\n\
458DESCRIPTION (string) is the description string of the charset.")
459 (charset_id, charset_symbol, info_vector)
460 Lisp_Object charset_id, charset_symbol, info_vector;
461{
462 Lisp_Object *vec;
463
464 if (!NILP (charset_id))
465 CHECK_NUMBER (charset_id, 0);
466 CHECK_SYMBOL (charset_symbol, 1);
467 CHECK_VECTOR (info_vector, 2);
468
469 if (! NILP (charset_id))
470 {
471 if (! CHARSET_VALID_P (XINT (charset_id)))
472 error ("Invalid CHARSET: %d", XINT (charset_id));
473 else if (CHARSET_DEFINED_P (XINT (charset_id)))
474 error ("Already defined charset: %d", XINT (charset_id));
475 }
476
477 vec = XVECTOR (info_vector)->contents;
478 if (XVECTOR (info_vector)->size != 9
479 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
480 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
481 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
482 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
483 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
484 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
485 || !STRINGP (vec[6])
486 || !STRINGP (vec[7])
487 || !STRINGP (vec[8]))
488 error ("Invalid info-vector argument for defining charset %s",
489 XSYMBOL (charset_symbol)->name->data);
490
491 if (NILP (charset_id))
492 {
493 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
494 if (XINT (charset_id) == 0)
495 error ("There's no room for a new private charset %s",
496 XSYMBOL (charset_symbol)->name->data);
497 }
498
499 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
500 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 501 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
502 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
503 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
504 return Qnil;
505}
506
507DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
508 4, 4, 0,
509 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
510CHARSET should be defined by `defined-charset' in advance.")
511 (dimension, chars, final_char, charset_symbol)
512 Lisp_Object dimension, chars, final_char, charset_symbol;
513{
514 int charset;
515
516 CHECK_NUMBER (dimension, 0);
517 CHECK_NUMBER (chars, 1);
518 CHECK_NUMBER (final_char, 2);
519 CHECK_SYMBOL (charset_symbol, 3);
520
521 if (XINT (dimension) != 1 && XINT (dimension) != 2)
522 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
523 if (XINT (chars) != 94 && XINT (chars) != 96)
524 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
525 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
526 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
527 if ((charset = get_charset_id (charset_symbol)) < 0)
528 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
529
530 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
531 return Qnil;
532}
533
534/* Return number of different charsets in STR of length LEN. In
535 addition, for each found charset N, CHARSETS[N] is set 1. The
a29e3b1b 536 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
23d2a7f1 537 It may lookup a unification table TABLE if supplied. */
4ed46869
KH
538
539int
23d2a7f1 540find_charset_in_str (str, len, charsets, table)
028d516b
KH
541 unsigned char *str;
542 int len, *charsets;
23d2a7f1 543 Lisp_Object table;
4ed46869
KH
544{
545 int num = 0;
546
23d2a7f1
KH
547 if (! CHAR_TABLE_P (table))
548 table = Qnil;
549
4ed46869
KH
550 while (len > 0)
551 {
552 int bytes = BYTES_BY_CHAR_HEAD (*str);
23d2a7f1
KH
553 int charset;
554
555 if (NILP (table))
556 charset = CHARSET_AT (str);
557 else
558 {
559 int c, charset;
560 unsigned char c1, c2;
561
562 SPLIT_STRING(str, bytes, charset, c1, c2);
563 if ((c = unify_char (table, -1, charset, c1, c2)) >= 0)
564 charset = CHAR_CHARSET (c);
565 }
4ed46869
KH
566
567 if (!charsets[charset])
568 {
569 charsets[charset] = 1;
570 num += 1;
571 }
572 str += bytes;
573 len -= bytes;
574 }
575 return num;
576}
577
578DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 579 2, 3, 0,
4ed46869 580 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1
KH
581BEG and END are buffer positions.\n\
582Optional arg TABLE if non-nil is a unification table to look up.")
583 (beg, end, table)
584 Lisp_Object beg, end, table;
4ed46869 585{
028d516b 586 int charsets[MAX_CHARSET + 1];
4ed46869
KH
587 int from, to, stop, i;
588 Lisp_Object val;
589
590 validate_region (&beg, &end);
591 from = XFASTINT (beg);
592 stop = to = XFASTINT (end);
593 if (from < GPT && GPT < to)
594 stop = GPT;
028d516b 595 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
596 while (1)
597 {
23d2a7f1 598 find_charset_in_str (POS_ADDR (from), stop - from, charsets, table);
4ed46869
KH
599 if (stop < to)
600 from = stop, stop = to;
601 else
602 break;
603 }
604 val = Qnil;
513ee442 605 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
606 if (charsets[i])
607 val = Fcons (CHARSET_SYMBOL (i), val);
608 return val;
609}
610
611DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
612 1, 2, 0,
613 "Return a list of charsets in STR.\n\
614Optional arg TABLE if non-nil is a unification table to look up.")
615 (str, table)
616 Lisp_Object str, table;
4ed46869 617{
a29e3b1b 618 int charsets[MAX_CHARSET + 1];
4ed46869
KH
619 int i;
620 Lisp_Object val;
621
622 CHECK_STRING (str, 0);
a29e3b1b 623 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
23d2a7f1
KH
624 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size,
625 charsets, table);
4ed46869 626 val = Qnil;
513ee442 627 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
628 if (charsets[i])
629 val = Fcons (CHARSET_SYMBOL (i), val);
630 return val;
631}
632\f
633DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 634 "")
4ed46869
KH
635 (charset, code1, code2)
636 Lisp_Object charset, code1, code2;
637{
638 CHECK_NUMBER (charset, 0);
639
640 if (NILP (code1))
641 XSETFASTINT (code1, 0);
642 else
643 CHECK_NUMBER (code1, 1);
644 if (NILP (code2))
645 XSETFASTINT (code2, 0);
646 else
647 CHECK_NUMBER (code2, 2);
648
649 if (!CHARSET_DEFINED_P (XINT (charset)))
650 error ("Invalid charset: %d", XINT (charset));
651
652 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
653}
654
655DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
656 "Return list of charset and one or two position-codes of CHAR.")
657 (ch)
658 Lisp_Object ch;
659{
660 Lisp_Object val;
6dc0722d 661 int charset, c1, c2;
4ed46869
KH
662
663 CHECK_NUMBER (ch, 0);
664 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 665 return (c2 >= 0
4ed46869
KH
666 ? Fcons (CHARSET_SYMBOL (charset),
667 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
668 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
669}
670
671DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
672 "Return charset of CHAR.")
673 (ch)
674 Lisp_Object ch;
675{
676 CHECK_NUMBER (ch, 0);
677
678 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
679}
680
681DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
682 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
683 (dimension, chars, final_char)
684 Lisp_Object dimension, chars, final_char;
685{
686 int charset;
687
688 CHECK_NUMBER (dimension, 0);
689 CHECK_NUMBER (chars, 1);
690 CHECK_NUMBER (final_char, 2);
691
692 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
693 return Qnil;
694 return CHARSET_SYMBOL (charset);
695}
696
697DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
698 "Return byte length of multi-byte form of CHAR.")
699 (ch)
700 Lisp_Object ch;
701{
702 Lisp_Object val;
703 int bytes;
704
705 CHECK_NUMBER (ch, 0);
706 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
707 {
708 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
709
710 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
711 }
712 else
713 {
714 int charset = CHAR_CHARSET (XFASTINT (ch));
715
716 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
717 }
718
719 XSETFASTINT (val, bytes);
720 return val;
721}
722
723/* Return the width of character of which multi-byte form starts with
724 C. The width is measured by how many columns occupied on the
725 screen when displayed in the current buffer. */
726
727#define ONE_BYTE_CHAR_WIDTH(c) \
728 (c < 0x20 \
729 ? (c == '\t' \
53316e55 730 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
731 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
732 : (c < 0x7f \
733 ? 1 \
734 : (c == 0x7F \
735 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
736 : ((! NILP (current_buffer->enable_multibyte_characters) \
737 && BASE_LEADING_CODE_P (c)) \
738 ? WIDTH_BY_CHAR_HEAD (c) \
739 : 4)))) \
740
741
742DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
743 "Return width of CHAR when displayed in the current buffer.\n\
744The width is measured by how many columns it occupies on the screen.")
745 (ch)
746 Lisp_Object ch;
747{
859f2b3c 748 Lisp_Object val, disp;
4ed46869
KH
749 int c;
750
751 CHECK_NUMBER (ch, 0);
752
859f2b3c
RS
753 c = XINT (ch);
754
755 /* Get the way the display table would display it. */
756 disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c));
757
758 if (VECTORP (disp))
759 XSETINT (val, XVECTOR (disp)->size);
760 else if (SINGLE_BYTE_CHAR_P (c))
761 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
762 else if (COMPOSITE_CHAR_P (c))
763 {
764 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
765 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
766 }
767 else
768 {
769 int charset = CHAR_CHARSET (c);
770
771 XSETFASTINT (val, CHARSET_WIDTH (charset));
772 }
773 return val;
774}
775
776/* Return width of string STR of length LEN when displayed in the
777 current buffer. The width is measured by how many columns it
778 occupies on the screen. */
859f2b3c 779
4ed46869
KH
780int
781strwidth (str, len)
782 unsigned char *str;
783 int len;
784{
785 unsigned char *endp = str + len;
786 int width = 0;
859f2b3c 787 struct Lisp_Char_Table *dp = buffer_display_table (current_buffer);
4ed46869 788
859f2b3c
RS
789 while (str < endp)
790 {
791 if (*str == LEADING_CODE_COMPOSITION)
792 {
793 int id = str_cmpchar_id (str, endp - str);
794
795 if (id < 0)
796 {
797 width += 4;
798 str++;
799 }
800 else
801 {
802 width += cmpchar_table[id]->width;
803 str += cmpchar_table[id]->len;
804 }
805 }
806 else
807 {
808 Lisp_Object disp;
809 int thiswidth;
810 int c = STRING_CHAR (str, endp - str);
811
812 /* Get the way the display table would display it. */
acc35c36
RS
813 if (dp)
814 disp = DISP_CHAR_VECTOR (dp, c);
815 else
816 disp = Qnil;
859f2b3c
RS
817
818 if (VECTORP (disp))
819 thiswidth = XVECTOR (disp)->size;
820 else
821 thiswidth = ONE_BYTE_CHAR_WIDTH (*str);
822
823 width += thiswidth;
824 str += BYTES_BY_CHAR_HEAD (*str);
825 }
826 }
4ed46869
KH
827 return width;
828}
829
830DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
831 "Return width of STRING when displayed in the current buffer.\n\
832Width is measured by how many columns it occupies on the screen.\n\
833When calculating width of a multi-byte character in STRING,\n\
834 only the base leading-code is considered and the validity of\n\
835 the following bytes are not checked.")
836 (str)
837 Lisp_Object str;
838{
839 Lisp_Object val;
840
841 CHECK_STRING (str, 0);
842 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
843 return val;
844}
845
846DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
847 "Return the direction of CHAR.\n\
848The returned value is 0 for left-to-right and 1 for right-to-left.")
849 (ch)
850 Lisp_Object ch;
851{
852 int charset;
853
854 CHECK_NUMBER (ch, 0);
855 charset = CHAR_CHARSET (XFASTINT (ch));
856 if (!CHARSET_DEFINED_P (charset))
857 error ("Invalid character: %d", XINT (ch));
858 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
859}
860
861DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
862 "Return number of characters in STRING.")
863 (str)
864 Lisp_Object str;
865{
866 Lisp_Object val;
867 unsigned char *p, *endp;
868 int chars;
869
870 CHECK_STRING (str, 0);
871
872 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
873 chars = 0;
874 while (p < endp)
875 {
876 if (*p == LEADING_CODE_COMPOSITION)
877 {
878 p++;
879 while (p < endp && ! CHAR_HEAD_P (p)) p++;
880 }
881 else
882 p += BYTES_BY_CHAR_HEAD (*p);
883 chars++;
884 }
885
886 XSETFASTINT (val, chars);
887 return val;
888}
889
890DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
891 "Return non-nil value if POS is at character boundary of multibyte form.\n\
892The return value is:\n\
893 0 if POS is at an ASCII character or at the end of range,\n\
894 1 if POS is at a head of 2-byte length multi-byte form,\n\
895 2 if POS is at a head of 3-byte length multi-byte form,\n\
896 3 if POS is at a head of 4-byte length multi-byte form,\n\
897 4 if POS is at a head of multi-byte form of a composite character.\n\
898If POS is out of range or not at character boundary, return NIL.")
899 (pos)
900 Lisp_Object pos;
901{
902 Lisp_Object val;
903 int n;
904
905 CHECK_NUMBER_COERCE_MARKER (pos, 0);
906
907 n = XINT (pos);
908 if (n < BEGV || n > ZV)
909 return Qnil;
910
911 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
912 XSETFASTINT (val, 0);
913 else
914 {
915 unsigned char *p = POS_ADDR (n);
916
917 if (SINGLE_BYTE_CHAR_P (*p))
918 XSETFASTINT (val, 0);
919 else if (*p == LEADING_CODE_COMPOSITION)
920 XSETFASTINT (val, 4);
921 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
922 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
923 else
924 val = Qnil;
925 }
926 return val;
927}
928
929DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
930 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
931 (n, args)
932 int n;
4ed46869
KH
933 Lisp_Object *args;
934{
53316e55 935 int i;
4ed46869 936 unsigned char *buf
bd4c6dd0 937 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
4ed46869
KH
938 unsigned char *p = buf;
939 Lisp_Object val;
940
941 for (i = 0; i < n; i++)
942 {
943 int c, len;
944 unsigned char *str;
945
946 if (!INTEGERP (args[i]))
947 {
948 free (buf);
949 CHECK_NUMBER (args[i], 0);
950 }
951 c = XINT (args[i]);
952 len = CHAR_STRING (c, p, str);
953 if (p != str)
954 /* C is a composite character. */
955 bcopy (str, p, len);
956 p += len;
957 }
958
959 val = make_string (buf, p - buf);
4ed46869
KH
960 return val;
961}
962
963#endif /* emacs */
964\f
965/*** Composite characters staffs ***/
966
967/* Each composite character is identified by CMPCHAR-ID which is
968 assigned when Emacs needs the character code of the composite
969 character (e.g. when displaying it on the screen). See the
970 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
971 composite character is represented in Emacs. */
972
973/* If `static' is defined, it means that it is defined to null string. */
974#ifndef static
975/* The following function is copied from lread.c. */
976static int
977hash_string (ptr, len)
978 unsigned char *ptr;
979 int len;
980{
981 register unsigned char *p = ptr;
982 register unsigned char *end = p + len;
983 register unsigned char c;
984 register int hash = 0;
985
986 while (p != end)
987 {
988 c = *p++;
989 if (c >= 0140) c -= 40;
990 hash = ((hash<<3) + (hash>>28) + c);
991 }
992 return hash & 07777777777;
993}
994#endif
995
4ed46869
KH
996#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
997
998static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
999
1000/* Each element of `cmpchar_hash_table' is a pointer to an array of
1001 integer, where the 1st element is the size of the array, the 2nd
1002 element is how many elements are actually used in the array, and
1003 the remaining elements are CMPCHAR-IDs of composite characters of
1004 the same hash value. */
1005#define CMPCHAR_HASH_SIZE(table) table[0]
1006#define CMPCHAR_HASH_USED(table) table[1]
1007#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1008
1009/* Return CMPCHAR-ID of the composite character in STR of the length
1010 LEN. If the composite character has not yet been registered,
1011 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1012 is the sole function for assigning CMPCHAR-ID. */
1013int
1014str_cmpchar_id (str, len)
1015 unsigned char *str;
1016 int len;
1017{
1018 int hash_idx, *hashp;
1019 unsigned char *buf;
1020 int embedded_rule; /* 1 if composition rule is embedded. */
1021 int chars; /* number of components. */
1022 int i;
1023 struct cmpchar_info *cmpcharp;
1024
1025 if (len < 5)
1026 /* Any composite char have at least 3-byte length. */
1027 return -1;
1028
1029 /* The second byte 0xFF means compostion rule is embedded. */
1030 embedded_rule = (str[1] == 0xFF);
1031
1032 /* At first, get the actual length of the composite character. */
1033 {
1034 unsigned char *p, *endp = str + 1, *lastp = str + len;
1035 int bytes;
1036
1037 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
1038 chars = 0;
1039 p = str + 1 + embedded_rule;
1040 while (p < endp)
1041 {
1042 /* No need of checking if *P is 0xA0 because
1043 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1044 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
1045 chars++;
1046 }
1047 len = (p -= embedded_rule) - str;
1048 if (p > endp)
1049 len -= - bytes, chars--;
1050
1051 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
1052 /* Invalid number of components. */
1053 return -1;
1054 }
1055 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1056 hashp = cmpchar_hash_table[hash_idx];
1057
1058 /* Then, look into the hash table. */
1059 if (hashp != NULL)
1060 /* Find the correct one among composite characters of the same
1061 hash value. */
1062 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1063 {
1064 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1065 if (len == cmpcharp->len
1066 && ! bcmp (str, cmpcharp->data, len))
1067 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1068 }
1069
1070 /* We have to register the composite character in cmpchar_table. */
513ee442
KH
1071 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1072 /* No, we have no more room for a new composite character. */
1073 return -1;
1074
4ed46869
KH
1075 /* Make the entry in hash table. */
1076 if (hashp == NULL)
1077 {
1078 /* Make a table for 8 composite characters initially. */
1079 hashp = (cmpchar_hash_table[hash_idx]
1080 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1081 CMPCHAR_HASH_SIZE (hashp) = 10;
1082 CMPCHAR_HASH_USED (hashp) = 2;
1083 }
1084 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1085 {
1086 CMPCHAR_HASH_SIZE (hashp) += 8;
1087 hashp = (cmpchar_hash_table[hash_idx]
1088 = (int *) xrealloc (hashp,
1089 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1090 }
1091 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1092 CMPCHAR_HASH_USED (hashp)++;
1093
1094 /* Set information of the composite character in cmpchar_table. */
1095 if (cmpchar_table_size == 0)
1096 {
1097 /* This is the first composite character to be registered. */
1098 cmpchar_table_size = 256;
1099 cmpchar_table
1100 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1101 * cmpchar_table_size);
1102 }
1103 else if (cmpchar_table_size <= n_cmpchars)
1104 {
1105 cmpchar_table_size += 256;
1106 cmpchar_table
1107 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1108 sizeof (cmpchar_table[0])
1109 * cmpchar_table_size);
1110 }
1111
1112 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1113
1114 cmpcharp->len = len;
1115 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1116 bcopy (str, cmpcharp->data, len);
1117 cmpcharp->data[len] = 0;
1118 cmpcharp->glyph_len = chars;
1119 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1120 if (embedded_rule)
1121 {
1122 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1123 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1124 }
1125 else
1126 {
1127 cmpcharp->cmp_rule = NULL;
1128 cmpcharp->col_offset = NULL;
1129 }
1130
1131 /* Setup GLYPH data and composition rules (if any) so as not to make
1132 them every time on displaying. */
1133 {
1134 unsigned char *bufp;
1135 int width;
1136 float leftmost = 0.0, rightmost = 1.0;
1137
1138 if (embedded_rule)
1139 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1140 cmpcharp->col_offset[0] = 0;
1141
1142 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1143 {
1144 if (embedded_rule)
1145 cmpcharp->cmp_rule[i] = *bufp++;
1146
1147 if (*bufp == 0xA0) /* This is an ASCII character. */
1148 {
1149 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1150 width = 1;
1151 bufp++;
1152 }
1153 else /* Multibyte character. */
1154 {
1155 /* Make `bufp' point normal multi-byte form temporally. */
1156 *bufp -= 0x20;
1157 cmpcharp->glyph[i]
1158 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1159 width = WIDTH_BY_CHAR_HEAD (*bufp);
1160 *bufp += 0x20;
1161 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1162 }
1163
1164 if (embedded_rule && i > 0)
1165 {
1166 /* Reference points (global_ref and new_ref) are
1167 encoded as below:
1168
1169 0--1--2 -- ascent
1170 | |
1171 | |
1172 | 4 -+--- center
1173 -- 3 5 -- baseline
1174 | |
1175 6--7--8 -- descent
1176
1177 Now, we calculate the column offset of the new glyph
1178 from the left edge of the first glyph. This can avoid
1179 the same calculation everytime displaying this
1180 composite character. */
1181
1182 /* Reference points of global glyph and new glyph. */
1183 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1184 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1185 /* Column offset relative to the first glyph. */
1186 float left = (leftmost
1187 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1188 - (new_ref % 3) * width / 2.0);
1189
1190 cmpcharp->col_offset[i] = left;
1191 if (left < leftmost)
1192 leftmost = left;
1193 if (left + width > rightmost)
1194 rightmost = left + width;
1195 }
1196 else
1197 {
1198 if (width > rightmost)
1199 rightmost = width;
1200 }
1201 }
1202 if (embedded_rule)
1203 {
1204 /* Now col_offset[N] are relative to the left edge of the
1205 first component. Make them relative to the left edge of
1206 overall glyph. */
1207 for (i = 0; i < chars; i++)
1208 cmpcharp->col_offset[i] -= leftmost;
1209 /* Make rightmost holds width of overall glyph. */
1210 rightmost -= leftmost;
1211 }
1212
1213 cmpcharp->width = rightmost;
1214 if (cmpcharp->width < rightmost)
1215 /* To get a ceiling integer value. */
1216 cmpcharp->width++;
1217 }
1218
1219 cmpchar_table[n_cmpchars] = cmpcharp;
1220
1221 return n_cmpchars++;
1222}
1223
1224/* Return the Nth element of the composite character C. */
1225int
1226cmpchar_component (c, n)
1227 unsigned int c, n;
1228{
1229 int id = COMPOSITE_CHAR_ID (c);
1230
1231 if (id >= n_cmpchars /* C is not a valid composite character. */
1232 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1233 return -1;
1234 /* No face data is stored in glyph code. */
1235 return ((int) (cmpchar_table[id]->glyph[n]));
1236}
1237
1238DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1239 "T if CHAR is a composite character.")
1240 (ch)
1241 Lisp_Object ch;
1242{
1243 CHECK_NUMBER (ch, 0);
1244 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1245}
1246
1247DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1248 2, 2, 0,
1249 "Return the IDXth component character of composite character CHARACTER.")
1250 (character, idx)
1251 Lisp_Object character, idx;
1252{
1253 int c;
1254
1255 CHECK_NUMBER (character, 0);
1256 CHECK_NUMBER (idx, 1);
1257
1258 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1259 args_out_of_range (character, idx);
1260
1261 return make_number (c);
1262}
1263
1264DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1265 2, 2, 0,
55001746
KH
1266 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1267The returned rule is for composing the Nth component\n\
1268on the (N-1)th component. If N is 0, the returned value is always 255.")
1269 (character, n)
1270 Lisp_Object character, n;
4ed46869
KH
1271{
1272 int id, i;
1273
1274 CHECK_NUMBER (character, 0);
55001746 1275 CHECK_NUMBER (n, 1);
4ed46869
KH
1276
1277 id = COMPOSITE_CHAR_ID (XINT (character));
1278 if (id < 0 || id >= n_cmpchars)
1279 error ("Invalid composite character: %d", XINT (character));
55001746 1280 i = XINT (n);
4ed46869 1281 if (i > cmpchar_table[id]->glyph_len)
55001746 1282 args_out_of_range (character, n);
4ed46869
KH
1283
1284 return make_number (cmpchar_table[id]->cmp_rule[i]);
1285}
1286
1287DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1288 Scmpchar_cmp_rule_p, 1, 1, 0,
1289 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1290 (character)
1291 Lisp_Object character;
1292{
1293 int id;
1294
1295 CHECK_NUMBER (character, 0);
1296 id = COMPOSITE_CHAR_ID (XINT (character));
1297 if (id < 0 || id >= n_cmpchars)
1298 error ("Invalid composite character: %d", XINT (character));
1299
1300 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1301}
1302
1303DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1304 Scmpchar_cmp_count, 1, 1, 0,
1305 "Return number of compoents of composite character CHARACTER.")
1306 (character)
1307 Lisp_Object character;
1308{
1309 int id;
1310
1311 CHECK_NUMBER (character, 0);
1312 id = COMPOSITE_CHAR_ID (XINT (character));
1313 if (id < 0 || id >= n_cmpchars)
1314 error ("Invalid composite character: %d", XINT (character));
1315
1316 return (make_number (cmpchar_table[id]->glyph_len));
1317}
1318
1319DEFUN ("compose-string", Fcompose_string, Scompose_string,
1320 1, 1, 0,
1321 "Return one char string composed from all characters in STRING.")
1322 (str)
1323 Lisp_Object str;
1324{
1325 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1326 int len, i;
1327
1328 CHECK_STRING (str, 0);
1329
1330 buf[0] = LEADING_CODE_COMPOSITION;
1331 p = XSTRING (str)->data;
1332 pend = p + XSTRING (str)->size;
1333 i = 1;
1334 while (p < pend)
1335 {
1336 if (*p < 0x20 || *p == 127) /* control code */
1337 error ("Invalid component character: %d", *p);
1338 else if (*p < 0x80) /* ASCII */
1339 {
1340 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1341 error ("Too long string to be composed: %s", XSTRING (str)->data);
1342 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1343 code itself. */
1344 buf[i++] = 0xA0;
1345 buf[i++] = *p++ + 0x80;
1346 }
1347 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1348 {
1349 /* Already composed. Eliminate the heading
1350 LEADING_CODE_COMPOSITION, keep the remaining bytes
1351 unchanged. */
1352 p++;
1353 ptemp = p;
1354 while (! CHAR_HEAD_P (p)) p++;
1355 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1356 error ("Too long string to be composed: %s", XSTRING (str)->data);
1357 bcopy (ptemp, buf + i, p - ptemp);
1358 i += p - ptemp;
1359 }
1360 else /* multibyte char */
1361 {
1362 /* Add 0x20 to the base leading-code, keep the remaining
1363 bytes unchanged. */
1364 len = BYTES_BY_CHAR_HEAD (*p);
1365 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1366 error ("Too long string to be composed: %s", XSTRING (str)->data);
1367 bcopy (p, buf + i, len);
1368 buf[i] += 0x20;
1369 p += len, i += len;
1370 }
1371 }
1372
1373 if (i < 5)
1374 /* STR contains only one character, which can't be composed. */
1375 error ("Too short string to be composed: %s", XSTRING (str)->data);
1376
1377 return make_string (buf, i);
1378}
1379
1380\f
1381charset_id_internal (charset_name)
1382 char *charset_name;
1383{
1384 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1385
1386 if (!VECTORP (val))
1387 error ("Charset %s is not defined", charset_name);
1388
1389 return (XINT (XVECTOR (val)->contents[0]));
1390}
1391
1392DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1393 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1394 ()
1395{
1396 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1397 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1398 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1399 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1400 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1401 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1402 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1403 return Qnil;
1404}
1405
1406init_charset_once ()
1407{
1408 int i, j, k;
1409
1410 staticpro (&Vcharset_table);
1411 staticpro (&Vcharset_symbol_table);
1412
1413 /* This has to be done here, before we call Fmake_char_table. */
1414 Qcharset_table = intern ("charset-table");
1415 staticpro (&Qcharset_table);
1416
1417 /* Intern this now in case it isn't already done.
1418 Setting this variable twice is harmless.
1419 But don't staticpro it here--that is done in alloc.c. */
1420 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1421
1422 /* Now we are ready to set up this property, so we can
1423 create the charset table. */
1424 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1425 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1426
513ee442 1427 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
4ed46869
KH
1428
1429 /* Setup tables. */
1430 for (i = 0; i < 2; i++)
1431 for (j = 0; j < 2; j++)
1432 for (k = 0; k < 128; k++)
1433 iso_charset_table [i][j][k] = -1;
1434
1435 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1436 cmpchar_table_size = n_cmpchars = 0;
1437
1438 for (i = 0; i < 256; i++)
1439 BYTES_BY_CHAR_HEAD (i) = 1;
1440 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1441 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1442 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1443 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1444 /* The following doesn't reflect the actual bytes, but just to tell
1445 that it is a start of a multibyte character. */
1446 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1447
1448 for (i = 0; i < 128; i++)
1449 WIDTH_BY_CHAR_HEAD (i) = 1;
1450 for (; i < 256; i++)
1451 WIDTH_BY_CHAR_HEAD (i) = 4;
1452 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1453 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1454 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1455 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1456}
1457
1458#ifdef emacs
1459
1460syms_of_charset ()
1461{
1462 Qascii = intern ("ascii");
1463 staticpro (&Qascii);
1464
1465 Qcharset = intern ("charset");
1466 staticpro (&Qcharset);
1467
1468 /* Define ASCII charset now. */
1469 update_charset_table (make_number (CHARSET_ASCII),
1470 make_number (1), make_number (94),
1471 make_number (1),
1472 make_number (0),
1473 make_number ('B'),
1474 make_number (0),
1475 build_string ("ASCII"),
1476 build_string ("ASCII"),
1477 build_string ("ASCII (ISO646 IRV)"));
1478 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1479 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1480
1481 Qcomposition = intern ("composition");
1482 staticpro (&Qcomposition);
1483 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1484
1485 defsubr (&Sdefine_charset);
1486 defsubr (&Sdeclare_equiv_charset);
1487 defsubr (&Sfind_charset_region);
1488 defsubr (&Sfind_charset_string);
1489 defsubr (&Smake_char_internal);
1490 defsubr (&Ssplit_char);
1491 defsubr (&Schar_charset);
1492 defsubr (&Siso_charset);
1493 defsubr (&Schar_bytes);
1494 defsubr (&Schar_width);
1495 defsubr (&Sstring_width);
1496 defsubr (&Schar_direction);
1497 defsubr (&Schars_in_string);
1498 defsubr (&Schar_boundary_p);
1499 defsubr (&Sconcat_chars);
1500 defsubr (&Scmpcharp);
1501 defsubr (&Scmpchar_component);
1502 defsubr (&Scmpchar_cmp_rule);
1503 defsubr (&Scmpchar_cmp_rule_p);
1504 defsubr (&Scmpchar_cmp_count);
1505 defsubr (&Scompose_string);
1506 defsubr (&Ssetup_special_charsets);
1507
1508 DEFVAR_LISP ("charset-list", &Vcharset_list,
1509 "List of charsets ever defined.");
1510 Vcharset_list = Fcons (Qascii, Qnil);
1511
1512 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1513 "Leading-code of composite characters.");
1514 leading_code_composition = LEADING_CODE_COMPOSITION;
1515
1516 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1517 "Leading-code of private TYPE9N charset of column-width 1.");
1518 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1519
1520 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1521 "Leading-code of private TYPE9N charset of column-width 2.");
1522 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1523
1524 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1525 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1526 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1527
1528 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1529 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1530 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1531}
1532
1533#endif /* emacs */