(setup_coding_system): Do not set
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592
RS
1/* Basic multilingual character support.
2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
25#include <stdio.h>
26
27#ifdef emacs
28
29#include <sys/types.h>
30#include <config.h>
31#include "lisp.h"
32#include "buffer.h"
33#include "charset.h"
34#include "coding.h"
fc6b09bf 35#include "disptab.h"
4ed46869
KH
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
44
45/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */
47int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52/* Declaration of special charsets. */
53int charset_ascii; /* ASCII */
54int charset_composition; /* for a composite character */
55int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63Lisp_Object Qcharset_table;
64
65/* A char-table containing information of each character set. */
66Lisp_Object Vcharset_table;
67
68/* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70Lisp_Object Vcharset_symbol_table;
71
72/* A list of charset symbols ever defined. */
73Lisp_Object Vcharset_list;
74
75/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76int bytes_by_char_head[256];
77int width_by_char_head[256];
78
79/* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81int iso_charset_table[2][2][128];
82
513ee442
KH
83/* Table of pointers to the structure `cmpchar_info' indexed by
84 CMPCHAR-ID. */
85struct cmpchar_info **cmpchar_table;
86/* The current size of `cmpchar_table'. */
87static int cmpchar_table_size;
88/* Number of the current composite characters. */
89int n_cmpchars;
90
4ed46869
KH
91/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
92unsigned char *_fetch_multibyte_char_p;
93int _fetch_multibyte_char_len;
94
046b1f03
RS
95#define min(X, Y) ((X) < (Y) ? (X) : (Y))
96#define max(X, Y) ((X) > (Y) ? (X) : (Y))
97\f
4ed46869
KH
98/* Set STR a pointer to the multi-byte form of the character C. If C
99 is not a composite character, the multi-byte form is set in WORKBUF
100 and STR points WORKBUF. The caller should allocate at least 4-byte
101 area at WORKBUF in advance. Returns the length of the multi-byte
bd4c6dd0
KH
102 form. If C is an invalid character to have a multi-byte form,
103 signal an error.
4ed46869
KH
104
105 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
106 function directly if C can be an ASCII character. */
107
108int
109non_ascii_char_to_string (c, workbuf, str)
110 int c;
111 unsigned char *workbuf, **str;
112{
6dc0722d 113 int charset, c1, c2;
4ed46869
KH
114
115 if (COMPOSITE_CHAR_P (c))
116 {
117 int cmpchar_id = COMPOSITE_CHAR_ID (c);
118
119 if (cmpchar_id < n_cmpchars)
120 {
121 *str = cmpchar_table[cmpchar_id]->data;
122 return cmpchar_table[cmpchar_id]->len;
123 }
124 else
125 {
d16a4fc6 126 error ("Invalid character: %d", c);
4ed46869
KH
127 }
128 }
129
130 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
bd4c6dd0
KH
131 if (!charset
132 || ! CHARSET_DEFINED_P (charset)
133 || c1 >= 0 && c1 < 32
134 || c2 >= 0 && c2 < 32)
d16a4fc6 135 error ("Invalid character: %d", c);
4ed46869
KH
136
137 *str = workbuf;
138 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
139 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
140 workbuf++;
141 *workbuf++ = c1 | 0x80;
6dc0722d 142 if (c2 >= 0)
4ed46869
KH
143 *workbuf++ = c2 | 0x80;
144
145 return (workbuf - *str);
146}
147
148/* Return a non-ASCII character of which multi-byte form is at STR of
149 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
150 character is set to the address ACTUAL_LEN.
151
152 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
153 directly if STR can hold an ASCII character. */
154
155string_to_non_ascii_char (str, len, actual_len)
156 unsigned char *str;
157 int len, *actual_len;
158{
159 int charset;
160 unsigned char c1, c2;
161 register int c;
162
163 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
164 {
165 if (actual_len)
166 *actual_len = 1;
167 return (int) *str;
168 }
169
170 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
171
172 if (actual_len)
173 *actual_len = (charset == CHARSET_COMPOSITION
174 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
175 : BYTES_BY_CHAR_HEAD (*str));
176 return c;
177}
178
179/* Return the length of the multi-byte form at string STR of length LEN. */
180int
181multibyte_form_length (str, len)
182 unsigned char *str;
183 int len;
184{
185 int charset;
186 unsigned char c1, c2;
187 register int c;
188
189 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
190 return 1;
191
192 return (charset == CHARSET_COMPOSITION
193 ? cmpchar_table[(c1 << 7) | c2]->len
194 : BYTES_BY_CHAR_HEAD (*str));
195}
196
197/* Check if string STR of length LEN contains valid multi-byte form of
198 a character. If valid, charset and position codes of the character
199 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
200 return -1. This should be used only in the macro SPLIT_STRING
201 which checks range of STR in advance. */
202
203split_non_ascii_string (str, len, charset, c1, c2)
204 register unsigned char *str, *c1, *c2;
205 register int len, *charset;
206{
207 register unsigned int cs = *str++;
208
209 if (cs == LEADING_CODE_COMPOSITION)
210 {
211 int cmpchar_id = str_cmpchar_id (str - 1, len);
212
213 if (cmpchar_id < 0)
214 return -1;
215 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
216 }
217 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
218 && CHARSET_DEFINED_P (cs))
219 {
220 *charset = cs;
221 if (*str < 0xA0)
222 return -1;
223 *c1 = (*str++) & 0x7F;
224 if (CHARSET_DIMENSION (cs) == 2)
225 {
226 if (*str < 0xA0)
227 return -1;
228 *c2 = (*str++) & 0x7F;
229 }
230 }
231 else
232 return -1;
233 return 0;
234}
235
23d2a7f1
KH
236/* Return a character unified with C (or a character made of CHARSET,
237 C1, and C2) in unification table TABLE. If no unification is found
238 in TABLE, return C. */
239unify_char (table, c, charset, c1, c2)
240 Lisp_Object table;
241 int c, charset, c1, c2;
242{
243 Lisp_Object ch;
244 int alt_charset, alt_c1, alt_c2, dimension;
245
246 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
247 if (!CHAR_TABLE_P (table)
248 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
249 || XINT (ch) < 0)
250 return c;
251
252 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
253 dimension = CHARSET_DIMENSION (alt_charset);
254 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
255 /* CH is not a generic character, just return it. */
256 return XFASTINT (ch);
257
258 /* Since CH is a generic character, we must return a specific
259 charater which has the same position codes as C from CH. */
260 if (charset < 0)
261 SPLIT_CHAR (c, charset, c1, c2);
262 if (dimension != CHARSET_DIMENSION (charset))
263 /* We can't make such a character because of dimension mismatch. */
264 return c;
265 if (!alt_c1) alt_c1 = c1;
266 if (!alt_c2) alt_c2 = c2;
267 return MAKE_CHAR (alt_charset, c1, c2);
268}
269
4ed46869
KH
270/* Update the table Vcharset_table with the given arguments (see the
271 document of `define-charset' for the meaning of each argument).
272 Several other table contents are also updated. The caller should
273 check the validity of CHARSET-ID and the remaining arguments in
274 advance. */
275
276void
277update_charset_table (charset_id, dimension, chars, width, direction,
278 iso_final_char, iso_graphic_plane,
279 short_name, long_name, description)
280 Lisp_Object charset_id, dimension, chars, width, direction;
281 Lisp_Object iso_final_char, iso_graphic_plane;
282 Lisp_Object short_name, long_name, description;
283{
284 int charset = XINT (charset_id);
285 int bytes;
286 unsigned char leading_code_base, leading_code_ext;
287
6dc0722d
KH
288 if (NILP (CHARSET_TABLE_ENTRY (charset)))
289 CHARSET_TABLE_ENTRY (charset)
290 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869
KH
291
292 /* Get byte length of multibyte form, base leading-code, and
293 extended leading-code of the charset. See the comment under the
294 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
295 bytes = XINT (dimension);
296 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
297 {
298 /* Official charset, it doesn't have an extended leading-code. */
299 if (charset != CHARSET_ASCII)
300 bytes += 1; /* For a base leading-code. */
301 leading_code_base = charset;
302 leading_code_ext = 0;
303 }
304 else
305 {
306 /* Private charset. */
307 bytes += 2; /* For base and extended leading-codes. */
308 leading_code_base
309 = (charset < LEADING_CODE_EXT_12
310 ? LEADING_CODE_PRIVATE_11
311 : (charset < LEADING_CODE_EXT_21
312 ? LEADING_CODE_PRIVATE_12
313 : (charset < LEADING_CODE_EXT_22
314 ? LEADING_CODE_PRIVATE_21
315 : LEADING_CODE_PRIVATE_22)));
316 leading_code_ext = charset;
317 }
318
319 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
320 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
321 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
322 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
323 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
324 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
325 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
326 = make_number (leading_code_base);
327 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
328 = make_number (leading_code_ext);
329 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
330 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
331 = iso_graphic_plane;
332 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
333 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
334 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
335 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
336
337 {
338 /* If we have already defined a charset which has the same
339 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
340 DIRECTION, we must update the entry REVERSE-CHARSET of both
341 charsets. If there's no such charset, the value of the entry
342 is set to nil. */
343 int i;
344
513ee442 345 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
346 if (!NILP (CHARSET_TABLE_ENTRY (i)))
347 {
348 if (CHARSET_DIMENSION (i) == XINT (dimension)
349 && CHARSET_CHARS (i) == XINT (chars)
350 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
351 && CHARSET_DIRECTION (i) != XINT (direction))
352 {
353 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
354 = make_number (i);
355 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
356 break;
357 }
358 }
513ee442 359 if (i > MAX_CHARSET)
4ed46869
KH
360 /* No such a charset. */
361 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
362 = make_number (-1);
363 }
364
365 if (charset != CHARSET_ASCII
366 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
367 {
368 /* Update tables bytes_by_char_head and width_by_char_head. */
369 bytes_by_char_head[leading_code_base] = bytes;
370 width_by_char_head[leading_code_base] = XINT (width);
371
372 /* Update table emacs_code_class. */
373 emacs_code_class[charset] = (bytes == 2
374 ? EMACS_leading_code_2
375 : (bytes == 3
376 ? EMACS_leading_code_3
377 : EMACS_leading_code_4));
378 }
379
380 /* Update table iso_charset_table. */
381 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
382 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
383}
384
385#ifdef emacs
386
387/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
388 is invalid. */
389int
390get_charset_id (charset_symbol)
391 Lisp_Object charset_symbol;
392{
393 Lisp_Object val;
394 int charset;
395
396 return ((SYMBOLP (charset_symbol)
397 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
398 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
399 CHARSET_VALID_P (charset)))
400 ? charset : -1);
401}
402
403/* Return an identification number for a new private charset of
404 DIMENSION and WIDTH. If there's no more room for the new charset,
405 return 0. */
406Lisp_Object
407get_new_private_charset_id (dimension, width)
408 int dimension, width;
409{
410 int charset, from, to;
411
412 if (dimension == 1)
413 {
414 if (width == 1)
415 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
416 else
417 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
418 }
419 else
420 {
421 if (width == 1)
422 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
423 else
424 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
425 }
426
427 for (charset = from; charset < to; charset++)
428 if (!CHARSET_DEFINED_P (charset)) break;
429
430 return make_number (charset < to ? charset : 0);
431}
432
433DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
434 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 435If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
436 treated as a private charset.\n\
437INFO-VECTOR is a vector of the format:\n\
438 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
439 SHORT-NAME LONG-NAME DESCRIPTION]\n\
440The meanings of each elements is as follows:\n\
441DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
442CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
443WIDTH (integer) is the number of columns a character in the charset\n\
444occupies on the screen: one of 0, 1, and 2.\n\
445\n\
446DIRECTION (integer) is the rendering direction of characters in the\n\
447charset when rendering. If 0, render from right to left, else\n\
448render from left to right.\n\
449\n\
450ISO-FINAL-CHAR (character) is the final character of the\n\
451corresponding ISO 2022 charset.\n\
452\n\
453ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
454while encoding to variants of ISO 2022 coding system, one of the\n\
455following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
456\n\
457SHORT-NAME (string) is the short name to refer to the charset.\n\
458\n\
459LONG-NAME (string) is the long name to refer to the charset.\n\
460\n\
461DESCRIPTION (string) is the description string of the charset.")
462 (charset_id, charset_symbol, info_vector)
463 Lisp_Object charset_id, charset_symbol, info_vector;
464{
465 Lisp_Object *vec;
466
467 if (!NILP (charset_id))
468 CHECK_NUMBER (charset_id, 0);
469 CHECK_SYMBOL (charset_symbol, 1);
470 CHECK_VECTOR (info_vector, 2);
471
472 if (! NILP (charset_id))
473 {
474 if (! CHARSET_VALID_P (XINT (charset_id)))
475 error ("Invalid CHARSET: %d", XINT (charset_id));
476 else if (CHARSET_DEFINED_P (XINT (charset_id)))
477 error ("Already defined charset: %d", XINT (charset_id));
478 }
479
480 vec = XVECTOR (info_vector)->contents;
481 if (XVECTOR (info_vector)->size != 9
482 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
483 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
484 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
485 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
486 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
487 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
488 || !STRINGP (vec[6])
489 || !STRINGP (vec[7])
490 || !STRINGP (vec[8]))
491 error ("Invalid info-vector argument for defining charset %s",
492 XSYMBOL (charset_symbol)->name->data);
493
494 if (NILP (charset_id))
495 {
496 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
497 if (XINT (charset_id) == 0)
498 error ("There's no room for a new private charset %s",
499 XSYMBOL (charset_symbol)->name->data);
500 }
501
502 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
503 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 504 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
505 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
506 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
507 return Qnil;
508}
509
510DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
511 4, 4, 0,
512 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
513CHARSET should be defined by `defined-charset' in advance.")
514 (dimension, chars, final_char, charset_symbol)
515 Lisp_Object dimension, chars, final_char, charset_symbol;
516{
517 int charset;
518
519 CHECK_NUMBER (dimension, 0);
520 CHECK_NUMBER (chars, 1);
521 CHECK_NUMBER (final_char, 2);
522 CHECK_SYMBOL (charset_symbol, 3);
523
524 if (XINT (dimension) != 1 && XINT (dimension) != 2)
525 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
526 if (XINT (chars) != 94 && XINT (chars) != 96)
527 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
528 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
529 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
530 if ((charset = get_charset_id (charset_symbol)) < 0)
531 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
532
533 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
534 return Qnil;
535}
536
537/* Return number of different charsets in STR of length LEN. In
538 addition, for each found charset N, CHARSETS[N] is set 1. The
a29e3b1b 539 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
23d2a7f1 540 It may lookup a unification table TABLE if supplied. */
4ed46869
KH
541
542int
23d2a7f1 543find_charset_in_str (str, len, charsets, table)
028d516b
KH
544 unsigned char *str;
545 int len, *charsets;
23d2a7f1 546 Lisp_Object table;
4ed46869 547{
733eafd8 548 register int num = 0, c;
4ed46869 549
23d2a7f1
KH
550 if (! CHAR_TABLE_P (table))
551 table = Qnil;
552
4ed46869
KH
553 while (len > 0)
554 {
05505664 555 int bytes, charset;
733eafd8 556 c = *str;
23d2a7f1 557
733eafd8 558 if (c == LEADING_CODE_COMPOSITION)
05505664 559 {
733eafd8
KH
560 int cmpchar_id = str_cmpchar_id (str, len);
561 GLYPH *glyph;
05505664 562
733eafd8 563 if (cmpchar_id > 0)
05505664 564 {
733eafd8
KH
565 struct cmpchar_info *cmpcharp = cmpchar_table[cmpchar_id];
566 int i;
567
568 for (i = 0; i < cmpcharp->glyph_len; i++)
569 {
570 c = cmpcharp->glyph[i];
571 if (!NILP (table))
572 {
573 if ((c = unify_char (table, c, 0, 0, 0)) < 0)
574 c = cmpcharp->glyph[i];
575 }
576 if ((charset = CHAR_CHARSET (c)) < 0)
577 charset = CHARSET_ASCII;
578 if (!charsets[charset])
579 {
580 charsets[charset] = 1;
581 num += 1;
582 }
583 }
584 str += cmpcharp->len;
585 len -= cmpcharp->len;
586 continue;
05505664 587 }
05505664 588
733eafd8
KH
589 charset = CHARSET_ASCII;
590 bytes = 1;
591 }
23d2a7f1
KH
592 else
593 {
733eafd8
KH
594 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
595 if (! NILP (table))
596 {
597 int c1 = unify_char (table, c, 0, 0, 0);
598 if (c1 >= 0)
599 c = c1;
600 }
601 charset = CHAR_CHARSET (c);
23d2a7f1 602 }
4ed46869
KH
603
604 if (!charsets[charset])
605 {
606 charsets[charset] = 1;
607 num += 1;
608 }
609 str += bytes;
610 len -= bytes;
611 }
612 return num;
613}
614
615DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 616 2, 3, 0,
4ed46869 617 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1
KH
618BEG and END are buffer positions.\n\
619Optional arg TABLE if non-nil is a unification table to look up.")
620 (beg, end, table)
621 Lisp_Object beg, end, table;
4ed46869 622{
028d516b 623 int charsets[MAX_CHARSET + 1];
4ed46869
KH
624 int from, to, stop, i;
625 Lisp_Object val;
626
627 validate_region (&beg, &end);
628 from = XFASTINT (beg);
629 stop = to = XFASTINT (end);
630 if (from < GPT && GPT < to)
631 stop = GPT;
028d516b 632 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
633 while (1)
634 {
23d2a7f1 635 find_charset_in_str (POS_ADDR (from), stop - from, charsets, table);
4ed46869
KH
636 if (stop < to)
637 from = stop, stop = to;
638 else
639 break;
640 }
641 val = Qnil;
513ee442 642 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
643 if (charsets[i])
644 val = Fcons (CHARSET_SYMBOL (i), val);
645 return val;
646}
647
648DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
649 1, 2, 0,
650 "Return a list of charsets in STR.\n\
651Optional arg TABLE if non-nil is a unification table to look up.")
652 (str, table)
653 Lisp_Object str, table;
4ed46869 654{
a29e3b1b 655 int charsets[MAX_CHARSET + 1];
4ed46869
KH
656 int i;
657 Lisp_Object val;
658
659 CHECK_STRING (str, 0);
a29e3b1b 660 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
23d2a7f1
KH
661 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size,
662 charsets, table);
4ed46869 663 val = Qnil;
513ee442 664 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
665 if (charsets[i])
666 val = Fcons (CHARSET_SYMBOL (i), val);
667 return val;
668}
669\f
670DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 671 "")
4ed46869
KH
672 (charset, code1, code2)
673 Lisp_Object charset, code1, code2;
674{
675 CHECK_NUMBER (charset, 0);
676
677 if (NILP (code1))
678 XSETFASTINT (code1, 0);
679 else
680 CHECK_NUMBER (code1, 1);
681 if (NILP (code2))
682 XSETFASTINT (code2, 0);
683 else
684 CHECK_NUMBER (code2, 2);
685
686 if (!CHARSET_DEFINED_P (XINT (charset)))
687 error ("Invalid charset: %d", XINT (charset));
688
689 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
690}
691
692DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
693 "Return list of charset and one or two position-codes of CHAR.")
694 (ch)
695 Lisp_Object ch;
696{
697 Lisp_Object val;
6dc0722d 698 int charset, c1, c2;
4ed46869
KH
699
700 CHECK_NUMBER (ch, 0);
701 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 702 return (c2 >= 0
4ed46869
KH
703 ? Fcons (CHARSET_SYMBOL (charset),
704 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
705 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
706}
707
708DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
709 "Return charset of CHAR.")
710 (ch)
711 Lisp_Object ch;
712{
713 CHECK_NUMBER (ch, 0);
714
715 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
716}
717
718DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
719 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
720 (dimension, chars, final_char)
721 Lisp_Object dimension, chars, final_char;
722{
723 int charset;
724
725 CHECK_NUMBER (dimension, 0);
726 CHECK_NUMBER (chars, 1);
727 CHECK_NUMBER (final_char, 2);
728
729 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
730 return Qnil;
731 return CHARSET_SYMBOL (charset);
732}
733
9d3d8cba
KH
734/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
735 generic character. If GENERICP is zero, return nonzero iff C is a
736 valid normal character. Do not call this function directly,
737 instead use macro CHAR_VALID_P. */
738int
739char_valid_p (c, genericp)
740 int c, genericp;
741{
742 int charset, c1, c2;
743
744 if (c < 0)
745 return 0;
746 if (SINGLE_BYTE_CHAR_P (c))
747 return 1;
748 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
749 if (!CHARSET_VALID_P (charset))
750 return 0;
751 return (c < MIN_CHAR_COMPOSITION
752 ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two. */
753 ? (genericp && c1 == 0 && c2 == 0
754 || c1 >= 32 && c2 >= 32)
755 : (genericp && c1 == 0
756 || c1 >= 32))
757 : c < MIN_CHAR_COMPOSITION + n_cmpchars);
758}
759
760DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
761 "Return t if OBJECT is a valid normal character.
762If optional arg GENERICP is non-nil, also return t if OBJECT is
763a valid generic character.")
764 (object, genericp)
765 Lisp_Object object, genericp;
766{
767 if (! NATNUMP (object))
768 return Qnil;
769 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
770}
771
4ed46869
KH
772DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
773 "Return byte length of multi-byte form of CHAR.")
774 (ch)
775 Lisp_Object ch;
776{
777 Lisp_Object val;
778 int bytes;
779
780 CHECK_NUMBER (ch, 0);
781 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
782 {
783 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
784
785 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
786 }
787 else
788 {
789 int charset = CHAR_CHARSET (XFASTINT (ch));
790
791 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
792 }
793
794 XSETFASTINT (val, bytes);
795 return val;
796}
797
798/* Return the width of character of which multi-byte form starts with
799 C. The width is measured by how many columns occupied on the
800 screen when displayed in the current buffer. */
801
802#define ONE_BYTE_CHAR_WIDTH(c) \
803 (c < 0x20 \
804 ? (c == '\t' \
53316e55 805 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
806 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
807 : (c < 0x7f \
808 ? 1 \
809 : (c == 0x7F \
810 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
811 : ((! NILP (current_buffer->enable_multibyte_characters) \
812 && BASE_LEADING_CODE_P (c)) \
813 ? WIDTH_BY_CHAR_HEAD (c) \
814 : 4)))) \
815
816
817DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
818 "Return width of CHAR when displayed in the current buffer.\n\
819The width is measured by how many columns it occupies on the screen.")
820 (ch)
821 Lisp_Object ch;
822{
859f2b3c 823 Lisp_Object val, disp;
4ed46869 824 int c;
51c4025f 825 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869
KH
826
827 CHECK_NUMBER (ch, 0);
828
859f2b3c
RS
829 c = XINT (ch);
830
831 /* Get the way the display table would display it. */
51c4025f 832 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
833
834 if (VECTORP (disp))
835 XSETINT (val, XVECTOR (disp)->size);
836 else if (SINGLE_BYTE_CHAR_P (c))
837 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
838 else if (COMPOSITE_CHAR_P (c))
839 {
840 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
841 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
842 }
843 else
844 {
845 int charset = CHAR_CHARSET (c);
846
847 XSETFASTINT (val, CHARSET_WIDTH (charset));
848 }
849 return val;
850}
851
852/* Return width of string STR of length LEN when displayed in the
853 current buffer. The width is measured by how many columns it
854 occupies on the screen. */
859f2b3c 855
4ed46869
KH
856int
857strwidth (str, len)
858 unsigned char *str;
859 int len;
860{
861 unsigned char *endp = str + len;
862 int width = 0;
c4a4e28f 863 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 864
859f2b3c
RS
865 while (str < endp)
866 {
867 if (*str == LEADING_CODE_COMPOSITION)
868 {
869 int id = str_cmpchar_id (str, endp - str);
870
871 if (id < 0)
872 {
873 width += 4;
874 str++;
875 }
876 else
877 {
878 width += cmpchar_table[id]->width;
879 str += cmpchar_table[id]->len;
880 }
881 }
882 else
883 {
884 Lisp_Object disp;
885 int thiswidth;
886 int c = STRING_CHAR (str, endp - str);
887
888 /* Get the way the display table would display it. */
acc35c36
RS
889 if (dp)
890 disp = DISP_CHAR_VECTOR (dp, c);
891 else
892 disp = Qnil;
859f2b3c
RS
893
894 if (VECTORP (disp))
895 thiswidth = XVECTOR (disp)->size;
896 else
897 thiswidth = ONE_BYTE_CHAR_WIDTH (*str);
898
899 width += thiswidth;
900 str += BYTES_BY_CHAR_HEAD (*str);
901 }
902 }
4ed46869
KH
903 return width;
904}
905
906DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
907 "Return width of STRING when displayed in the current buffer.\n\
908Width is measured by how many columns it occupies on the screen.\n\
046b1f03
RS
909When calculating width of a multibyte character in STRING,\n\
910only the base leading-code is considered; the validity of\n\
911the following bytes is not checked.")
4ed46869
KH
912 (str)
913 Lisp_Object str;
914{
915 Lisp_Object val;
916
917 CHECK_STRING (str, 0);
918 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
919 return val;
920}
921
922DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
923 "Return the direction of CHAR.\n\
924The returned value is 0 for left-to-right and 1 for right-to-left.")
925 (ch)
926 Lisp_Object ch;
927{
928 int charset;
929
930 CHECK_NUMBER (ch, 0);
931 charset = CHAR_CHARSET (XFASTINT (ch));
932 if (!CHARSET_DEFINED_P (charset))
933 error ("Invalid character: %d", XINT (ch));
934 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
935}
936
937DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
046b1f03
RS
938 "Return number of characters in STRING.\n\
939When using multibyte characters, this is not the necessarily same as\n\
940the length of STRING; the length counts a multibyte characters as\n\
941several bytes, but this function counts a multibyte character as one\n\
942character.")
4ed46869
KH
943 (str)
944 Lisp_Object str;
945{
946 Lisp_Object val;
947 unsigned char *p, *endp;
948 int chars;
949
950 CHECK_STRING (str, 0);
951
9036eb45
KH
952 if (NILP (current_buffer->enable_multibyte_characters))
953 return make_number (XSTRING (str)->size);
954
4ed46869
KH
955 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
956 chars = 0;
957 while (p < endp)
958 {
959 if (*p == LEADING_CODE_COMPOSITION)
960 {
961 p++;
962 while (p < endp && ! CHAR_HEAD_P (p)) p++;
963 }
964 else
965 p += BYTES_BY_CHAR_HEAD (*p);
966 chars++;
967 }
968
969 XSETFASTINT (val, chars);
970 return val;
971}
972
af4fecb4 973DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
046b1f03 974 "Return number of characters between BEG and END.\n\
a8a35e61
RS
975When using multibyte characters, this is not the necessarily same\n\
976as (- END BEG); that subtraction gives you the number of bytes, which\n\
046b1f03
RS
977may be more than the number of characters.")
978 (beg, end)
979 Lisp_Object beg, end;
980{
981 int from, to, stop;
982 Lisp_Object val;
983 int chars = 0;
984 unsigned char *p, *endp;
985
986 validate_region (&beg, &end);
987
988 from = min (XFASTINT (beg), XFASTINT (end));
a8a35e61 989 to = max (XFASTINT (beg), XFASTINT (end));
046b1f03 990 p = POS_ADDR (from);
046b1f03 991
9036eb45
KH
992 if (NILP (current_buffer->enable_multibyte_characters))
993 return make_number (to - from);
994
a8a35e61
RS
995 if (from < GPT && GPT <= to)
996 {
997 stop = GPT;
998 endp = GPT_ADDR;
999 }
1000 else
1001 {
1002 stop = to;
1003 endp = POS_ADDR (stop);
1004 }
046b1f03
RS
1005
1006 while (1)
1007 {
9036eb45 1008 if (p >= endp)
046b1f03 1009 {
9036eb45 1010 if (stop >= to)
046b1f03 1011 break;
a8a35e61
RS
1012
1013 p = POS_ADDR (stop);
1014 stop = to;
1015 endp = POS_ADDR (stop);
046b1f03
RS
1016 }
1017
1018 if (*p == LEADING_CODE_COMPOSITION)
1019 {
1020 p++;
1021 while (p < endp && ! CHAR_HEAD_P (p)) p++;
1022 }
1023 else
1024 p += BYTES_BY_CHAR_HEAD (*p);
1025
1026 chars++;
1027 }
1028
1029 return make_number (chars);
1030}
1031
4ed46869
KH
1032DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
1033 "Return non-nil value if POS is at character boundary of multibyte form.\n\
046b1f03 1034When the value is non-nil, it contains some additional information:\n\
4ed46869 1035 0 if POS is at an ASCII character or at the end of range,\n\
046b1f03
RS
1036 1 if POS is before a 2-byte length multi-byte form,\n\
1037 2 if POS is before a 3-byte length multi-byte form,\n\
1038 3 if POS is before a 4-byte length multi-byte form,\n\
1039 4 if POS is before a composite character.\n\
4ed46869
KH
1040If POS is out of range or not at character boundary, return NIL.")
1041 (pos)
1042 Lisp_Object pos;
1043{
1044 Lisp_Object val;
1045 int n;
1046
1047 CHECK_NUMBER_COERCE_MARKER (pos, 0);
1048
1049 n = XINT (pos);
1050 if (n < BEGV || n > ZV)
1051 return Qnil;
1052
1053 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
1054 XSETFASTINT (val, 0);
1055 else
1056 {
1057 unsigned char *p = POS_ADDR (n);
1058
1059 if (SINGLE_BYTE_CHAR_P (*p))
1060 XSETFASTINT (val, 0);
1061 else if (*p == LEADING_CODE_COMPOSITION)
1062 XSETFASTINT (val, 4);
1063 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
1064 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
1065 else
1066 val = Qnil;
1067 }
1068 return val;
1069}
1070
1071DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
1072 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
1073 (n, args)
1074 int n;
4ed46869
KH
1075 Lisp_Object *args;
1076{
53316e55 1077 int i;
4ed46869 1078 unsigned char *buf
bd4c6dd0 1079 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
4ed46869
KH
1080 unsigned char *p = buf;
1081 Lisp_Object val;
1082
1083 for (i = 0; i < n; i++)
1084 {
1085 int c, len;
1086 unsigned char *str;
1087
1088 if (!INTEGERP (args[i]))
1089 {
1090 free (buf);
1091 CHECK_NUMBER (args[i], 0);
1092 }
1093 c = XINT (args[i]);
1094 len = CHAR_STRING (c, p, str);
1095 if (p != str)
1096 /* C is a composite character. */
1097 bcopy (str, p, len);
1098 p += len;
1099 }
1100
1101 val = make_string (buf, p - buf);
4ed46869
KH
1102 return val;
1103}
1104
1105#endif /* emacs */
1106\f
1107/*** Composite characters staffs ***/
1108
1109/* Each composite character is identified by CMPCHAR-ID which is
1110 assigned when Emacs needs the character code of the composite
1111 character (e.g. when displaying it on the screen). See the
1112 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1113 composite character is represented in Emacs. */
1114
1115/* If `static' is defined, it means that it is defined to null string. */
1116#ifndef static
1117/* The following function is copied from lread.c. */
1118static int
1119hash_string (ptr, len)
1120 unsigned char *ptr;
1121 int len;
1122{
1123 register unsigned char *p = ptr;
1124 register unsigned char *end = p + len;
1125 register unsigned char c;
1126 register int hash = 0;
1127
1128 while (p != end)
1129 {
1130 c = *p++;
1131 if (c >= 0140) c -= 40;
1132 hash = ((hash<<3) + (hash>>28) + c);
1133 }
1134 return hash & 07777777777;
1135}
1136#endif
1137
4ed46869
KH
1138#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1139
1140static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1141
1142/* Each element of `cmpchar_hash_table' is a pointer to an array of
1143 integer, where the 1st element is the size of the array, the 2nd
1144 element is how many elements are actually used in the array, and
1145 the remaining elements are CMPCHAR-IDs of composite characters of
1146 the same hash value. */
1147#define CMPCHAR_HASH_SIZE(table) table[0]
1148#define CMPCHAR_HASH_USED(table) table[1]
1149#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1150
1151/* Return CMPCHAR-ID of the composite character in STR of the length
1152 LEN. If the composite character has not yet been registered,
1153 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1154 is the sole function for assigning CMPCHAR-ID. */
1155int
1156str_cmpchar_id (str, len)
1157 unsigned char *str;
1158 int len;
1159{
1160 int hash_idx, *hashp;
1161 unsigned char *buf;
1162 int embedded_rule; /* 1 if composition rule is embedded. */
1163 int chars; /* number of components. */
1164 int i;
1165 struct cmpchar_info *cmpcharp;
1166
1167 if (len < 5)
1168 /* Any composite char have at least 3-byte length. */
1169 return -1;
1170
1171 /* The second byte 0xFF means compostion rule is embedded. */
1172 embedded_rule = (str[1] == 0xFF);
1173
1174 /* At first, get the actual length of the composite character. */
1175 {
1176 unsigned char *p, *endp = str + 1, *lastp = str + len;
1177 int bytes;
1178
1179 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
1180 chars = 0;
1181 p = str + 1 + embedded_rule;
1182 while (p < endp)
1183 {
1184 /* No need of checking if *P is 0xA0 because
1185 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1186 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
1187 chars++;
1188 }
1189 len = (p -= embedded_rule) - str;
1190 if (p > endp)
1191 len -= - bytes, chars--;
1192
1193 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
1194 /* Invalid number of components. */
1195 return -1;
1196 }
1197 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1198 hashp = cmpchar_hash_table[hash_idx];
1199
1200 /* Then, look into the hash table. */
1201 if (hashp != NULL)
1202 /* Find the correct one among composite characters of the same
1203 hash value. */
1204 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1205 {
1206 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1207 if (len == cmpcharp->len
1208 && ! bcmp (str, cmpcharp->data, len))
1209 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1210 }
1211
1212 /* We have to register the composite character in cmpchar_table. */
513ee442
KH
1213 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1214 /* No, we have no more room for a new composite character. */
1215 return -1;
1216
4ed46869
KH
1217 /* Make the entry in hash table. */
1218 if (hashp == NULL)
1219 {
1220 /* Make a table for 8 composite characters initially. */
1221 hashp = (cmpchar_hash_table[hash_idx]
1222 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1223 CMPCHAR_HASH_SIZE (hashp) = 10;
1224 CMPCHAR_HASH_USED (hashp) = 2;
1225 }
1226 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1227 {
1228 CMPCHAR_HASH_SIZE (hashp) += 8;
1229 hashp = (cmpchar_hash_table[hash_idx]
1230 = (int *) xrealloc (hashp,
1231 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1232 }
1233 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1234 CMPCHAR_HASH_USED (hashp)++;
1235
1236 /* Set information of the composite character in cmpchar_table. */
1237 if (cmpchar_table_size == 0)
1238 {
1239 /* This is the first composite character to be registered. */
1240 cmpchar_table_size = 256;
1241 cmpchar_table
1242 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1243 * cmpchar_table_size);
1244 }
1245 else if (cmpchar_table_size <= n_cmpchars)
1246 {
1247 cmpchar_table_size += 256;
1248 cmpchar_table
1249 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1250 sizeof (cmpchar_table[0])
1251 * cmpchar_table_size);
1252 }
1253
1254 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1255
1256 cmpcharp->len = len;
1257 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1258 bcopy (str, cmpcharp->data, len);
1259 cmpcharp->data[len] = 0;
1260 cmpcharp->glyph_len = chars;
1261 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1262 if (embedded_rule)
1263 {
1264 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1265 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1266 }
1267 else
1268 {
1269 cmpcharp->cmp_rule = NULL;
1270 cmpcharp->col_offset = NULL;
1271 }
1272
1273 /* Setup GLYPH data and composition rules (if any) so as not to make
1274 them every time on displaying. */
1275 {
1276 unsigned char *bufp;
1277 int width;
1278 float leftmost = 0.0, rightmost = 1.0;
1279
1280 if (embedded_rule)
1281 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1282 cmpcharp->col_offset[0] = 0;
1283
1284 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1285 {
1286 if (embedded_rule)
1287 cmpcharp->cmp_rule[i] = *bufp++;
1288
1289 if (*bufp == 0xA0) /* This is an ASCII character. */
1290 {
1291 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1292 width = 1;
1293 bufp++;
1294 }
1295 else /* Multibyte character. */
1296 {
1297 /* Make `bufp' point normal multi-byte form temporally. */
1298 *bufp -= 0x20;
1299 cmpcharp->glyph[i]
1300 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1301 width = WIDTH_BY_CHAR_HEAD (*bufp);
1302 *bufp += 0x20;
1303 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1304 }
1305
1306 if (embedded_rule && i > 0)
1307 {
1308 /* Reference points (global_ref and new_ref) are
1309 encoded as below:
1310
1311 0--1--2 -- ascent
1312 | |
1313 | |
1314 | 4 -+--- center
1315 -- 3 5 -- baseline
1316 | |
1317 6--7--8 -- descent
1318
1319 Now, we calculate the column offset of the new glyph
1320 from the left edge of the first glyph. This can avoid
1321 the same calculation everytime displaying this
1322 composite character. */
1323
1324 /* Reference points of global glyph and new glyph. */
1325 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1326 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1327 /* Column offset relative to the first glyph. */
1328 float left = (leftmost
1329 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1330 - (new_ref % 3) * width / 2.0);
1331
1332 cmpcharp->col_offset[i] = left;
1333 if (left < leftmost)
1334 leftmost = left;
1335 if (left + width > rightmost)
1336 rightmost = left + width;
1337 }
1338 else
1339 {
1340 if (width > rightmost)
1341 rightmost = width;
1342 }
1343 }
1344 if (embedded_rule)
1345 {
1346 /* Now col_offset[N] are relative to the left edge of the
1347 first component. Make them relative to the left edge of
1348 overall glyph. */
1349 for (i = 0; i < chars; i++)
1350 cmpcharp->col_offset[i] -= leftmost;
1351 /* Make rightmost holds width of overall glyph. */
1352 rightmost -= leftmost;
1353 }
1354
1355 cmpcharp->width = rightmost;
1356 if (cmpcharp->width < rightmost)
1357 /* To get a ceiling integer value. */
1358 cmpcharp->width++;
1359 }
1360
1361 cmpchar_table[n_cmpchars] = cmpcharp;
1362
1363 return n_cmpchars++;
1364}
1365
1366/* Return the Nth element of the composite character C. */
1367int
1368cmpchar_component (c, n)
1369 unsigned int c, n;
1370{
1371 int id = COMPOSITE_CHAR_ID (c);
1372
1373 if (id >= n_cmpchars /* C is not a valid composite character. */
1374 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1375 return -1;
1376 /* No face data is stored in glyph code. */
1377 return ((int) (cmpchar_table[id]->glyph[n]));
1378}
1379
1380DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1381 "T if CHAR is a composite character.")
1382 (ch)
1383 Lisp_Object ch;
1384{
1385 CHECK_NUMBER (ch, 0);
1386 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1387}
1388
1389DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1390 2, 2, 0,
1391 "Return the IDXth component character of composite character CHARACTER.")
1392 (character, idx)
1393 Lisp_Object character, idx;
1394{
1395 int c;
1396
1397 CHECK_NUMBER (character, 0);
1398 CHECK_NUMBER (idx, 1);
1399
1400 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1401 args_out_of_range (character, idx);
1402
1403 return make_number (c);
1404}
1405
1406DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1407 2, 2, 0,
55001746
KH
1408 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1409The returned rule is for composing the Nth component\n\
1410on the (N-1)th component. If N is 0, the returned value is always 255.")
1411 (character, n)
1412 Lisp_Object character, n;
4ed46869
KH
1413{
1414 int id, i;
1415
1416 CHECK_NUMBER (character, 0);
55001746 1417 CHECK_NUMBER (n, 1);
4ed46869
KH
1418
1419 id = COMPOSITE_CHAR_ID (XINT (character));
1420 if (id < 0 || id >= n_cmpchars)
1421 error ("Invalid composite character: %d", XINT (character));
55001746 1422 i = XINT (n);
4ed46869 1423 if (i > cmpchar_table[id]->glyph_len)
55001746 1424 args_out_of_range (character, n);
4ed46869
KH
1425
1426 return make_number (cmpchar_table[id]->cmp_rule[i]);
1427}
1428
1429DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1430 Scmpchar_cmp_rule_p, 1, 1, 0,
1431 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1432 (character)
1433 Lisp_Object character;
1434{
1435 int id;
1436
1437 CHECK_NUMBER (character, 0);
1438 id = COMPOSITE_CHAR_ID (XINT (character));
1439 if (id < 0 || id >= n_cmpchars)
1440 error ("Invalid composite character: %d", XINT (character));
1441
1442 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1443}
1444
1445DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1446 Scmpchar_cmp_count, 1, 1, 0,
1447 "Return number of compoents of composite character CHARACTER.")
1448 (character)
1449 Lisp_Object character;
1450{
1451 int id;
1452
1453 CHECK_NUMBER (character, 0);
1454 id = COMPOSITE_CHAR_ID (XINT (character));
1455 if (id < 0 || id >= n_cmpchars)
1456 error ("Invalid composite character: %d", XINT (character));
1457
1458 return (make_number (cmpchar_table[id]->glyph_len));
1459}
1460
1461DEFUN ("compose-string", Fcompose_string, Scompose_string,
1462 1, 1, 0,
1463 "Return one char string composed from all characters in STRING.")
1464 (str)
1465 Lisp_Object str;
1466{
1467 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1468 int len, i;
1469
1470 CHECK_STRING (str, 0);
1471
1472 buf[0] = LEADING_CODE_COMPOSITION;
1473 p = XSTRING (str)->data;
1474 pend = p + XSTRING (str)->size;
1475 i = 1;
1476 while (p < pend)
1477 {
1478 if (*p < 0x20 || *p == 127) /* control code */
1479 error ("Invalid component character: %d", *p);
1480 else if (*p < 0x80) /* ASCII */
1481 {
1482 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1483 error ("Too long string to be composed: %s", XSTRING (str)->data);
1484 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1485 code itself. */
1486 buf[i++] = 0xA0;
1487 buf[i++] = *p++ + 0x80;
1488 }
1489 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1490 {
1491 /* Already composed. Eliminate the heading
1492 LEADING_CODE_COMPOSITION, keep the remaining bytes
1493 unchanged. */
1494 p++;
1495 ptemp = p;
1496 while (! CHAR_HEAD_P (p)) p++;
1497 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1498 error ("Too long string to be composed: %s", XSTRING (str)->data);
1499 bcopy (ptemp, buf + i, p - ptemp);
1500 i += p - ptemp;
1501 }
1502 else /* multibyte char */
1503 {
1504 /* Add 0x20 to the base leading-code, keep the remaining
1505 bytes unchanged. */
1506 len = BYTES_BY_CHAR_HEAD (*p);
1507 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1508 error ("Too long string to be composed: %s", XSTRING (str)->data);
1509 bcopy (p, buf + i, len);
1510 buf[i] += 0x20;
1511 p += len, i += len;
1512 }
1513 }
1514
1515 if (i < 5)
1516 /* STR contains only one character, which can't be composed. */
1517 error ("Too short string to be composed: %s", XSTRING (str)->data);
1518
1519 return make_string (buf, i);
1520}
1521
1522\f
1523charset_id_internal (charset_name)
1524 char *charset_name;
1525{
1526 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1527
1528 if (!VECTORP (val))
1529 error ("Charset %s is not defined", charset_name);
1530
1531 return (XINT (XVECTOR (val)->contents[0]));
1532}
1533
1534DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1535 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1536 ()
1537{
1538 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1539 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1540 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1541 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1542 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1543 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1544 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1545 return Qnil;
1546}
1547
1548init_charset_once ()
1549{
1550 int i, j, k;
1551
1552 staticpro (&Vcharset_table);
1553 staticpro (&Vcharset_symbol_table);
1554
1555 /* This has to be done here, before we call Fmake_char_table. */
1556 Qcharset_table = intern ("charset-table");
1557 staticpro (&Qcharset_table);
1558
1559 /* Intern this now in case it isn't already done.
1560 Setting this variable twice is harmless.
1561 But don't staticpro it here--that is done in alloc.c. */
1562 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1563
1564 /* Now we are ready to set up this property, so we can
1565 create the charset table. */
1566 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1567 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1568
513ee442 1569 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
4ed46869
KH
1570
1571 /* Setup tables. */
1572 for (i = 0; i < 2; i++)
1573 for (j = 0; j < 2; j++)
1574 for (k = 0; k < 128; k++)
1575 iso_charset_table [i][j][k] = -1;
1576
1577 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1578 cmpchar_table_size = n_cmpchars = 0;
1579
1580 for (i = 0; i < 256; i++)
1581 BYTES_BY_CHAR_HEAD (i) = 1;
1582 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1583 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1584 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1585 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1586 /* The following doesn't reflect the actual bytes, but just to tell
1587 that it is a start of a multibyte character. */
1588 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1589
1590 for (i = 0; i < 128; i++)
1591 WIDTH_BY_CHAR_HEAD (i) = 1;
1592 for (; i < 256; i++)
1593 WIDTH_BY_CHAR_HEAD (i) = 4;
1594 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1595 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1596 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1597 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1598}
1599
1600#ifdef emacs
1601
1602syms_of_charset ()
1603{
1604 Qascii = intern ("ascii");
1605 staticpro (&Qascii);
1606
1607 Qcharset = intern ("charset");
1608 staticpro (&Qcharset);
1609
1610 /* Define ASCII charset now. */
1611 update_charset_table (make_number (CHARSET_ASCII),
1612 make_number (1), make_number (94),
1613 make_number (1),
1614 make_number (0),
1615 make_number ('B'),
1616 make_number (0),
1617 build_string ("ASCII"),
1618 build_string ("ASCII"),
1619 build_string ("ASCII (ISO646 IRV)"));
1620 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1621 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1622
1623 Qcomposition = intern ("composition");
1624 staticpro (&Qcomposition);
1625 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1626
1627 defsubr (&Sdefine_charset);
1628 defsubr (&Sdeclare_equiv_charset);
1629 defsubr (&Sfind_charset_region);
1630 defsubr (&Sfind_charset_string);
1631 defsubr (&Smake_char_internal);
1632 defsubr (&Ssplit_char);
1633 defsubr (&Schar_charset);
1634 defsubr (&Siso_charset);
9d3d8cba 1635 defsubr (&Schar_valid_p);
4ed46869
KH
1636 defsubr (&Schar_bytes);
1637 defsubr (&Schar_width);
1638 defsubr (&Sstring_width);
1639 defsubr (&Schar_direction);
1640 defsubr (&Schars_in_string);
af4fecb4 1641 defsubr (&Schars_in_region);
4ed46869
KH
1642 defsubr (&Schar_boundary_p);
1643 defsubr (&Sconcat_chars);
1644 defsubr (&Scmpcharp);
1645 defsubr (&Scmpchar_component);
1646 defsubr (&Scmpchar_cmp_rule);
1647 defsubr (&Scmpchar_cmp_rule_p);
1648 defsubr (&Scmpchar_cmp_count);
1649 defsubr (&Scompose_string);
1650 defsubr (&Ssetup_special_charsets);
1651
1652 DEFVAR_LISP ("charset-list", &Vcharset_list,
1653 "List of charsets ever defined.");
1654 Vcharset_list = Fcons (Qascii, Qnil);
1655
1656 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1657 "Leading-code of composite characters.");
1658 leading_code_composition = LEADING_CODE_COMPOSITION;
1659
1660 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1661 "Leading-code of private TYPE9N charset of column-width 1.");
1662 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1663
1664 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1665 "Leading-code of private TYPE9N charset of column-width 2.");
1666 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1667
1668 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1669 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1670 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1671
1672 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1673 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1674 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1675}
1676
1677#endif /* emacs */