Initial revision
[bpt/emacs.git] / src / charset.c
CommitLineData
4ed46869
KH
1/* Multilingual characters handler.
2 Ver.1.0
3
4 Copyright (C) 1995 Free Software Foundation, Inc.
5 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21/* At first, see the document in `charset.h' to understand the code in
22 this file. */
23
24#include <stdio.h>
25
26#ifdef emacs
27
28#include <sys/types.h>
29#include <config.h>
30#include "lisp.h"
31#include "buffer.h"
32#include "charset.h"
33#include "coding.h"
34
35#else /* not emacs */
36
37#include "mulelib.h"
38
39#endif /* emacs */
40
41Lisp_Object Qcharset, Qascii, Qcomposition;
42
43/* Declaration of special leading-codes. */
44int leading_code_composition; /* for composite characters */
45int leading_code_private_11; /* for private DIMENSION1 of 1-column */
46int leading_code_private_12; /* for private DIMENSION1 of 2-column */
47int leading_code_private_21; /* for private DIMENSION2 of 1-column */
48int leading_code_private_22; /* for private DIMENSION2 of 2-column */
49
50/* Declaration of special charsets. */
51int charset_ascii; /* ASCII */
52int charset_composition; /* for a composite character */
53int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
54int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
55int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
56int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
57int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
58int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
59int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
60
61Lisp_Object Qcharset_table;
62
63/* A char-table containing information of each character set. */
64Lisp_Object Vcharset_table;
65
66/* A vector of charset symbol indexed by charset-id. This is used
67 only for returning charset symbol from C functions. */
68Lisp_Object Vcharset_symbol_table;
69
70/* A list of charset symbols ever defined. */
71Lisp_Object Vcharset_list;
72
73/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
74int bytes_by_char_head[256];
75int width_by_char_head[256];
76
77/* Mapping table from ISO2022's charset (specified by DIMENSION,
78 CHARS, and FINAL-CHAR) to Emacs' charset. */
79int iso_charset_table[2][2][128];
80
81/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
82unsigned char *_fetch_multibyte_char_p;
83int _fetch_multibyte_char_len;
84
85/* Set STR a pointer to the multi-byte form of the character C. If C
86 is not a composite character, the multi-byte form is set in WORKBUF
87 and STR points WORKBUF. The caller should allocate at least 4-byte
88 area at WORKBUF in advance. Returns the length of the multi-byte
89 form.
90
91 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
92 function directly if C can be an ASCII character. */
93
94int
95non_ascii_char_to_string (c, workbuf, str)
96 int c;
97 unsigned char *workbuf, **str;
98{
99 int charset;
100 unsigned char c1, c2;
101
102 if (COMPOSITE_CHAR_P (c))
103 {
104 int cmpchar_id = COMPOSITE_CHAR_ID (c);
105
106 if (cmpchar_id < n_cmpchars)
107 {
108 *str = cmpchar_table[cmpchar_id]->data;
109 return cmpchar_table[cmpchar_id]->len;
110 }
111 else
112 {
113 *str = workbuf;
114 return 0;
115 }
116 }
117
118 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
119
120 *str = workbuf;
121 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
122 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
123 workbuf++;
124 *workbuf++ = c1 | 0x80;
125 if (c2)
126 *workbuf++ = c2 | 0x80;
127
128 return (workbuf - *str);
129}
130
131/* Return a non-ASCII character of which multi-byte form is at STR of
132 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
133 character is set to the address ACTUAL_LEN.
134
135 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
136 directly if STR can hold an ASCII character. */
137
138string_to_non_ascii_char (str, len, actual_len)
139 unsigned char *str;
140 int len, *actual_len;
141{
142 int charset;
143 unsigned char c1, c2;
144 register int c;
145
146 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
147 {
148 if (actual_len)
149 *actual_len = 1;
150 return (int) *str;
151 }
152
153 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
154
155 if (actual_len)
156 *actual_len = (charset == CHARSET_COMPOSITION
157 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
158 : BYTES_BY_CHAR_HEAD (*str));
159 return c;
160}
161
162/* Return the length of the multi-byte form at string STR of length LEN. */
163int
164multibyte_form_length (str, len)
165 unsigned char *str;
166 int len;
167{
168 int charset;
169 unsigned char c1, c2;
170 register int c;
171
172 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
173 return 1;
174
175 return (charset == CHARSET_COMPOSITION
176 ? cmpchar_table[(c1 << 7) | c2]->len
177 : BYTES_BY_CHAR_HEAD (*str));
178}
179
180/* Check if string STR of length LEN contains valid multi-byte form of
181 a character. If valid, charset and position codes of the character
182 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
183 return -1. This should be used only in the macro SPLIT_STRING
184 which checks range of STR in advance. */
185
186split_non_ascii_string (str, len, charset, c1, c2)
187 register unsigned char *str, *c1, *c2;
188 register int len, *charset;
189{
190 register unsigned int cs = *str++;
191
192 if (cs == LEADING_CODE_COMPOSITION)
193 {
194 int cmpchar_id = str_cmpchar_id (str - 1, len);
195
196 if (cmpchar_id < 0)
197 return -1;
198 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
199 }
200 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
201 && CHARSET_DEFINED_P (cs))
202 {
203 *charset = cs;
204 if (*str < 0xA0)
205 return -1;
206 *c1 = (*str++) & 0x7F;
207 if (CHARSET_DIMENSION (cs) == 2)
208 {
209 if (*str < 0xA0)
210 return -1;
211 *c2 = (*str++) & 0x7F;
212 }
213 }
214 else
215 return -1;
216 return 0;
217}
218
219/* Update the table Vcharset_table with the given arguments (see the
220 document of `define-charset' for the meaning of each argument).
221 Several other table contents are also updated. The caller should
222 check the validity of CHARSET-ID and the remaining arguments in
223 advance. */
224
225void
226update_charset_table (charset_id, dimension, chars, width, direction,
227 iso_final_char, iso_graphic_plane,
228 short_name, long_name, description)
229 Lisp_Object charset_id, dimension, chars, width, direction;
230 Lisp_Object iso_final_char, iso_graphic_plane;
231 Lisp_Object short_name, long_name, description;
232{
233 int charset = XINT (charset_id);
234 int bytes;
235 unsigned char leading_code_base, leading_code_ext;
236
237 if (NILP (Faref (Vcharset_table, charset_id)))
238 Faset (Vcharset_table, charset_id,
239 Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil));
240
241 /* Get byte length of multibyte form, base leading-code, and
242 extended leading-code of the charset. See the comment under the
243 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
244 bytes = XINT (dimension);
245 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
246 {
247 /* Official charset, it doesn't have an extended leading-code. */
248 if (charset != CHARSET_ASCII)
249 bytes += 1; /* For a base leading-code. */
250 leading_code_base = charset;
251 leading_code_ext = 0;
252 }
253 else
254 {
255 /* Private charset. */
256 bytes += 2; /* For base and extended leading-codes. */
257 leading_code_base
258 = (charset < LEADING_CODE_EXT_12
259 ? LEADING_CODE_PRIVATE_11
260 : (charset < LEADING_CODE_EXT_21
261 ? LEADING_CODE_PRIVATE_12
262 : (charset < LEADING_CODE_EXT_22
263 ? LEADING_CODE_PRIVATE_21
264 : LEADING_CODE_PRIVATE_22)));
265 leading_code_ext = charset;
266 }
267
268 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
269 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
270 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
271 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
272 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
273 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
274 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
275 = make_number (leading_code_base);
276 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
277 = make_number (leading_code_ext);
278 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
279 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
280 = iso_graphic_plane;
281 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
282 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
283 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
284 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
285
286 {
287 /* If we have already defined a charset which has the same
288 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
289 DIRECTION, we must update the entry REVERSE-CHARSET of both
290 charsets. If there's no such charset, the value of the entry
291 is set to nil. */
292 int i;
293
294 for (i = 0; i < MAX_CHARSET; i++)
295 if (!NILP (CHARSET_TABLE_ENTRY (i)))
296 {
297 if (CHARSET_DIMENSION (i) == XINT (dimension)
298 && CHARSET_CHARS (i) == XINT (chars)
299 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
300 && CHARSET_DIRECTION (i) != XINT (direction))
301 {
302 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
303 = make_number (i);
304 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
305 break;
306 }
307 }
308 if (i >= MAX_CHARSET)
309 /* No such a charset. */
310 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
311 = make_number (-1);
312 }
313
314 if (charset != CHARSET_ASCII
315 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
316 {
317 /* Update tables bytes_by_char_head and width_by_char_head. */
318 bytes_by_char_head[leading_code_base] = bytes;
319 width_by_char_head[leading_code_base] = XINT (width);
320
321 /* Update table emacs_code_class. */
322 emacs_code_class[charset] = (bytes == 2
323 ? EMACS_leading_code_2
324 : (bytes == 3
325 ? EMACS_leading_code_3
326 : EMACS_leading_code_4));
327 }
328
329 /* Update table iso_charset_table. */
330 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
331 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
332}
333
334#ifdef emacs
335
336/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
337 is invalid. */
338int
339get_charset_id (charset_symbol)
340 Lisp_Object charset_symbol;
341{
342 Lisp_Object val;
343 int charset;
344
345 return ((SYMBOLP (charset_symbol)
346 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
347 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
348 CHARSET_VALID_P (charset)))
349 ? charset : -1);
350}
351
352/* Return an identification number for a new private charset of
353 DIMENSION and WIDTH. If there's no more room for the new charset,
354 return 0. */
355Lisp_Object
356get_new_private_charset_id (dimension, width)
357 int dimension, width;
358{
359 int charset, from, to;
360
361 if (dimension == 1)
362 {
363 if (width == 1)
364 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
365 else
366 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
367 }
368 else
369 {
370 if (width == 1)
371 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
372 else
373 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
374 }
375
376 for (charset = from; charset < to; charset++)
377 if (!CHARSET_DEFINED_P (charset)) break;
378
379 return make_number (charset < to ? charset : 0);
380}
381
382DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
383 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
384If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
385 treated as a private charset.\n\
386INFO-VECTOR is a vector of the format:\n\
387 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
388 SHORT-NAME LONG-NAME DESCRIPTION]\n\
389The meanings of each elements is as follows:\n\
390DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
391CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
392WIDTH (integer) is the number of columns a character in the charset\n\
393occupies on the screen: one of 0, 1, and 2.\n\
394\n\
395DIRECTION (integer) is the rendering direction of characters in the\n\
396charset when rendering. If 0, render from right to left, else\n\
397render from left to right.\n\
398\n\
399ISO-FINAL-CHAR (character) is the final character of the\n\
400corresponding ISO 2022 charset.\n\
401\n\
402ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
403while encoding to variants of ISO 2022 coding system, one of the\n\
404following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
405\n\
406SHORT-NAME (string) is the short name to refer to the charset.\n\
407\n\
408LONG-NAME (string) is the long name to refer to the charset.\n\
409\n\
410DESCRIPTION (string) is the description string of the charset.")
411 (charset_id, charset_symbol, info_vector)
412 Lisp_Object charset_id, charset_symbol, info_vector;
413{
414 Lisp_Object *vec;
415
416 if (!NILP (charset_id))
417 CHECK_NUMBER (charset_id, 0);
418 CHECK_SYMBOL (charset_symbol, 1);
419 CHECK_VECTOR (info_vector, 2);
420
421 if (! NILP (charset_id))
422 {
423 if (! CHARSET_VALID_P (XINT (charset_id)))
424 error ("Invalid CHARSET: %d", XINT (charset_id));
425 else if (CHARSET_DEFINED_P (XINT (charset_id)))
426 error ("Already defined charset: %d", XINT (charset_id));
427 }
428
429 vec = XVECTOR (info_vector)->contents;
430 if (XVECTOR (info_vector)->size != 9
431 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
432 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
433 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
434 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
435 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
436 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
437 || !STRINGP (vec[6])
438 || !STRINGP (vec[7])
439 || !STRINGP (vec[8]))
440 error ("Invalid info-vector argument for defining charset %s",
441 XSYMBOL (charset_symbol)->name->data);
442
443 if (NILP (charset_id))
444 {
445 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
446 if (XINT (charset_id) == 0)
447 error ("There's no room for a new private charset %s",
448 XSYMBOL (charset_symbol)->name->data);
449 }
450
451 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
452 vec[4], vec[5], vec[6], vec[7], vec[8]);
453 Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id));
454 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
455 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
456 return Qnil;
457}
458
459DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
460 4, 4, 0,
461 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
462CHARSET should be defined by `defined-charset' in advance.")
463 (dimension, chars, final_char, charset_symbol)
464 Lisp_Object dimension, chars, final_char, charset_symbol;
465{
466 int charset;
467
468 CHECK_NUMBER (dimension, 0);
469 CHECK_NUMBER (chars, 1);
470 CHECK_NUMBER (final_char, 2);
471 CHECK_SYMBOL (charset_symbol, 3);
472
473 if (XINT (dimension) != 1 && XINT (dimension) != 2)
474 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
475 if (XINT (chars) != 94 && XINT (chars) != 96)
476 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
477 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
478 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
479 if ((charset = get_charset_id (charset_symbol)) < 0)
480 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
481
482 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
483 return Qnil;
484}
485
486/* Return number of different charsets in STR of length LEN. In
487 addition, for each found charset N, CHARSETS[N] is set 1. The
488 caller should allocate CHARSETS (MAX_CHARSET bytes) in advance. */
489
490int
491find_charset_in_str (str, len, charsets)
492 unsigned char *str, *charsets;
493 int len;
494{
495 int num = 0;
496
497 while (len > 0)
498 {
499 int bytes = BYTES_BY_CHAR_HEAD (*str);
500 int charset = CHARSET_AT (str);
501
502 if (!charsets[charset])
503 {
504 charsets[charset] = 1;
505 num += 1;
506 }
507 str += bytes;
508 len -= bytes;
509 }
510 return num;
511}
512
513DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
514 2, 2, 0,
515 "Return a list of charsets in the region between BEG and END.\n\
516BEG and END are buffer positions.")
517 (beg, end)
518 Lisp_Object beg, end;
519{
520 char charsets[MAX_CHARSET];
521 int from, to, stop, i;
522 Lisp_Object val;
523
524 validate_region (&beg, &end);
525 from = XFASTINT (beg);
526 stop = to = XFASTINT (end);
527 if (from < GPT && GPT < to)
528 stop = GPT;
529 bzero (charsets, MAX_CHARSET);
530 while (1)
531 {
532 find_charset_in_str (POS_ADDR (from), stop - from, charsets);
533 if (stop < to)
534 from = stop, stop = to;
535 else
536 break;
537 }
538 val = Qnil;
539 for (i = MAX_CHARSET - 1; i >= 0; i--)
540 if (charsets[i])
541 val = Fcons (CHARSET_SYMBOL (i), val);
542 return val;
543}
544
545DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
546 1, 1, 0,
547 "Return a list of charsets in STR.")
548 (str)
549 Lisp_Object str;
550{
551 char charsets[MAX_CHARSET];
552 int i;
553 Lisp_Object val;
554
555 CHECK_STRING (str, 0);
556 bzero (charsets, MAX_CHARSET);
557 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
558 val = Qnil;
559 for (i = MAX_CHARSET - 1; i >= 0; i--)
560 if (charsets[i])
561 val = Fcons (CHARSET_SYMBOL (i), val);
562 return val;
563}
564\f
565DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
566 "Return a character of CHARSET and position-codes CODE1 and CODE2.\n\
567CODE1 and CODE2 are optional, but if you don't supply\n\
568 sufficient position-codes, return a generic character which stands for\n\
569all characters or group of characters in the character sets.\n\
570A generic character can be an argument of `modify-syntax-entry' and\n\
571`modify-category-entry'.")
572 (charset, code1, code2)
573 Lisp_Object charset, code1, code2;
574{
575 CHECK_NUMBER (charset, 0);
576
577 if (NILP (code1))
578 XSETFASTINT (code1, 0);
579 else
580 CHECK_NUMBER (code1, 1);
581 if (NILP (code2))
582 XSETFASTINT (code2, 0);
583 else
584 CHECK_NUMBER (code2, 2);
585
586 if (!CHARSET_DEFINED_P (XINT (charset)))
587 error ("Invalid charset: %d", XINT (charset));
588
589 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
590}
591
592DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
593 "Return list of charset and one or two position-codes of CHAR.")
594 (ch)
595 Lisp_Object ch;
596{
597 Lisp_Object val;
598 int charset;
599 unsigned char c1, c2;
600
601 CHECK_NUMBER (ch, 0);
602 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
603 return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2)
604 ? Fcons (CHARSET_SYMBOL (charset),
605 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
606 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
607}
608
609DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
610 "Return charset of CHAR.")
611 (ch)
612 Lisp_Object ch;
613{
614 CHECK_NUMBER (ch, 0);
615
616 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
617}
618
619DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
620 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
621 (dimension, chars, final_char)
622 Lisp_Object dimension, chars, final_char;
623{
624 int charset;
625
626 CHECK_NUMBER (dimension, 0);
627 CHECK_NUMBER (chars, 1);
628 CHECK_NUMBER (final_char, 2);
629
630 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
631 return Qnil;
632 return CHARSET_SYMBOL (charset);
633}
634
635DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
636 "Return byte length of multi-byte form of CHAR.")
637 (ch)
638 Lisp_Object ch;
639{
640 Lisp_Object val;
641 int bytes;
642
643 CHECK_NUMBER (ch, 0);
644 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
645 {
646 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
647
648 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
649 }
650 else
651 {
652 int charset = CHAR_CHARSET (XFASTINT (ch));
653
654 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
655 }
656
657 XSETFASTINT (val, bytes);
658 return val;
659}
660
661/* Return the width of character of which multi-byte form starts with
662 C. The width is measured by how many columns occupied on the
663 screen when displayed in the current buffer. */
664
665#define ONE_BYTE_CHAR_WIDTH(c) \
666 (c < 0x20 \
667 ? (c == '\t' \
668 ? current_buffer->tab_width \
669 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
670 : (c < 0x7f \
671 ? 1 \
672 : (c == 0x7F \
673 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
674 : ((! NILP (current_buffer->enable_multibyte_characters) \
675 && BASE_LEADING_CODE_P (c)) \
676 ? WIDTH_BY_CHAR_HEAD (c) \
677 : 4)))) \
678
679
680DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
681 "Return width of CHAR when displayed in the current buffer.\n\
682The width is measured by how many columns it occupies on the screen.")
683 (ch)
684 Lisp_Object ch;
685{
686 Lisp_Object val;
687 int c;
688
689 CHECK_NUMBER (ch, 0);
690
691 c = XFASTINT (ch);
692 if (SINGLE_BYTE_CHAR_P (c))
693 XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
694 else if (COMPOSITE_CHAR_P (c))
695 {
696 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
697 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
698 }
699 else
700 {
701 int charset = CHAR_CHARSET (c);
702
703 XSETFASTINT (val, CHARSET_WIDTH (charset));
704 }
705 return val;
706}
707
708/* Return width of string STR of length LEN when displayed in the
709 current buffer. The width is measured by how many columns it
710 occupies on the screen. */
711int
712strwidth (str, len)
713 unsigned char *str;
714 int len;
715{
716 unsigned char *endp = str + len;
717 int width = 0;
718
719 while (str < endp) {
720 if (*str == LEADING_CODE_COMPOSITION)
721 {
722 int id = str_cmpchar_id (str, endp - str);
723
724 if (id < 0)
725 {
726 width += 4;
727 str++;
728 }
729 else
730 {
731 width += cmpchar_table[id]->width;
732 str += cmpchar_table[id]->len;
733 }
734 }
735 else
736 {
737 width += ONE_BYTE_CHAR_WIDTH (*str);
738 str += BYTES_BY_CHAR_HEAD (*str);
739 }
740 }
741 return width;
742}
743
744DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
745 "Return width of STRING when displayed in the current buffer.\n\
746Width is measured by how many columns it occupies on the screen.\n\
747When calculating width of a multi-byte character in STRING,\n\
748 only the base leading-code is considered and the validity of\n\
749 the following bytes are not checked.")
750 (str)
751 Lisp_Object str;
752{
753 Lisp_Object val;
754
755 CHECK_STRING (str, 0);
756 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
757 return val;
758}
759
760DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
761 "Return the direction of CHAR.\n\
762The returned value is 0 for left-to-right and 1 for right-to-left.")
763 (ch)
764 Lisp_Object ch;
765{
766 int charset;
767
768 CHECK_NUMBER (ch, 0);
769 charset = CHAR_CHARSET (XFASTINT (ch));
770 if (!CHARSET_DEFINED_P (charset))
771 error ("Invalid character: %d", XINT (ch));
772 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
773}
774
775DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
776 "Return number of characters in STRING.")
777 (str)
778 Lisp_Object str;
779{
780 Lisp_Object val;
781 unsigned char *p, *endp;
782 int chars;
783
784 CHECK_STRING (str, 0);
785
786 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
787 chars = 0;
788 while (p < endp)
789 {
790 if (*p == LEADING_CODE_COMPOSITION)
791 {
792 p++;
793 while (p < endp && ! CHAR_HEAD_P (p)) p++;
794 }
795 else
796 p += BYTES_BY_CHAR_HEAD (*p);
797 chars++;
798 }
799
800 XSETFASTINT (val, chars);
801 return val;
802}
803
804DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
805 "Return non-nil value if POS is at character boundary of multibyte form.\n\
806The return value is:\n\
807 0 if POS is at an ASCII character or at the end of range,\n\
808 1 if POS is at a head of 2-byte length multi-byte form,\n\
809 2 if POS is at a head of 3-byte length multi-byte form,\n\
810 3 if POS is at a head of 4-byte length multi-byte form,\n\
811 4 if POS is at a head of multi-byte form of a composite character.\n\
812If POS is out of range or not at character boundary, return NIL.")
813 (pos)
814 Lisp_Object pos;
815{
816 Lisp_Object val;
817 int n;
818
819 CHECK_NUMBER_COERCE_MARKER (pos, 0);
820
821 n = XINT (pos);
822 if (n < BEGV || n > ZV)
823 return Qnil;
824
825 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
826 XSETFASTINT (val, 0);
827 else
828 {
829 unsigned char *p = POS_ADDR (n);
830
831 if (SINGLE_BYTE_CHAR_P (*p))
832 XSETFASTINT (val, 0);
833 else if (*p == LEADING_CODE_COMPOSITION)
834 XSETFASTINT (val, 4);
835 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
836 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
837 else
838 val = Qnil;
839 }
840 return val;
841}
842
843DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
844 "Concatenate all the argument characters and make the result a string.")
845 (nargs, args)
846 int nargs;
847 Lisp_Object *args;
848{
849 int i, n = XINT (nargs);
850 unsigned char *buf
851 = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
852 unsigned char *p = buf;
853 Lisp_Object val;
854
855 for (i = 0; i < n; i++)
856 {
857 int c, len;
858 unsigned char *str;
859
860 if (!INTEGERP (args[i]))
861 {
862 free (buf);
863 CHECK_NUMBER (args[i], 0);
864 }
865 c = XINT (args[i]);
866 len = CHAR_STRING (c, p, str);
867 if (p != str)
868 /* C is a composite character. */
869 bcopy (str, p, len);
870 p += len;
871 }
872
873 val = make_string (buf, p - buf);
874 free (buf);
875 return val;
876}
877
878#endif /* emacs */
879\f
880/*** Composite characters staffs ***/
881
882/* Each composite character is identified by CMPCHAR-ID which is
883 assigned when Emacs needs the character code of the composite
884 character (e.g. when displaying it on the screen). See the
885 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
886 composite character is represented in Emacs. */
887
888/* If `static' is defined, it means that it is defined to null string. */
889#ifndef static
890/* The following function is copied from lread.c. */
891static int
892hash_string (ptr, len)
893 unsigned char *ptr;
894 int len;
895{
896 register unsigned char *p = ptr;
897 register unsigned char *end = p + len;
898 register unsigned char c;
899 register int hash = 0;
900
901 while (p != end)
902 {
903 c = *p++;
904 if (c >= 0140) c -= 40;
905 hash = ((hash<<3) + (hash>>28) + c);
906 }
907 return hash & 07777777777;
908}
909#endif
910
911/* Table of pointers to the structure `cmpchar_info' indexed by
912 CMPCHAR-ID. */
913struct cmpchar_info **cmpchar_table;
914/* The current size of `cmpchar_table'. */
915static int cmpchar_table_size;
916/* Number of the current composite characters. */
917int n_cmpchars;
918
919#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
920
921static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
922
923/* Each element of `cmpchar_hash_table' is a pointer to an array of
924 integer, where the 1st element is the size of the array, the 2nd
925 element is how many elements are actually used in the array, and
926 the remaining elements are CMPCHAR-IDs of composite characters of
927 the same hash value. */
928#define CMPCHAR_HASH_SIZE(table) table[0]
929#define CMPCHAR_HASH_USED(table) table[1]
930#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
931
932/* Return CMPCHAR-ID of the composite character in STR of the length
933 LEN. If the composite character has not yet been registered,
934 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
935 is the sole function for assigning CMPCHAR-ID. */
936int
937str_cmpchar_id (str, len)
938 unsigned char *str;
939 int len;
940{
941 int hash_idx, *hashp;
942 unsigned char *buf;
943 int embedded_rule; /* 1 if composition rule is embedded. */
944 int chars; /* number of components. */
945 int i;
946 struct cmpchar_info *cmpcharp;
947
948 if (len < 5)
949 /* Any composite char have at least 3-byte length. */
950 return -1;
951
952 /* The second byte 0xFF means compostion rule is embedded. */
953 embedded_rule = (str[1] == 0xFF);
954
955 /* At first, get the actual length of the composite character. */
956 {
957 unsigned char *p, *endp = str + 1, *lastp = str + len;
958 int bytes;
959
960 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
961 chars = 0;
962 p = str + 1 + embedded_rule;
963 while (p < endp)
964 {
965 /* No need of checking if *P is 0xA0 because
966 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
967 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
968 chars++;
969 }
970 len = (p -= embedded_rule) - str;
971 if (p > endp)
972 len -= - bytes, chars--;
973
974 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
975 /* Invalid number of components. */
976 return -1;
977 }
978 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
979 hashp = cmpchar_hash_table[hash_idx];
980
981 /* Then, look into the hash table. */
982 if (hashp != NULL)
983 /* Find the correct one among composite characters of the same
984 hash value. */
985 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
986 {
987 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
988 if (len == cmpcharp->len
989 && ! bcmp (str, cmpcharp->data, len))
990 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
991 }
992
993 /* We have to register the composite character in cmpchar_table. */
994 /* Make the entry in hash table. */
995 if (hashp == NULL)
996 {
997 /* Make a table for 8 composite characters initially. */
998 hashp = (cmpchar_hash_table[hash_idx]
999 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1000 CMPCHAR_HASH_SIZE (hashp) = 10;
1001 CMPCHAR_HASH_USED (hashp) = 2;
1002 }
1003 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1004 {
1005 CMPCHAR_HASH_SIZE (hashp) += 8;
1006 hashp = (cmpchar_hash_table[hash_idx]
1007 = (int *) xrealloc (hashp,
1008 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1009 }
1010 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1011 CMPCHAR_HASH_USED (hashp)++;
1012
1013 /* Set information of the composite character in cmpchar_table. */
1014 if (cmpchar_table_size == 0)
1015 {
1016 /* This is the first composite character to be registered. */
1017 cmpchar_table_size = 256;
1018 cmpchar_table
1019 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1020 * cmpchar_table_size);
1021 }
1022 else if (cmpchar_table_size <= n_cmpchars)
1023 {
1024 cmpchar_table_size += 256;
1025 cmpchar_table
1026 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1027 sizeof (cmpchar_table[0])
1028 * cmpchar_table_size);
1029 }
1030
1031 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1032
1033 cmpcharp->len = len;
1034 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1035 bcopy (str, cmpcharp->data, len);
1036 cmpcharp->data[len] = 0;
1037 cmpcharp->glyph_len = chars;
1038 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1039 if (embedded_rule)
1040 {
1041 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1042 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1043 }
1044 else
1045 {
1046 cmpcharp->cmp_rule = NULL;
1047 cmpcharp->col_offset = NULL;
1048 }
1049
1050 /* Setup GLYPH data and composition rules (if any) so as not to make
1051 them every time on displaying. */
1052 {
1053 unsigned char *bufp;
1054 int width;
1055 float leftmost = 0.0, rightmost = 1.0;
1056
1057 if (embedded_rule)
1058 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1059 cmpcharp->col_offset[0] = 0;
1060
1061 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1062 {
1063 if (embedded_rule)
1064 cmpcharp->cmp_rule[i] = *bufp++;
1065
1066 if (*bufp == 0xA0) /* This is an ASCII character. */
1067 {
1068 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1069 width = 1;
1070 bufp++;
1071 }
1072 else /* Multibyte character. */
1073 {
1074 /* Make `bufp' point normal multi-byte form temporally. */
1075 *bufp -= 0x20;
1076 cmpcharp->glyph[i]
1077 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1078 width = WIDTH_BY_CHAR_HEAD (*bufp);
1079 *bufp += 0x20;
1080 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1081 }
1082
1083 if (embedded_rule && i > 0)
1084 {
1085 /* Reference points (global_ref and new_ref) are
1086 encoded as below:
1087
1088 0--1--2 -- ascent
1089 | |
1090 | |
1091 | 4 -+--- center
1092 -- 3 5 -- baseline
1093 | |
1094 6--7--8 -- descent
1095
1096 Now, we calculate the column offset of the new glyph
1097 from the left edge of the first glyph. This can avoid
1098 the same calculation everytime displaying this
1099 composite character. */
1100
1101 /* Reference points of global glyph and new glyph. */
1102 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1103 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1104 /* Column offset relative to the first glyph. */
1105 float left = (leftmost
1106 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1107 - (new_ref % 3) * width / 2.0);
1108
1109 cmpcharp->col_offset[i] = left;
1110 if (left < leftmost)
1111 leftmost = left;
1112 if (left + width > rightmost)
1113 rightmost = left + width;
1114 }
1115 else
1116 {
1117 if (width > rightmost)
1118 rightmost = width;
1119 }
1120 }
1121 if (embedded_rule)
1122 {
1123 /* Now col_offset[N] are relative to the left edge of the
1124 first component. Make them relative to the left edge of
1125 overall glyph. */
1126 for (i = 0; i < chars; i++)
1127 cmpcharp->col_offset[i] -= leftmost;
1128 /* Make rightmost holds width of overall glyph. */
1129 rightmost -= leftmost;
1130 }
1131
1132 cmpcharp->width = rightmost;
1133 if (cmpcharp->width < rightmost)
1134 /* To get a ceiling integer value. */
1135 cmpcharp->width++;
1136 }
1137
1138 cmpchar_table[n_cmpchars] = cmpcharp;
1139
1140 return n_cmpchars++;
1141}
1142
1143/* Return the Nth element of the composite character C. */
1144int
1145cmpchar_component (c, n)
1146 unsigned int c, n;
1147{
1148 int id = COMPOSITE_CHAR_ID (c);
1149
1150 if (id >= n_cmpchars /* C is not a valid composite character. */
1151 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1152 return -1;
1153 /* No face data is stored in glyph code. */
1154 return ((int) (cmpchar_table[id]->glyph[n]));
1155}
1156
1157DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1158 "T if CHAR is a composite character.")
1159 (ch)
1160 Lisp_Object ch;
1161{
1162 CHECK_NUMBER (ch, 0);
1163 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1164}
1165
1166DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1167 2, 2, 0,
1168 "Return the IDXth component character of composite character CHARACTER.")
1169 (character, idx)
1170 Lisp_Object character, idx;
1171{
1172 int c;
1173
1174 CHECK_NUMBER (character, 0);
1175 CHECK_NUMBER (idx, 1);
1176
1177 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1178 args_out_of_range (character, idx);
1179
1180 return make_number (c);
1181}
1182
1183DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1184 2, 2, 0,
1185 "Return the IDXth composition rule embedded in composite character CHARACTER.
1186The returned rule is for composing the IDXth component
1187on the (IDX-1)th component. If IDX is 0, the returned value is always 255.")
1188 (character, idx)
1189 Lisp_Object character, idx;
1190{
1191 int id, i;
1192
1193 CHECK_NUMBER (character, 0);
1194 CHECK_NUMBER (idx, 1);
1195
1196 id = COMPOSITE_CHAR_ID (XINT (character));
1197 if (id < 0 || id >= n_cmpchars)
1198 error ("Invalid composite character: %d", XINT (character));
1199 i = XINT (idx);
1200 if (i > cmpchar_table[id]->glyph_len)
1201 args_out_of_range (character, idx);
1202
1203 return make_number (cmpchar_table[id]->cmp_rule[i]);
1204}
1205
1206DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1207 Scmpchar_cmp_rule_p, 1, 1, 0,
1208 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1209 (character)
1210 Lisp_Object character;
1211{
1212 int id;
1213
1214 CHECK_NUMBER (character, 0);
1215 id = COMPOSITE_CHAR_ID (XINT (character));
1216 if (id < 0 || id >= n_cmpchars)
1217 error ("Invalid composite character: %d", XINT (character));
1218
1219 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1220}
1221
1222DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1223 Scmpchar_cmp_count, 1, 1, 0,
1224 "Return number of compoents of composite character CHARACTER.")
1225 (character)
1226 Lisp_Object character;
1227{
1228 int id;
1229
1230 CHECK_NUMBER (character, 0);
1231 id = COMPOSITE_CHAR_ID (XINT (character));
1232 if (id < 0 || id >= n_cmpchars)
1233 error ("Invalid composite character: %d", XINT (character));
1234
1235 return (make_number (cmpchar_table[id]->glyph_len));
1236}
1237
1238DEFUN ("compose-string", Fcompose_string, Scompose_string,
1239 1, 1, 0,
1240 "Return one char string composed from all characters in STRING.")
1241 (str)
1242 Lisp_Object str;
1243{
1244 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1245 int len, i;
1246
1247 CHECK_STRING (str, 0);
1248
1249 buf[0] = LEADING_CODE_COMPOSITION;
1250 p = XSTRING (str)->data;
1251 pend = p + XSTRING (str)->size;
1252 i = 1;
1253 while (p < pend)
1254 {
1255 if (*p < 0x20 || *p == 127) /* control code */
1256 error ("Invalid component character: %d", *p);
1257 else if (*p < 0x80) /* ASCII */
1258 {
1259 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1260 error ("Too long string to be composed: %s", XSTRING (str)->data);
1261 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1262 code itself. */
1263 buf[i++] = 0xA0;
1264 buf[i++] = *p++ + 0x80;
1265 }
1266 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1267 {
1268 /* Already composed. Eliminate the heading
1269 LEADING_CODE_COMPOSITION, keep the remaining bytes
1270 unchanged. */
1271 p++;
1272 ptemp = p;
1273 while (! CHAR_HEAD_P (p)) p++;
1274 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1275 error ("Too long string to be composed: %s", XSTRING (str)->data);
1276 bcopy (ptemp, buf + i, p - ptemp);
1277 i += p - ptemp;
1278 }
1279 else /* multibyte char */
1280 {
1281 /* Add 0x20 to the base leading-code, keep the remaining
1282 bytes unchanged. */
1283 len = BYTES_BY_CHAR_HEAD (*p);
1284 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1285 error ("Too long string to be composed: %s", XSTRING (str)->data);
1286 bcopy (p, buf + i, len);
1287 buf[i] += 0x20;
1288 p += len, i += len;
1289 }
1290 }
1291
1292 if (i < 5)
1293 /* STR contains only one character, which can't be composed. */
1294 error ("Too short string to be composed: %s", XSTRING (str)->data);
1295
1296 return make_string (buf, i);
1297}
1298
1299\f
1300charset_id_internal (charset_name)
1301 char *charset_name;
1302{
1303 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1304
1305 if (!VECTORP (val))
1306 error ("Charset %s is not defined", charset_name);
1307
1308 return (XINT (XVECTOR (val)->contents[0]));
1309}
1310
1311DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1312 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1313 ()
1314{
1315 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1316 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1317 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1318 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1319 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1320 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1321 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1322 return Qnil;
1323}
1324
1325init_charset_once ()
1326{
1327 int i, j, k;
1328
1329 staticpro (&Vcharset_table);
1330 staticpro (&Vcharset_symbol_table);
1331
1332 /* This has to be done here, before we call Fmake_char_table. */
1333 Qcharset_table = intern ("charset-table");
1334 staticpro (&Qcharset_table);
1335
1336 /* Intern this now in case it isn't already done.
1337 Setting this variable twice is harmless.
1338 But don't staticpro it here--that is done in alloc.c. */
1339 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1340
1341 /* Now we are ready to set up this property, so we can
1342 create the charset table. */
1343 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1344 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1345
1346 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET), Qnil);
1347
1348 /* Setup tables. */
1349 for (i = 0; i < 2; i++)
1350 for (j = 0; j < 2; j++)
1351 for (k = 0; k < 128; k++)
1352 iso_charset_table [i][j][k] = -1;
1353
1354 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1355 cmpchar_table_size = n_cmpchars = 0;
1356
1357 for (i = 0; i < 256; i++)
1358 BYTES_BY_CHAR_HEAD (i) = 1;
1359 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1360 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1361 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1362 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1363 /* The following doesn't reflect the actual bytes, but just to tell
1364 that it is a start of a multibyte character. */
1365 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1366
1367 for (i = 0; i < 128; i++)
1368 WIDTH_BY_CHAR_HEAD (i) = 1;
1369 for (; i < 256; i++)
1370 WIDTH_BY_CHAR_HEAD (i) = 4;
1371 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1372 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1373 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1374 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1375}
1376
1377#ifdef emacs
1378
1379syms_of_charset ()
1380{
1381 Qascii = intern ("ascii");
1382 staticpro (&Qascii);
1383
1384 Qcharset = intern ("charset");
1385 staticpro (&Qcharset);
1386
1387 /* Define ASCII charset now. */
1388 update_charset_table (make_number (CHARSET_ASCII),
1389 make_number (1), make_number (94),
1390 make_number (1),
1391 make_number (0),
1392 make_number ('B'),
1393 make_number (0),
1394 build_string ("ASCII"),
1395 build_string ("ASCII"),
1396 build_string ("ASCII (ISO646 IRV)"));
1397 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1398 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1399
1400 Qcomposition = intern ("composition");
1401 staticpro (&Qcomposition);
1402 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1403
1404 defsubr (&Sdefine_charset);
1405 defsubr (&Sdeclare_equiv_charset);
1406 defsubr (&Sfind_charset_region);
1407 defsubr (&Sfind_charset_string);
1408 defsubr (&Smake_char_internal);
1409 defsubr (&Ssplit_char);
1410 defsubr (&Schar_charset);
1411 defsubr (&Siso_charset);
1412 defsubr (&Schar_bytes);
1413 defsubr (&Schar_width);
1414 defsubr (&Sstring_width);
1415 defsubr (&Schar_direction);
1416 defsubr (&Schars_in_string);
1417 defsubr (&Schar_boundary_p);
1418 defsubr (&Sconcat_chars);
1419 defsubr (&Scmpcharp);
1420 defsubr (&Scmpchar_component);
1421 defsubr (&Scmpchar_cmp_rule);
1422 defsubr (&Scmpchar_cmp_rule_p);
1423 defsubr (&Scmpchar_cmp_count);
1424 defsubr (&Scompose_string);
1425 defsubr (&Ssetup_special_charsets);
1426
1427 DEFVAR_LISP ("charset-list", &Vcharset_list,
1428 "List of charsets ever defined.");
1429 Vcharset_list = Fcons (Qascii, Qnil);
1430
1431 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1432 "Leading-code of composite characters.");
1433 leading_code_composition = LEADING_CODE_COMPOSITION;
1434
1435 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1436 "Leading-code of private TYPE9N charset of column-width 1.");
1437 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1438
1439 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1440 "Leading-code of private TYPE9N charset of column-width 2.");
1441 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1442
1443 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1444 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1445 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1446
1447 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1448 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1449 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1450}
1451
1452#endif /* emacs */