(CHARSET_TABLE_ENTRY): Handle ASCII charset correctly.
[bpt/emacs.git] / src / charset.c
CommitLineData
4ed46869
KH
1/* Multilingual characters handler.
2 Ver.1.0
4ed46869
KH
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
369314dc
KH
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
4ed46869 12
369314dc
KH
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
4ed46869 17
369314dc
KH
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
20the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA. */
4ed46869
KH
22
23/* At first, see the document in `charset.h' to understand the code in
24 this file. */
25
26#include <stdio.h>
27
28#ifdef emacs
29
30#include <sys/types.h>
31#include <config.h>
32#include "lisp.h"
33#include "buffer.h"
34#include "charset.h"
35#include "coding.h"
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
44
45/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */
47int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52/* Declaration of special charsets. */
53int charset_ascii; /* ASCII */
54int charset_composition; /* for a composite character */
55int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63Lisp_Object Qcharset_table;
64
65/* A char-table containing information of each character set. */
66Lisp_Object Vcharset_table;
67
68/* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70Lisp_Object Vcharset_symbol_table;
71
72/* A list of charset symbols ever defined. */
73Lisp_Object Vcharset_list;
74
75/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76int bytes_by_char_head[256];
77int width_by_char_head[256];
78
79/* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81int iso_charset_table[2][2][128];
82
513ee442
KH
83/* Table of pointers to the structure `cmpchar_info' indexed by
84 CMPCHAR-ID. */
85struct cmpchar_info **cmpchar_table;
86/* The current size of `cmpchar_table'. */
87static int cmpchar_table_size;
88/* Number of the current composite characters. */
89int n_cmpchars;
90
4ed46869
KH
91/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
92unsigned char *_fetch_multibyte_char_p;
93int _fetch_multibyte_char_len;
94
95/* Set STR a pointer to the multi-byte form of the character C. If C
96 is not a composite character, the multi-byte form is set in WORKBUF
97 and STR points WORKBUF. The caller should allocate at least 4-byte
98 area at WORKBUF in advance. Returns the length of the multi-byte
99 form.
100
101 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
102 function directly if C can be an ASCII character. */
103
104int
105non_ascii_char_to_string (c, workbuf, str)
106 int c;
107 unsigned char *workbuf, **str;
108{
109 int charset;
110 unsigned char c1, c2;
111
112 if (COMPOSITE_CHAR_P (c))
113 {
114 int cmpchar_id = COMPOSITE_CHAR_ID (c);
115
116 if (cmpchar_id < n_cmpchars)
117 {
118 *str = cmpchar_table[cmpchar_id]->data;
119 return cmpchar_table[cmpchar_id]->len;
120 }
121 else
122 {
123 *str = workbuf;
124 return 0;
125 }
126 }
127
128 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
129
130 *str = workbuf;
131 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
132 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
133 workbuf++;
134 *workbuf++ = c1 | 0x80;
135 if (c2)
136 *workbuf++ = c2 | 0x80;
137
138 return (workbuf - *str);
139}
140
141/* Return a non-ASCII character of which multi-byte form is at STR of
142 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
143 character is set to the address ACTUAL_LEN.
144
145 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
146 directly if STR can hold an ASCII character. */
147
148string_to_non_ascii_char (str, len, actual_len)
149 unsigned char *str;
150 int len, *actual_len;
151{
152 int charset;
153 unsigned char c1, c2;
154 register int c;
155
156 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
157 {
158 if (actual_len)
159 *actual_len = 1;
160 return (int) *str;
161 }
162
163 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
164
165 if (actual_len)
166 *actual_len = (charset == CHARSET_COMPOSITION
167 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
168 : BYTES_BY_CHAR_HEAD (*str));
169 return c;
170}
171
172/* Return the length of the multi-byte form at string STR of length LEN. */
173int
174multibyte_form_length (str, len)
175 unsigned char *str;
176 int len;
177{
178 int charset;
179 unsigned char c1, c2;
180 register int c;
181
182 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
183 return 1;
184
185 return (charset == CHARSET_COMPOSITION
186 ? cmpchar_table[(c1 << 7) | c2]->len
187 : BYTES_BY_CHAR_HEAD (*str));
188}
189
190/* Check if string STR of length LEN contains valid multi-byte form of
191 a character. If valid, charset and position codes of the character
192 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
193 return -1. This should be used only in the macro SPLIT_STRING
194 which checks range of STR in advance. */
195
196split_non_ascii_string (str, len, charset, c1, c2)
197 register unsigned char *str, *c1, *c2;
198 register int len, *charset;
199{
200 register unsigned int cs = *str++;
201
202 if (cs == LEADING_CODE_COMPOSITION)
203 {
204 int cmpchar_id = str_cmpchar_id (str - 1, len);
205
206 if (cmpchar_id < 0)
207 return -1;
208 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
209 }
210 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
211 && CHARSET_DEFINED_P (cs))
212 {
213 *charset = cs;
214 if (*str < 0xA0)
215 return -1;
216 *c1 = (*str++) & 0x7F;
217 if (CHARSET_DIMENSION (cs) == 2)
218 {
219 if (*str < 0xA0)
220 return -1;
221 *c2 = (*str++) & 0x7F;
222 }
223 }
224 else
225 return -1;
226 return 0;
227}
228
229/* Update the table Vcharset_table with the given arguments (see the
230 document of `define-charset' for the meaning of each argument).
231 Several other table contents are also updated. The caller should
232 check the validity of CHARSET-ID and the remaining arguments in
233 advance. */
234
235void
236update_charset_table (charset_id, dimension, chars, width, direction,
237 iso_final_char, iso_graphic_plane,
238 short_name, long_name, description)
239 Lisp_Object charset_id, dimension, chars, width, direction;
240 Lisp_Object iso_final_char, iso_graphic_plane;
241 Lisp_Object short_name, long_name, description;
242{
243 int charset = XINT (charset_id);
244 int bytes;
245 unsigned char leading_code_base, leading_code_ext;
246
247 if (NILP (Faref (Vcharset_table, charset_id)))
248 Faset (Vcharset_table, charset_id,
249 Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil));
250
251 /* Get byte length of multibyte form, base leading-code, and
252 extended leading-code of the charset. See the comment under the
253 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
254 bytes = XINT (dimension);
255 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
256 {
257 /* Official charset, it doesn't have an extended leading-code. */
258 if (charset != CHARSET_ASCII)
259 bytes += 1; /* For a base leading-code. */
260 leading_code_base = charset;
261 leading_code_ext = 0;
262 }
263 else
264 {
265 /* Private charset. */
266 bytes += 2; /* For base and extended leading-codes. */
267 leading_code_base
268 = (charset < LEADING_CODE_EXT_12
269 ? LEADING_CODE_PRIVATE_11
270 : (charset < LEADING_CODE_EXT_21
271 ? LEADING_CODE_PRIVATE_12
272 : (charset < LEADING_CODE_EXT_22
273 ? LEADING_CODE_PRIVATE_21
274 : LEADING_CODE_PRIVATE_22)));
275 leading_code_ext = charset;
276 }
277
278 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
279 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
280 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
281 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
282 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
283 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
284 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
285 = make_number (leading_code_base);
286 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
287 = make_number (leading_code_ext);
288 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
289 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
290 = iso_graphic_plane;
291 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
292 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
293 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
294 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
295
296 {
297 /* If we have already defined a charset which has the same
298 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
299 DIRECTION, we must update the entry REVERSE-CHARSET of both
300 charsets. If there's no such charset, the value of the entry
301 is set to nil. */
302 int i;
303
513ee442 304 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
305 if (!NILP (CHARSET_TABLE_ENTRY (i)))
306 {
307 if (CHARSET_DIMENSION (i) == XINT (dimension)
308 && CHARSET_CHARS (i) == XINT (chars)
309 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
310 && CHARSET_DIRECTION (i) != XINT (direction))
311 {
312 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
313 = make_number (i);
314 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
315 break;
316 }
317 }
513ee442 318 if (i > MAX_CHARSET)
4ed46869
KH
319 /* No such a charset. */
320 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
321 = make_number (-1);
322 }
323
324 if (charset != CHARSET_ASCII
325 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
326 {
327 /* Update tables bytes_by_char_head and width_by_char_head. */
328 bytes_by_char_head[leading_code_base] = bytes;
329 width_by_char_head[leading_code_base] = XINT (width);
330
331 /* Update table emacs_code_class. */
332 emacs_code_class[charset] = (bytes == 2
333 ? EMACS_leading_code_2
334 : (bytes == 3
335 ? EMACS_leading_code_3
336 : EMACS_leading_code_4));
337 }
338
339 /* Update table iso_charset_table. */
340 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
341 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
342}
343
344#ifdef emacs
345
346/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
347 is invalid. */
348int
349get_charset_id (charset_symbol)
350 Lisp_Object charset_symbol;
351{
352 Lisp_Object val;
353 int charset;
354
355 return ((SYMBOLP (charset_symbol)
356 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
357 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
358 CHARSET_VALID_P (charset)))
359 ? charset : -1);
360}
361
362/* Return an identification number for a new private charset of
363 DIMENSION and WIDTH. If there's no more room for the new charset,
364 return 0. */
365Lisp_Object
366get_new_private_charset_id (dimension, width)
367 int dimension, width;
368{
369 int charset, from, to;
370
371 if (dimension == 1)
372 {
373 if (width == 1)
374 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
375 else
376 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
377 }
378 else
379 {
380 if (width == 1)
381 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
382 else
383 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
384 }
385
386 for (charset = from; charset < to; charset++)
387 if (!CHARSET_DEFINED_P (charset)) break;
388
389 return make_number (charset < to ? charset : 0);
390}
391
392DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
393 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
394If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
395 treated as a private charset.\n\
396INFO-VECTOR is a vector of the format:\n\
397 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
398 SHORT-NAME LONG-NAME DESCRIPTION]\n\
399The meanings of each elements is as follows:\n\
400DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
401CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
402WIDTH (integer) is the number of columns a character in the charset\n\
403occupies on the screen: one of 0, 1, and 2.\n\
404\n\
405DIRECTION (integer) is the rendering direction of characters in the\n\
406charset when rendering. If 0, render from right to left, else\n\
407render from left to right.\n\
408\n\
409ISO-FINAL-CHAR (character) is the final character of the\n\
410corresponding ISO 2022 charset.\n\
411\n\
412ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
413while encoding to variants of ISO 2022 coding system, one of the\n\
414following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
415\n\
416SHORT-NAME (string) is the short name to refer to the charset.\n\
417\n\
418LONG-NAME (string) is the long name to refer to the charset.\n\
419\n\
420DESCRIPTION (string) is the description string of the charset.")
421 (charset_id, charset_symbol, info_vector)
422 Lisp_Object charset_id, charset_symbol, info_vector;
423{
424 Lisp_Object *vec;
425
426 if (!NILP (charset_id))
427 CHECK_NUMBER (charset_id, 0);
428 CHECK_SYMBOL (charset_symbol, 1);
429 CHECK_VECTOR (info_vector, 2);
430
431 if (! NILP (charset_id))
432 {
433 if (! CHARSET_VALID_P (XINT (charset_id)))
434 error ("Invalid CHARSET: %d", XINT (charset_id));
435 else if (CHARSET_DEFINED_P (XINT (charset_id)))
436 error ("Already defined charset: %d", XINT (charset_id));
437 }
438
439 vec = XVECTOR (info_vector)->contents;
440 if (XVECTOR (info_vector)->size != 9
441 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
442 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
443 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
444 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
445 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
446 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
447 || !STRINGP (vec[6])
448 || !STRINGP (vec[7])
449 || !STRINGP (vec[8]))
450 error ("Invalid info-vector argument for defining charset %s",
451 XSYMBOL (charset_symbol)->name->data);
452
453 if (NILP (charset_id))
454 {
455 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
456 if (XINT (charset_id) == 0)
457 error ("There's no room for a new private charset %s",
458 XSYMBOL (charset_symbol)->name->data);
459 }
460
461 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
462 vec[4], vec[5], vec[6], vec[7], vec[8]);
463 Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id));
464 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
465 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
466 return Qnil;
467}
468
469DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
470 4, 4, 0,
471 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
472CHARSET should be defined by `defined-charset' in advance.")
473 (dimension, chars, final_char, charset_symbol)
474 Lisp_Object dimension, chars, final_char, charset_symbol;
475{
476 int charset;
477
478 CHECK_NUMBER (dimension, 0);
479 CHECK_NUMBER (chars, 1);
480 CHECK_NUMBER (final_char, 2);
481 CHECK_SYMBOL (charset_symbol, 3);
482
483 if (XINT (dimension) != 1 && XINT (dimension) != 2)
484 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
485 if (XINT (chars) != 94 && XINT (chars) != 96)
486 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
487 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
488 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
489 if ((charset = get_charset_id (charset_symbol)) < 0)
490 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
491
492 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
493 return Qnil;
494}
495
496/* Return number of different charsets in STR of length LEN. In
497 addition, for each found charset N, CHARSETS[N] is set 1. The
513ee442 498 caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance. */
4ed46869
KH
499
500int
501find_charset_in_str (str, len, charsets)
502 unsigned char *str, *charsets;
503 int len;
504{
505 int num = 0;
506
507 while (len > 0)
508 {
509 int bytes = BYTES_BY_CHAR_HEAD (*str);
510 int charset = CHARSET_AT (str);
511
512 if (!charsets[charset])
513 {
514 charsets[charset] = 1;
515 num += 1;
516 }
517 str += bytes;
518 len -= bytes;
519 }
520 return num;
521}
522
523DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
524 2, 2, 0,
525 "Return a list of charsets in the region between BEG and END.\n\
526BEG and END are buffer positions.")
527 (beg, end)
528 Lisp_Object beg, end;
529{
513ee442 530 char charsets[MAX_CHARSET + 1];
4ed46869
KH
531 int from, to, stop, i;
532 Lisp_Object val;
533
534 validate_region (&beg, &end);
535 from = XFASTINT (beg);
536 stop = to = XFASTINT (end);
537 if (from < GPT && GPT < to)
538 stop = GPT;
513ee442 539 bzero (charsets, MAX_CHARSET + 1);
4ed46869
KH
540 while (1)
541 {
542 find_charset_in_str (POS_ADDR (from), stop - from, charsets);
543 if (stop < to)
544 from = stop, stop = to;
545 else
546 break;
547 }
548 val = Qnil;
513ee442 549 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
550 if (charsets[i])
551 val = Fcons (CHARSET_SYMBOL (i), val);
552 return val;
553}
554
555DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
556 1, 1, 0,
557 "Return a list of charsets in STR.")
558 (str)
559 Lisp_Object str;
560{
513ee442 561 char charsets[MAX_CHARSET + 1];
4ed46869
KH
562 int i;
563 Lisp_Object val;
564
565 CHECK_STRING (str, 0);
513ee442 566 bzero (charsets, MAX_CHARSET + 1);
4ed46869
KH
567 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
568 val = Qnil;
513ee442 569 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
570 if (charsets[i])
571 val = Fcons (CHARSET_SYMBOL (i), val);
572 return val;
573}
574\f
575DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 576 "")
4ed46869
KH
577 (charset, code1, code2)
578 Lisp_Object charset, code1, code2;
579{
580 CHECK_NUMBER (charset, 0);
581
582 if (NILP (code1))
583 XSETFASTINT (code1, 0);
584 else
585 CHECK_NUMBER (code1, 1);
586 if (NILP (code2))
587 XSETFASTINT (code2, 0);
588 else
589 CHECK_NUMBER (code2, 2);
590
591 if (!CHARSET_DEFINED_P (XINT (charset)))
592 error ("Invalid charset: %d", XINT (charset));
593
594 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
595}
596
597DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
598 "Return list of charset and one or two position-codes of CHAR.")
599 (ch)
600 Lisp_Object ch;
601{
602 Lisp_Object val;
603 int charset;
604 unsigned char c1, c2;
605
606 CHECK_NUMBER (ch, 0);
607 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
608 return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2)
609 ? Fcons (CHARSET_SYMBOL (charset),
610 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
611 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
612}
613
614DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
615 "Return charset of CHAR.")
616 (ch)
617 Lisp_Object ch;
618{
619 CHECK_NUMBER (ch, 0);
620
621 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
622}
623
624DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
625 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
626 (dimension, chars, final_char)
627 Lisp_Object dimension, chars, final_char;
628{
629 int charset;
630
631 CHECK_NUMBER (dimension, 0);
632 CHECK_NUMBER (chars, 1);
633 CHECK_NUMBER (final_char, 2);
634
635 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
636 return Qnil;
637 return CHARSET_SYMBOL (charset);
638}
639
640DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
641 "Return byte length of multi-byte form of CHAR.")
642 (ch)
643 Lisp_Object ch;
644{
645 Lisp_Object val;
646 int bytes;
647
648 CHECK_NUMBER (ch, 0);
649 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
650 {
651 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
652
653 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
654 }
655 else
656 {
657 int charset = CHAR_CHARSET (XFASTINT (ch));
658
659 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
660 }
661
662 XSETFASTINT (val, bytes);
663 return val;
664}
665
666/* Return the width of character of which multi-byte form starts with
667 C. The width is measured by how many columns occupied on the
668 screen when displayed in the current buffer. */
669
670#define ONE_BYTE_CHAR_WIDTH(c) \
671 (c < 0x20 \
672 ? (c == '\t' \
673 ? current_buffer->tab_width \
674 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
675 : (c < 0x7f \
676 ? 1 \
677 : (c == 0x7F \
678 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
679 : ((! NILP (current_buffer->enable_multibyte_characters) \
680 && BASE_LEADING_CODE_P (c)) \
681 ? WIDTH_BY_CHAR_HEAD (c) \
682 : 4)))) \
683
684
685DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
686 "Return width of CHAR when displayed in the current buffer.\n\
687The width is measured by how many columns it occupies on the screen.")
688 (ch)
689 Lisp_Object ch;
690{
691 Lisp_Object val;
692 int c;
693
694 CHECK_NUMBER (ch, 0);
695
696 c = XFASTINT (ch);
697 if (SINGLE_BYTE_CHAR_P (c))
698 XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
699 else if (COMPOSITE_CHAR_P (c))
700 {
701 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
702 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
703 }
704 else
705 {
706 int charset = CHAR_CHARSET (c);
707
708 XSETFASTINT (val, CHARSET_WIDTH (charset));
709 }
710 return val;
711}
712
713/* Return width of string STR of length LEN when displayed in the
714 current buffer. The width is measured by how many columns it
715 occupies on the screen. */
716int
717strwidth (str, len)
718 unsigned char *str;
719 int len;
720{
721 unsigned char *endp = str + len;
722 int width = 0;
723
724 while (str < endp) {
725 if (*str == LEADING_CODE_COMPOSITION)
726 {
727 int id = str_cmpchar_id (str, endp - str);
728
729 if (id < 0)
730 {
731 width += 4;
732 str++;
733 }
734 else
735 {
736 width += cmpchar_table[id]->width;
737 str += cmpchar_table[id]->len;
738 }
739 }
740 else
741 {
742 width += ONE_BYTE_CHAR_WIDTH (*str);
743 str += BYTES_BY_CHAR_HEAD (*str);
744 }
745 }
746 return width;
747}
748
749DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
750 "Return width of STRING when displayed in the current buffer.\n\
751Width is measured by how many columns it occupies on the screen.\n\
752When calculating width of a multi-byte character in STRING,\n\
753 only the base leading-code is considered and the validity of\n\
754 the following bytes are not checked.")
755 (str)
756 Lisp_Object str;
757{
758 Lisp_Object val;
759
760 CHECK_STRING (str, 0);
761 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
762 return val;
763}
764
765DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
766 "Return the direction of CHAR.\n\
767The returned value is 0 for left-to-right and 1 for right-to-left.")
768 (ch)
769 Lisp_Object ch;
770{
771 int charset;
772
773 CHECK_NUMBER (ch, 0);
774 charset = CHAR_CHARSET (XFASTINT (ch));
775 if (!CHARSET_DEFINED_P (charset))
776 error ("Invalid character: %d", XINT (ch));
777 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
778}
779
780DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
781 "Return number of characters in STRING.")
782 (str)
783 Lisp_Object str;
784{
785 Lisp_Object val;
786 unsigned char *p, *endp;
787 int chars;
788
789 CHECK_STRING (str, 0);
790
791 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
792 chars = 0;
793 while (p < endp)
794 {
795 if (*p == LEADING_CODE_COMPOSITION)
796 {
797 p++;
798 while (p < endp && ! CHAR_HEAD_P (p)) p++;
799 }
800 else
801 p += BYTES_BY_CHAR_HEAD (*p);
802 chars++;
803 }
804
805 XSETFASTINT (val, chars);
806 return val;
807}
808
809DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
810 "Return non-nil value if POS is at character boundary of multibyte form.\n\
811The return value is:\n\
812 0 if POS is at an ASCII character or at the end of range,\n\
813 1 if POS is at a head of 2-byte length multi-byte form,\n\
814 2 if POS is at a head of 3-byte length multi-byte form,\n\
815 3 if POS is at a head of 4-byte length multi-byte form,\n\
816 4 if POS is at a head of multi-byte form of a composite character.\n\
817If POS is out of range or not at character boundary, return NIL.")
818 (pos)
819 Lisp_Object pos;
820{
821 Lisp_Object val;
822 int n;
823
824 CHECK_NUMBER_COERCE_MARKER (pos, 0);
825
826 n = XINT (pos);
827 if (n < BEGV || n > ZV)
828 return Qnil;
829
830 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
831 XSETFASTINT (val, 0);
832 else
833 {
834 unsigned char *p = POS_ADDR (n);
835
836 if (SINGLE_BYTE_CHAR_P (*p))
837 XSETFASTINT (val, 0);
838 else if (*p == LEADING_CODE_COMPOSITION)
839 XSETFASTINT (val, 4);
840 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
841 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
842 else
843 val = Qnil;
844 }
845 return val;
846}
847
848DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
849 "Concatenate all the argument characters and make the result a string.")
850 (nargs, args)
851 int nargs;
852 Lisp_Object *args;
853{
854 int i, n = XINT (nargs);
855 unsigned char *buf
856 = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
857 unsigned char *p = buf;
858 Lisp_Object val;
859
860 for (i = 0; i < n; i++)
861 {
862 int c, len;
863 unsigned char *str;
864
865 if (!INTEGERP (args[i]))
866 {
867 free (buf);
868 CHECK_NUMBER (args[i], 0);
869 }
870 c = XINT (args[i]);
871 len = CHAR_STRING (c, p, str);
872 if (p != str)
873 /* C is a composite character. */
874 bcopy (str, p, len);
875 p += len;
876 }
877
878 val = make_string (buf, p - buf);
879 free (buf);
880 return val;
881}
882
883#endif /* emacs */
884\f
885/*** Composite characters staffs ***/
886
887/* Each composite character is identified by CMPCHAR-ID which is
888 assigned when Emacs needs the character code of the composite
889 character (e.g. when displaying it on the screen). See the
890 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
891 composite character is represented in Emacs. */
892
893/* If `static' is defined, it means that it is defined to null string. */
894#ifndef static
895/* The following function is copied from lread.c. */
896static int
897hash_string (ptr, len)
898 unsigned char *ptr;
899 int len;
900{
901 register unsigned char *p = ptr;
902 register unsigned char *end = p + len;
903 register unsigned char c;
904 register int hash = 0;
905
906 while (p != end)
907 {
908 c = *p++;
909 if (c >= 0140) c -= 40;
910 hash = ((hash<<3) + (hash>>28) + c);
911 }
912 return hash & 07777777777;
913}
914#endif
915
4ed46869
KH
916#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
917
918static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
919
920/* Each element of `cmpchar_hash_table' is a pointer to an array of
921 integer, where the 1st element is the size of the array, the 2nd
922 element is how many elements are actually used in the array, and
923 the remaining elements are CMPCHAR-IDs of composite characters of
924 the same hash value. */
925#define CMPCHAR_HASH_SIZE(table) table[0]
926#define CMPCHAR_HASH_USED(table) table[1]
927#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
928
929/* Return CMPCHAR-ID of the composite character in STR of the length
930 LEN. If the composite character has not yet been registered,
931 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
932 is the sole function for assigning CMPCHAR-ID. */
933int
934str_cmpchar_id (str, len)
935 unsigned char *str;
936 int len;
937{
938 int hash_idx, *hashp;
939 unsigned char *buf;
940 int embedded_rule; /* 1 if composition rule is embedded. */
941 int chars; /* number of components. */
942 int i;
943 struct cmpchar_info *cmpcharp;
944
945 if (len < 5)
946 /* Any composite char have at least 3-byte length. */
947 return -1;
948
949 /* The second byte 0xFF means compostion rule is embedded. */
950 embedded_rule = (str[1] == 0xFF);
951
952 /* At first, get the actual length of the composite character. */
953 {
954 unsigned char *p, *endp = str + 1, *lastp = str + len;
955 int bytes;
956
957 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
958 chars = 0;
959 p = str + 1 + embedded_rule;
960 while (p < endp)
961 {
962 /* No need of checking if *P is 0xA0 because
963 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
964 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
965 chars++;
966 }
967 len = (p -= embedded_rule) - str;
968 if (p > endp)
969 len -= - bytes, chars--;
970
971 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
972 /* Invalid number of components. */
973 return -1;
974 }
975 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
976 hashp = cmpchar_hash_table[hash_idx];
977
978 /* Then, look into the hash table. */
979 if (hashp != NULL)
980 /* Find the correct one among composite characters of the same
981 hash value. */
982 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
983 {
984 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
985 if (len == cmpcharp->len
986 && ! bcmp (str, cmpcharp->data, len))
987 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
988 }
989
990 /* We have to register the composite character in cmpchar_table. */
513ee442
KH
991 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
992 /* No, we have no more room for a new composite character. */
993 return -1;
994
4ed46869
KH
995 /* Make the entry in hash table. */
996 if (hashp == NULL)
997 {
998 /* Make a table for 8 composite characters initially. */
999 hashp = (cmpchar_hash_table[hash_idx]
1000 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1001 CMPCHAR_HASH_SIZE (hashp) = 10;
1002 CMPCHAR_HASH_USED (hashp) = 2;
1003 }
1004 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1005 {
1006 CMPCHAR_HASH_SIZE (hashp) += 8;
1007 hashp = (cmpchar_hash_table[hash_idx]
1008 = (int *) xrealloc (hashp,
1009 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1010 }
1011 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1012 CMPCHAR_HASH_USED (hashp)++;
1013
1014 /* Set information of the composite character in cmpchar_table. */
1015 if (cmpchar_table_size == 0)
1016 {
1017 /* This is the first composite character to be registered. */
1018 cmpchar_table_size = 256;
1019 cmpchar_table
1020 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1021 * cmpchar_table_size);
1022 }
1023 else if (cmpchar_table_size <= n_cmpchars)
1024 {
1025 cmpchar_table_size += 256;
1026 cmpchar_table
1027 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1028 sizeof (cmpchar_table[0])
1029 * cmpchar_table_size);
1030 }
1031
1032 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1033
1034 cmpcharp->len = len;
1035 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1036 bcopy (str, cmpcharp->data, len);
1037 cmpcharp->data[len] = 0;
1038 cmpcharp->glyph_len = chars;
1039 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1040 if (embedded_rule)
1041 {
1042 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1043 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1044 }
1045 else
1046 {
1047 cmpcharp->cmp_rule = NULL;
1048 cmpcharp->col_offset = NULL;
1049 }
1050
1051 /* Setup GLYPH data and composition rules (if any) so as not to make
1052 them every time on displaying. */
1053 {
1054 unsigned char *bufp;
1055 int width;
1056 float leftmost = 0.0, rightmost = 1.0;
1057
1058 if (embedded_rule)
1059 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1060 cmpcharp->col_offset[0] = 0;
1061
1062 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1063 {
1064 if (embedded_rule)
1065 cmpcharp->cmp_rule[i] = *bufp++;
1066
1067 if (*bufp == 0xA0) /* This is an ASCII character. */
1068 {
1069 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1070 width = 1;
1071 bufp++;
1072 }
1073 else /* Multibyte character. */
1074 {
1075 /* Make `bufp' point normal multi-byte form temporally. */
1076 *bufp -= 0x20;
1077 cmpcharp->glyph[i]
1078 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1079 width = WIDTH_BY_CHAR_HEAD (*bufp);
1080 *bufp += 0x20;
1081 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1082 }
1083
1084 if (embedded_rule && i > 0)
1085 {
1086 /* Reference points (global_ref and new_ref) are
1087 encoded as below:
1088
1089 0--1--2 -- ascent
1090 | |
1091 | |
1092 | 4 -+--- center
1093 -- 3 5 -- baseline
1094 | |
1095 6--7--8 -- descent
1096
1097 Now, we calculate the column offset of the new glyph
1098 from the left edge of the first glyph. This can avoid
1099 the same calculation everytime displaying this
1100 composite character. */
1101
1102 /* Reference points of global glyph and new glyph. */
1103 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1104 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1105 /* Column offset relative to the first glyph. */
1106 float left = (leftmost
1107 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1108 - (new_ref % 3) * width / 2.0);
1109
1110 cmpcharp->col_offset[i] = left;
1111 if (left < leftmost)
1112 leftmost = left;
1113 if (left + width > rightmost)
1114 rightmost = left + width;
1115 }
1116 else
1117 {
1118 if (width > rightmost)
1119 rightmost = width;
1120 }
1121 }
1122 if (embedded_rule)
1123 {
1124 /* Now col_offset[N] are relative to the left edge of the
1125 first component. Make them relative to the left edge of
1126 overall glyph. */
1127 for (i = 0; i < chars; i++)
1128 cmpcharp->col_offset[i] -= leftmost;
1129 /* Make rightmost holds width of overall glyph. */
1130 rightmost -= leftmost;
1131 }
1132
1133 cmpcharp->width = rightmost;
1134 if (cmpcharp->width < rightmost)
1135 /* To get a ceiling integer value. */
1136 cmpcharp->width++;
1137 }
1138
1139 cmpchar_table[n_cmpchars] = cmpcharp;
1140
1141 return n_cmpchars++;
1142}
1143
1144/* Return the Nth element of the composite character C. */
1145int
1146cmpchar_component (c, n)
1147 unsigned int c, n;
1148{
1149 int id = COMPOSITE_CHAR_ID (c);
1150
1151 if (id >= n_cmpchars /* C is not a valid composite character. */
1152 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1153 return -1;
1154 /* No face data is stored in glyph code. */
1155 return ((int) (cmpchar_table[id]->glyph[n]));
1156}
1157
1158DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1159 "T if CHAR is a composite character.")
1160 (ch)
1161 Lisp_Object ch;
1162{
1163 CHECK_NUMBER (ch, 0);
1164 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1165}
1166
1167DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1168 2, 2, 0,
1169 "Return the IDXth component character of composite character CHARACTER.")
1170 (character, idx)
1171 Lisp_Object character, idx;
1172{
1173 int c;
1174
1175 CHECK_NUMBER (character, 0);
1176 CHECK_NUMBER (idx, 1);
1177
1178 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1179 args_out_of_range (character, idx);
1180
1181 return make_number (c);
1182}
1183
1184DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1185 2, 2, 0,
55001746
KH
1186 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1187The returned rule is for composing the Nth component\n\
1188on the (N-1)th component. If N is 0, the returned value is always 255.")
1189 (character, n)
1190 Lisp_Object character, n;
4ed46869
KH
1191{
1192 int id, i;
1193
1194 CHECK_NUMBER (character, 0);
55001746 1195 CHECK_NUMBER (n, 1);
4ed46869
KH
1196
1197 id = COMPOSITE_CHAR_ID (XINT (character));
1198 if (id < 0 || id >= n_cmpchars)
1199 error ("Invalid composite character: %d", XINT (character));
55001746 1200 i = XINT (n);
4ed46869 1201 if (i > cmpchar_table[id]->glyph_len)
55001746 1202 args_out_of_range (character, n);
4ed46869
KH
1203
1204 return make_number (cmpchar_table[id]->cmp_rule[i]);
1205}
1206
1207DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1208 Scmpchar_cmp_rule_p, 1, 1, 0,
1209 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1210 (character)
1211 Lisp_Object character;
1212{
1213 int id;
1214
1215 CHECK_NUMBER (character, 0);
1216 id = COMPOSITE_CHAR_ID (XINT (character));
1217 if (id < 0 || id >= n_cmpchars)
1218 error ("Invalid composite character: %d", XINT (character));
1219
1220 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1221}
1222
1223DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1224 Scmpchar_cmp_count, 1, 1, 0,
1225 "Return number of compoents of composite character CHARACTER.")
1226 (character)
1227 Lisp_Object character;
1228{
1229 int id;
1230
1231 CHECK_NUMBER (character, 0);
1232 id = COMPOSITE_CHAR_ID (XINT (character));
1233 if (id < 0 || id >= n_cmpchars)
1234 error ("Invalid composite character: %d", XINT (character));
1235
1236 return (make_number (cmpchar_table[id]->glyph_len));
1237}
1238
1239DEFUN ("compose-string", Fcompose_string, Scompose_string,
1240 1, 1, 0,
1241 "Return one char string composed from all characters in STRING.")
1242 (str)
1243 Lisp_Object str;
1244{
1245 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1246 int len, i;
1247
1248 CHECK_STRING (str, 0);
1249
1250 buf[0] = LEADING_CODE_COMPOSITION;
1251 p = XSTRING (str)->data;
1252 pend = p + XSTRING (str)->size;
1253 i = 1;
1254 while (p < pend)
1255 {
1256 if (*p < 0x20 || *p == 127) /* control code */
1257 error ("Invalid component character: %d", *p);
1258 else if (*p < 0x80) /* ASCII */
1259 {
1260 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1261 error ("Too long string to be composed: %s", XSTRING (str)->data);
1262 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1263 code itself. */
1264 buf[i++] = 0xA0;
1265 buf[i++] = *p++ + 0x80;
1266 }
1267 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1268 {
1269 /* Already composed. Eliminate the heading
1270 LEADING_CODE_COMPOSITION, keep the remaining bytes
1271 unchanged. */
1272 p++;
1273 ptemp = p;
1274 while (! CHAR_HEAD_P (p)) p++;
1275 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1276 error ("Too long string to be composed: %s", XSTRING (str)->data);
1277 bcopy (ptemp, buf + i, p - ptemp);
1278 i += p - ptemp;
1279 }
1280 else /* multibyte char */
1281 {
1282 /* Add 0x20 to the base leading-code, keep the remaining
1283 bytes unchanged. */
1284 len = BYTES_BY_CHAR_HEAD (*p);
1285 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1286 error ("Too long string to be composed: %s", XSTRING (str)->data);
1287 bcopy (p, buf + i, len);
1288 buf[i] += 0x20;
1289 p += len, i += len;
1290 }
1291 }
1292
1293 if (i < 5)
1294 /* STR contains only one character, which can't be composed. */
1295 error ("Too short string to be composed: %s", XSTRING (str)->data);
1296
1297 return make_string (buf, i);
1298}
1299
1300\f
1301charset_id_internal (charset_name)
1302 char *charset_name;
1303{
1304 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1305
1306 if (!VECTORP (val))
1307 error ("Charset %s is not defined", charset_name);
1308
1309 return (XINT (XVECTOR (val)->contents[0]));
1310}
1311
1312DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1313 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1314 ()
1315{
1316 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1317 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1318 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1319 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1320 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1321 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1322 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1323 return Qnil;
1324}
1325
1326init_charset_once ()
1327{
1328 int i, j, k;
1329
1330 staticpro (&Vcharset_table);
1331 staticpro (&Vcharset_symbol_table);
1332
1333 /* This has to be done here, before we call Fmake_char_table. */
1334 Qcharset_table = intern ("charset-table");
1335 staticpro (&Qcharset_table);
1336
1337 /* Intern this now in case it isn't already done.
1338 Setting this variable twice is harmless.
1339 But don't staticpro it here--that is done in alloc.c. */
1340 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1341
1342 /* Now we are ready to set up this property, so we can
1343 create the charset table. */
1344 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1345 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1346
513ee442 1347 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
4ed46869
KH
1348
1349 /* Setup tables. */
1350 for (i = 0; i < 2; i++)
1351 for (j = 0; j < 2; j++)
1352 for (k = 0; k < 128; k++)
1353 iso_charset_table [i][j][k] = -1;
1354
1355 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1356 cmpchar_table_size = n_cmpchars = 0;
1357
1358 for (i = 0; i < 256; i++)
1359 BYTES_BY_CHAR_HEAD (i) = 1;
1360 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1361 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1362 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1363 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1364 /* The following doesn't reflect the actual bytes, but just to tell
1365 that it is a start of a multibyte character. */
1366 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1367
1368 for (i = 0; i < 128; i++)
1369 WIDTH_BY_CHAR_HEAD (i) = 1;
1370 for (; i < 256; i++)
1371 WIDTH_BY_CHAR_HEAD (i) = 4;
1372 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1373 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1374 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1375 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1376}
1377
1378#ifdef emacs
1379
1380syms_of_charset ()
1381{
1382 Qascii = intern ("ascii");
1383 staticpro (&Qascii);
1384
1385 Qcharset = intern ("charset");
1386 staticpro (&Qcharset);
1387
1388 /* Define ASCII charset now. */
1389 update_charset_table (make_number (CHARSET_ASCII),
1390 make_number (1), make_number (94),
1391 make_number (1),
1392 make_number (0),
1393 make_number ('B'),
1394 make_number (0),
1395 build_string ("ASCII"),
1396 build_string ("ASCII"),
1397 build_string ("ASCII (ISO646 IRV)"));
1398 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1399 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1400
1401 Qcomposition = intern ("composition");
1402 staticpro (&Qcomposition);
1403 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1404
1405 defsubr (&Sdefine_charset);
1406 defsubr (&Sdeclare_equiv_charset);
1407 defsubr (&Sfind_charset_region);
1408 defsubr (&Sfind_charset_string);
1409 defsubr (&Smake_char_internal);
1410 defsubr (&Ssplit_char);
1411 defsubr (&Schar_charset);
1412 defsubr (&Siso_charset);
1413 defsubr (&Schar_bytes);
1414 defsubr (&Schar_width);
1415 defsubr (&Sstring_width);
1416 defsubr (&Schar_direction);
1417 defsubr (&Schars_in_string);
1418 defsubr (&Schar_boundary_p);
1419 defsubr (&Sconcat_chars);
1420 defsubr (&Scmpcharp);
1421 defsubr (&Scmpchar_component);
1422 defsubr (&Scmpchar_cmp_rule);
1423 defsubr (&Scmpchar_cmp_rule_p);
1424 defsubr (&Scmpchar_cmp_count);
1425 defsubr (&Scompose_string);
1426 defsubr (&Ssetup_special_charsets);
1427
1428 DEFVAR_LISP ("charset-list", &Vcharset_list,
1429 "List of charsets ever defined.");
1430 Vcharset_list = Fcons (Qascii, Qnil);
1431
1432 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1433 "Leading-code of composite characters.");
1434 leading_code_composition = LEADING_CODE_COMPOSITION;
1435
1436 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1437 "Leading-code of private TYPE9N charset of column-width 1.");
1438 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1439
1440 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1441 "Leading-code of private TYPE9N charset of column-width 2.");
1442 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1443
1444 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1445 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1446 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1447
1448 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1449 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1450 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1451}
1452
1453#endif /* emacs */