(delete_window): Don't return value from void function.
[bpt/emacs.git] / src / charset.c
CommitLineData
4ed46869
KH
1/* Multilingual characters handler.
2 Ver.1.0
4ed46869
KH
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
369314dc
KH
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
4ed46869 12
369314dc
KH
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
4ed46869 17
369314dc
KH
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
20the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA. */
4ed46869
KH
22
23/* At first, see the document in `charset.h' to understand the code in
24 this file. */
25
26#include <stdio.h>
27
28#ifdef emacs
29
30#include <sys/types.h>
31#include <config.h>
32#include "lisp.h"
33#include "buffer.h"
34#include "charset.h"
35#include "coding.h"
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
44
45/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */
47int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52/* Declaration of special charsets. */
53int charset_ascii; /* ASCII */
54int charset_composition; /* for a composite character */
55int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63Lisp_Object Qcharset_table;
64
65/* A char-table containing information of each character set. */
66Lisp_Object Vcharset_table;
67
68/* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70Lisp_Object Vcharset_symbol_table;
71
72/* A list of charset symbols ever defined. */
73Lisp_Object Vcharset_list;
74
75/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76int bytes_by_char_head[256];
77int width_by_char_head[256];
78
79/* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81int iso_charset_table[2][2][128];
82
513ee442
KH
83/* Table of pointers to the structure `cmpchar_info' indexed by
84 CMPCHAR-ID. */
85struct cmpchar_info **cmpchar_table;
86/* The current size of `cmpchar_table'. */
87static int cmpchar_table_size;
88/* Number of the current composite characters. */
89int n_cmpchars;
90
4ed46869
KH
91/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
92unsigned char *_fetch_multibyte_char_p;
93int _fetch_multibyte_char_len;
94
95/* Set STR a pointer to the multi-byte form of the character C. If C
96 is not a composite character, the multi-byte form is set in WORKBUF
97 and STR points WORKBUF. The caller should allocate at least 4-byte
98 area at WORKBUF in advance. Returns the length of the multi-byte
99 form.
100
101 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
102 function directly if C can be an ASCII character. */
103
104int
105non_ascii_char_to_string (c, workbuf, str)
106 int c;
107 unsigned char *workbuf, **str;
108{
6dc0722d 109 int charset, c1, c2;
4ed46869
KH
110
111 if (COMPOSITE_CHAR_P (c))
112 {
113 int cmpchar_id = COMPOSITE_CHAR_ID (c);
114
115 if (cmpchar_id < n_cmpchars)
116 {
117 *str = cmpchar_table[cmpchar_id]->data;
118 return cmpchar_table[cmpchar_id]->len;
119 }
120 else
121 {
122 *str = workbuf;
123 return 0;
124 }
125 }
126
127 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
128
129 *str = workbuf;
130 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
131 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
132 workbuf++;
133 *workbuf++ = c1 | 0x80;
6dc0722d 134 if (c2 >= 0)
4ed46869
KH
135 *workbuf++ = c2 | 0x80;
136
137 return (workbuf - *str);
138}
139
140/* Return a non-ASCII character of which multi-byte form is at STR of
141 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
142 character is set to the address ACTUAL_LEN.
143
144 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
145 directly if STR can hold an ASCII character. */
146
147string_to_non_ascii_char (str, len, actual_len)
148 unsigned char *str;
149 int len, *actual_len;
150{
151 int charset;
152 unsigned char c1, c2;
153 register int c;
154
155 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
156 {
157 if (actual_len)
158 *actual_len = 1;
159 return (int) *str;
160 }
161
162 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
163
164 if (actual_len)
165 *actual_len = (charset == CHARSET_COMPOSITION
166 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
167 : BYTES_BY_CHAR_HEAD (*str));
168 return c;
169}
170
171/* Return the length of the multi-byte form at string STR of length LEN. */
172int
173multibyte_form_length (str, len)
174 unsigned char *str;
175 int len;
176{
177 int charset;
178 unsigned char c1, c2;
179 register int c;
180
181 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
182 return 1;
183
184 return (charset == CHARSET_COMPOSITION
185 ? cmpchar_table[(c1 << 7) | c2]->len
186 : BYTES_BY_CHAR_HEAD (*str));
187}
188
189/* Check if string STR of length LEN contains valid multi-byte form of
190 a character. If valid, charset and position codes of the character
191 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
192 return -1. This should be used only in the macro SPLIT_STRING
193 which checks range of STR in advance. */
194
195split_non_ascii_string (str, len, charset, c1, c2)
196 register unsigned char *str, *c1, *c2;
197 register int len, *charset;
198{
199 register unsigned int cs = *str++;
200
201 if (cs == LEADING_CODE_COMPOSITION)
202 {
203 int cmpchar_id = str_cmpchar_id (str - 1, len);
204
205 if (cmpchar_id < 0)
206 return -1;
207 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
208 }
209 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
210 && CHARSET_DEFINED_P (cs))
211 {
212 *charset = cs;
213 if (*str < 0xA0)
214 return -1;
215 *c1 = (*str++) & 0x7F;
216 if (CHARSET_DIMENSION (cs) == 2)
217 {
218 if (*str < 0xA0)
219 return -1;
220 *c2 = (*str++) & 0x7F;
221 }
222 }
223 else
224 return -1;
225 return 0;
226}
227
228/* Update the table Vcharset_table with the given arguments (see the
229 document of `define-charset' for the meaning of each argument).
230 Several other table contents are also updated. The caller should
231 check the validity of CHARSET-ID and the remaining arguments in
232 advance. */
233
234void
235update_charset_table (charset_id, dimension, chars, width, direction,
236 iso_final_char, iso_graphic_plane,
237 short_name, long_name, description)
238 Lisp_Object charset_id, dimension, chars, width, direction;
239 Lisp_Object iso_final_char, iso_graphic_plane;
240 Lisp_Object short_name, long_name, description;
241{
242 int charset = XINT (charset_id);
243 int bytes;
244 unsigned char leading_code_base, leading_code_ext;
245
6dc0722d
KH
246 if (NILP (CHARSET_TABLE_ENTRY (charset)))
247 CHARSET_TABLE_ENTRY (charset)
248 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869
KH
249
250 /* Get byte length of multibyte form, base leading-code, and
251 extended leading-code of the charset. See the comment under the
252 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
253 bytes = XINT (dimension);
254 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
255 {
256 /* Official charset, it doesn't have an extended leading-code. */
257 if (charset != CHARSET_ASCII)
258 bytes += 1; /* For a base leading-code. */
259 leading_code_base = charset;
260 leading_code_ext = 0;
261 }
262 else
263 {
264 /* Private charset. */
265 bytes += 2; /* For base and extended leading-codes. */
266 leading_code_base
267 = (charset < LEADING_CODE_EXT_12
268 ? LEADING_CODE_PRIVATE_11
269 : (charset < LEADING_CODE_EXT_21
270 ? LEADING_CODE_PRIVATE_12
271 : (charset < LEADING_CODE_EXT_22
272 ? LEADING_CODE_PRIVATE_21
273 : LEADING_CODE_PRIVATE_22)));
274 leading_code_ext = charset;
275 }
276
277 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
278 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
279 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
280 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
281 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
282 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
283 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
284 = make_number (leading_code_base);
285 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
286 = make_number (leading_code_ext);
287 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
288 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
289 = iso_graphic_plane;
290 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
291 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
292 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
293 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
294
295 {
296 /* If we have already defined a charset which has the same
297 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
298 DIRECTION, we must update the entry REVERSE-CHARSET of both
299 charsets. If there's no such charset, the value of the entry
300 is set to nil. */
301 int i;
302
513ee442 303 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
304 if (!NILP (CHARSET_TABLE_ENTRY (i)))
305 {
306 if (CHARSET_DIMENSION (i) == XINT (dimension)
307 && CHARSET_CHARS (i) == XINT (chars)
308 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
309 && CHARSET_DIRECTION (i) != XINT (direction))
310 {
311 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
312 = make_number (i);
313 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
314 break;
315 }
316 }
513ee442 317 if (i > MAX_CHARSET)
4ed46869
KH
318 /* No such a charset. */
319 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
320 = make_number (-1);
321 }
322
323 if (charset != CHARSET_ASCII
324 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
325 {
326 /* Update tables bytes_by_char_head and width_by_char_head. */
327 bytes_by_char_head[leading_code_base] = bytes;
328 width_by_char_head[leading_code_base] = XINT (width);
329
330 /* Update table emacs_code_class. */
331 emacs_code_class[charset] = (bytes == 2
332 ? EMACS_leading_code_2
333 : (bytes == 3
334 ? EMACS_leading_code_3
335 : EMACS_leading_code_4));
336 }
337
338 /* Update table iso_charset_table. */
339 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
340 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
341}
342
343#ifdef emacs
344
345/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
346 is invalid. */
347int
348get_charset_id (charset_symbol)
349 Lisp_Object charset_symbol;
350{
351 Lisp_Object val;
352 int charset;
353
354 return ((SYMBOLP (charset_symbol)
355 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
356 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
357 CHARSET_VALID_P (charset)))
358 ? charset : -1);
359}
360
361/* Return an identification number for a new private charset of
362 DIMENSION and WIDTH. If there's no more room for the new charset,
363 return 0. */
364Lisp_Object
365get_new_private_charset_id (dimension, width)
366 int dimension, width;
367{
368 int charset, from, to;
369
370 if (dimension == 1)
371 {
372 if (width == 1)
373 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
374 else
375 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
376 }
377 else
378 {
379 if (width == 1)
380 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
381 else
382 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
383 }
384
385 for (charset = from; charset < to; charset++)
386 if (!CHARSET_DEFINED_P (charset)) break;
387
388 return make_number (charset < to ? charset : 0);
389}
390
391DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
392 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
393If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
394 treated as a private charset.\n\
395INFO-VECTOR is a vector of the format:\n\
396 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
397 SHORT-NAME LONG-NAME DESCRIPTION]\n\
398The meanings of each elements is as follows:\n\
399DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
400CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
401WIDTH (integer) is the number of columns a character in the charset\n\
402occupies on the screen: one of 0, 1, and 2.\n\
403\n\
404DIRECTION (integer) is the rendering direction of characters in the\n\
405charset when rendering. If 0, render from right to left, else\n\
406render from left to right.\n\
407\n\
408ISO-FINAL-CHAR (character) is the final character of the\n\
409corresponding ISO 2022 charset.\n\
410\n\
411ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
412while encoding to variants of ISO 2022 coding system, one of the\n\
413following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
414\n\
415SHORT-NAME (string) is the short name to refer to the charset.\n\
416\n\
417LONG-NAME (string) is the long name to refer to the charset.\n\
418\n\
419DESCRIPTION (string) is the description string of the charset.")
420 (charset_id, charset_symbol, info_vector)
421 Lisp_Object charset_id, charset_symbol, info_vector;
422{
423 Lisp_Object *vec;
424
425 if (!NILP (charset_id))
426 CHECK_NUMBER (charset_id, 0);
427 CHECK_SYMBOL (charset_symbol, 1);
428 CHECK_VECTOR (info_vector, 2);
429
430 if (! NILP (charset_id))
431 {
432 if (! CHARSET_VALID_P (XINT (charset_id)))
433 error ("Invalid CHARSET: %d", XINT (charset_id));
434 else if (CHARSET_DEFINED_P (XINT (charset_id)))
435 error ("Already defined charset: %d", XINT (charset_id));
436 }
437
438 vec = XVECTOR (info_vector)->contents;
439 if (XVECTOR (info_vector)->size != 9
440 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
441 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
442 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
443 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
444 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
445 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
446 || !STRINGP (vec[6])
447 || !STRINGP (vec[7])
448 || !STRINGP (vec[8]))
449 error ("Invalid info-vector argument for defining charset %s",
450 XSYMBOL (charset_symbol)->name->data);
451
452 if (NILP (charset_id))
453 {
454 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
455 if (XINT (charset_id) == 0)
456 error ("There's no room for a new private charset %s",
457 XSYMBOL (charset_symbol)->name->data);
458 }
459
460 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
461 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 462 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
463 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
464 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
465 return Qnil;
466}
467
468DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
469 4, 4, 0,
470 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
471CHARSET should be defined by `defined-charset' in advance.")
472 (dimension, chars, final_char, charset_symbol)
473 Lisp_Object dimension, chars, final_char, charset_symbol;
474{
475 int charset;
476
477 CHECK_NUMBER (dimension, 0);
478 CHECK_NUMBER (chars, 1);
479 CHECK_NUMBER (final_char, 2);
480 CHECK_SYMBOL (charset_symbol, 3);
481
482 if (XINT (dimension) != 1 && XINT (dimension) != 2)
483 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
484 if (XINT (chars) != 94 && XINT (chars) != 96)
485 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
486 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
487 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
488 if ((charset = get_charset_id (charset_symbol)) < 0)
489 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
490
491 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
492 return Qnil;
493}
494
495/* Return number of different charsets in STR of length LEN. In
496 addition, for each found charset N, CHARSETS[N] is set 1. The
513ee442 497 caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance. */
4ed46869
KH
498
499int
500find_charset_in_str (str, len, charsets)
501 unsigned char *str, *charsets;
502 int len;
503{
504 int num = 0;
505
506 while (len > 0)
507 {
508 int bytes = BYTES_BY_CHAR_HEAD (*str);
509 int charset = CHARSET_AT (str);
510
511 if (!charsets[charset])
512 {
513 charsets[charset] = 1;
514 num += 1;
515 }
516 str += bytes;
517 len -= bytes;
518 }
519 return num;
520}
521
522DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
523 2, 2, 0,
524 "Return a list of charsets in the region between BEG and END.\n\
525BEG and END are buffer positions.")
526 (beg, end)
527 Lisp_Object beg, end;
528{
513ee442 529 char charsets[MAX_CHARSET + 1];
4ed46869
KH
530 int from, to, stop, i;
531 Lisp_Object val;
532
533 validate_region (&beg, &end);
534 from = XFASTINT (beg);
535 stop = to = XFASTINT (end);
536 if (from < GPT && GPT < to)
537 stop = GPT;
513ee442 538 bzero (charsets, MAX_CHARSET + 1);
4ed46869
KH
539 while (1)
540 {
541 find_charset_in_str (POS_ADDR (from), stop - from, charsets);
542 if (stop < to)
543 from = stop, stop = to;
544 else
545 break;
546 }
547 val = Qnil;
513ee442 548 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
549 if (charsets[i])
550 val = Fcons (CHARSET_SYMBOL (i), val);
551 return val;
552}
553
554DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
555 1, 1, 0,
556 "Return a list of charsets in STR.")
557 (str)
558 Lisp_Object str;
559{
513ee442 560 char charsets[MAX_CHARSET + 1];
4ed46869
KH
561 int i;
562 Lisp_Object val;
563
564 CHECK_STRING (str, 0);
513ee442 565 bzero (charsets, MAX_CHARSET + 1);
4ed46869
KH
566 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
567 val = Qnil;
513ee442 568 for (i = MAX_CHARSET; i >= 0; i--)
4ed46869
KH
569 if (charsets[i])
570 val = Fcons (CHARSET_SYMBOL (i), val);
571 return val;
572}
573\f
574DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 575 "")
4ed46869
KH
576 (charset, code1, code2)
577 Lisp_Object charset, code1, code2;
578{
579 CHECK_NUMBER (charset, 0);
580
581 if (NILP (code1))
582 XSETFASTINT (code1, 0);
583 else
584 CHECK_NUMBER (code1, 1);
585 if (NILP (code2))
586 XSETFASTINT (code2, 0);
587 else
588 CHECK_NUMBER (code2, 2);
589
590 if (!CHARSET_DEFINED_P (XINT (charset)))
591 error ("Invalid charset: %d", XINT (charset));
592
593 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
594}
595
596DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
597 "Return list of charset and one or two position-codes of CHAR.")
598 (ch)
599 Lisp_Object ch;
600{
601 Lisp_Object val;
6dc0722d 602 int charset, c1, c2;
4ed46869
KH
603
604 CHECK_NUMBER (ch, 0);
605 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 606 return (c2 >= 0
4ed46869
KH
607 ? Fcons (CHARSET_SYMBOL (charset),
608 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
609 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
610}
611
612DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
613 "Return charset of CHAR.")
614 (ch)
615 Lisp_Object ch;
616{
617 CHECK_NUMBER (ch, 0);
618
619 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
620}
621
622DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
623 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
624 (dimension, chars, final_char)
625 Lisp_Object dimension, chars, final_char;
626{
627 int charset;
628
629 CHECK_NUMBER (dimension, 0);
630 CHECK_NUMBER (chars, 1);
631 CHECK_NUMBER (final_char, 2);
632
633 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
634 return Qnil;
635 return CHARSET_SYMBOL (charset);
636}
637
638DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
639 "Return byte length of multi-byte form of CHAR.")
640 (ch)
641 Lisp_Object ch;
642{
643 Lisp_Object val;
644 int bytes;
645
646 CHECK_NUMBER (ch, 0);
647 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
648 {
649 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
650
651 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
652 }
653 else
654 {
655 int charset = CHAR_CHARSET (XFASTINT (ch));
656
657 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
658 }
659
660 XSETFASTINT (val, bytes);
661 return val;
662}
663
664/* Return the width of character of which multi-byte form starts with
665 C. The width is measured by how many columns occupied on the
666 screen when displayed in the current buffer. */
667
668#define ONE_BYTE_CHAR_WIDTH(c) \
669 (c < 0x20 \
670 ? (c == '\t' \
671 ? current_buffer->tab_width \
672 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
673 : (c < 0x7f \
674 ? 1 \
675 : (c == 0x7F \
676 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
677 : ((! NILP (current_buffer->enable_multibyte_characters) \
678 && BASE_LEADING_CODE_P (c)) \
679 ? WIDTH_BY_CHAR_HEAD (c) \
680 : 4)))) \
681
682
683DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
684 "Return width of CHAR when displayed in the current buffer.\n\
685The width is measured by how many columns it occupies on the screen.")
686 (ch)
687 Lisp_Object ch;
688{
689 Lisp_Object val;
690 int c;
691
692 CHECK_NUMBER (ch, 0);
693
694 c = XFASTINT (ch);
695 if (SINGLE_BYTE_CHAR_P (c))
696 XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
697 else if (COMPOSITE_CHAR_P (c))
698 {
699 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
700 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
701 }
702 else
703 {
704 int charset = CHAR_CHARSET (c);
705
706 XSETFASTINT (val, CHARSET_WIDTH (charset));
707 }
708 return val;
709}
710
711/* Return width of string STR of length LEN when displayed in the
712 current buffer. The width is measured by how many columns it
713 occupies on the screen. */
714int
715strwidth (str, len)
716 unsigned char *str;
717 int len;
718{
719 unsigned char *endp = str + len;
720 int width = 0;
721
722 while (str < endp) {
723 if (*str == LEADING_CODE_COMPOSITION)
724 {
725 int id = str_cmpchar_id (str, endp - str);
726
727 if (id < 0)
728 {
729 width += 4;
730 str++;
731 }
732 else
733 {
734 width += cmpchar_table[id]->width;
735 str += cmpchar_table[id]->len;
736 }
737 }
738 else
739 {
740 width += ONE_BYTE_CHAR_WIDTH (*str);
741 str += BYTES_BY_CHAR_HEAD (*str);
742 }
743 }
744 return width;
745}
746
747DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
748 "Return width of STRING when displayed in the current buffer.\n\
749Width is measured by how many columns it occupies on the screen.\n\
750When calculating width of a multi-byte character in STRING,\n\
751 only the base leading-code is considered and the validity of\n\
752 the following bytes are not checked.")
753 (str)
754 Lisp_Object str;
755{
756 Lisp_Object val;
757
758 CHECK_STRING (str, 0);
759 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
760 return val;
761}
762
763DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
764 "Return the direction of CHAR.\n\
765The returned value is 0 for left-to-right and 1 for right-to-left.")
766 (ch)
767 Lisp_Object ch;
768{
769 int charset;
770
771 CHECK_NUMBER (ch, 0);
772 charset = CHAR_CHARSET (XFASTINT (ch));
773 if (!CHARSET_DEFINED_P (charset))
774 error ("Invalid character: %d", XINT (ch));
775 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
776}
777
778DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
779 "Return number of characters in STRING.")
780 (str)
781 Lisp_Object str;
782{
783 Lisp_Object val;
784 unsigned char *p, *endp;
785 int chars;
786
787 CHECK_STRING (str, 0);
788
789 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
790 chars = 0;
791 while (p < endp)
792 {
793 if (*p == LEADING_CODE_COMPOSITION)
794 {
795 p++;
796 while (p < endp && ! CHAR_HEAD_P (p)) p++;
797 }
798 else
799 p += BYTES_BY_CHAR_HEAD (*p);
800 chars++;
801 }
802
803 XSETFASTINT (val, chars);
804 return val;
805}
806
807DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
808 "Return non-nil value if POS is at character boundary of multibyte form.\n\
809The return value is:\n\
810 0 if POS is at an ASCII character or at the end of range,\n\
811 1 if POS is at a head of 2-byte length multi-byte form,\n\
812 2 if POS is at a head of 3-byte length multi-byte form,\n\
813 3 if POS is at a head of 4-byte length multi-byte form,\n\
814 4 if POS is at a head of multi-byte form of a composite character.\n\
815If POS is out of range or not at character boundary, return NIL.")
816 (pos)
817 Lisp_Object pos;
818{
819 Lisp_Object val;
820 int n;
821
822 CHECK_NUMBER_COERCE_MARKER (pos, 0);
823
824 n = XINT (pos);
825 if (n < BEGV || n > ZV)
826 return Qnil;
827
828 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
829 XSETFASTINT (val, 0);
830 else
831 {
832 unsigned char *p = POS_ADDR (n);
833
834 if (SINGLE_BYTE_CHAR_P (*p))
835 XSETFASTINT (val, 0);
836 else if (*p == LEADING_CODE_COMPOSITION)
837 XSETFASTINT (val, 4);
838 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
839 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
840 else
841 val = Qnil;
842 }
843 return val;
844}
845
846DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
847 "Concatenate all the argument characters and make the result a string.")
848 (nargs, args)
849 int nargs;
850 Lisp_Object *args;
851{
852 int i, n = XINT (nargs);
853 unsigned char *buf
854 = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
855 unsigned char *p = buf;
856 Lisp_Object val;
857
858 for (i = 0; i < n; i++)
859 {
860 int c, len;
861 unsigned char *str;
862
863 if (!INTEGERP (args[i]))
864 {
865 free (buf);
866 CHECK_NUMBER (args[i], 0);
867 }
868 c = XINT (args[i]);
869 len = CHAR_STRING (c, p, str);
870 if (p != str)
871 /* C is a composite character. */
872 bcopy (str, p, len);
873 p += len;
874 }
875
876 val = make_string (buf, p - buf);
877 free (buf);
878 return val;
879}
880
881#endif /* emacs */
882\f
883/*** Composite characters staffs ***/
884
885/* Each composite character is identified by CMPCHAR-ID which is
886 assigned when Emacs needs the character code of the composite
887 character (e.g. when displaying it on the screen). See the
888 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
889 composite character is represented in Emacs. */
890
891/* If `static' is defined, it means that it is defined to null string. */
892#ifndef static
893/* The following function is copied from lread.c. */
894static int
895hash_string (ptr, len)
896 unsigned char *ptr;
897 int len;
898{
899 register unsigned char *p = ptr;
900 register unsigned char *end = p + len;
901 register unsigned char c;
902 register int hash = 0;
903
904 while (p != end)
905 {
906 c = *p++;
907 if (c >= 0140) c -= 40;
908 hash = ((hash<<3) + (hash>>28) + c);
909 }
910 return hash & 07777777777;
911}
912#endif
913
4ed46869
KH
914#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
915
916static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
917
918/* Each element of `cmpchar_hash_table' is a pointer to an array of
919 integer, where the 1st element is the size of the array, the 2nd
920 element is how many elements are actually used in the array, and
921 the remaining elements are CMPCHAR-IDs of composite characters of
922 the same hash value. */
923#define CMPCHAR_HASH_SIZE(table) table[0]
924#define CMPCHAR_HASH_USED(table) table[1]
925#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
926
927/* Return CMPCHAR-ID of the composite character in STR of the length
928 LEN. If the composite character has not yet been registered,
929 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
930 is the sole function for assigning CMPCHAR-ID. */
931int
932str_cmpchar_id (str, len)
933 unsigned char *str;
934 int len;
935{
936 int hash_idx, *hashp;
937 unsigned char *buf;
938 int embedded_rule; /* 1 if composition rule is embedded. */
939 int chars; /* number of components. */
940 int i;
941 struct cmpchar_info *cmpcharp;
942
943 if (len < 5)
944 /* Any composite char have at least 3-byte length. */
945 return -1;
946
947 /* The second byte 0xFF means compostion rule is embedded. */
948 embedded_rule = (str[1] == 0xFF);
949
950 /* At first, get the actual length of the composite character. */
951 {
952 unsigned char *p, *endp = str + 1, *lastp = str + len;
953 int bytes;
954
955 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
956 chars = 0;
957 p = str + 1 + embedded_rule;
958 while (p < endp)
959 {
960 /* No need of checking if *P is 0xA0 because
961 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
962 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
963 chars++;
964 }
965 len = (p -= embedded_rule) - str;
966 if (p > endp)
967 len -= - bytes, chars--;
968
969 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
970 /* Invalid number of components. */
971 return -1;
972 }
973 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
974 hashp = cmpchar_hash_table[hash_idx];
975
976 /* Then, look into the hash table. */
977 if (hashp != NULL)
978 /* Find the correct one among composite characters of the same
979 hash value. */
980 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
981 {
982 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
983 if (len == cmpcharp->len
984 && ! bcmp (str, cmpcharp->data, len))
985 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
986 }
987
988 /* We have to register the composite character in cmpchar_table. */
513ee442
KH
989 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
990 /* No, we have no more room for a new composite character. */
991 return -1;
992
4ed46869
KH
993 /* Make the entry in hash table. */
994 if (hashp == NULL)
995 {
996 /* Make a table for 8 composite characters initially. */
997 hashp = (cmpchar_hash_table[hash_idx]
998 = (int *) xmalloc (sizeof (int) * (2 + 8)));
999 CMPCHAR_HASH_SIZE (hashp) = 10;
1000 CMPCHAR_HASH_USED (hashp) = 2;
1001 }
1002 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1003 {
1004 CMPCHAR_HASH_SIZE (hashp) += 8;
1005 hashp = (cmpchar_hash_table[hash_idx]
1006 = (int *) xrealloc (hashp,
1007 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1008 }
1009 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1010 CMPCHAR_HASH_USED (hashp)++;
1011
1012 /* Set information of the composite character in cmpchar_table. */
1013 if (cmpchar_table_size == 0)
1014 {
1015 /* This is the first composite character to be registered. */
1016 cmpchar_table_size = 256;
1017 cmpchar_table
1018 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1019 * cmpchar_table_size);
1020 }
1021 else if (cmpchar_table_size <= n_cmpchars)
1022 {
1023 cmpchar_table_size += 256;
1024 cmpchar_table
1025 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1026 sizeof (cmpchar_table[0])
1027 * cmpchar_table_size);
1028 }
1029
1030 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1031
1032 cmpcharp->len = len;
1033 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1034 bcopy (str, cmpcharp->data, len);
1035 cmpcharp->data[len] = 0;
1036 cmpcharp->glyph_len = chars;
1037 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1038 if (embedded_rule)
1039 {
1040 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1041 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1042 }
1043 else
1044 {
1045 cmpcharp->cmp_rule = NULL;
1046 cmpcharp->col_offset = NULL;
1047 }
1048
1049 /* Setup GLYPH data and composition rules (if any) so as not to make
1050 them every time on displaying. */
1051 {
1052 unsigned char *bufp;
1053 int width;
1054 float leftmost = 0.0, rightmost = 1.0;
1055
1056 if (embedded_rule)
1057 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1058 cmpcharp->col_offset[0] = 0;
1059
1060 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1061 {
1062 if (embedded_rule)
1063 cmpcharp->cmp_rule[i] = *bufp++;
1064
1065 if (*bufp == 0xA0) /* This is an ASCII character. */
1066 {
1067 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1068 width = 1;
1069 bufp++;
1070 }
1071 else /* Multibyte character. */
1072 {
1073 /* Make `bufp' point normal multi-byte form temporally. */
1074 *bufp -= 0x20;
1075 cmpcharp->glyph[i]
1076 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1077 width = WIDTH_BY_CHAR_HEAD (*bufp);
1078 *bufp += 0x20;
1079 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1080 }
1081
1082 if (embedded_rule && i > 0)
1083 {
1084 /* Reference points (global_ref and new_ref) are
1085 encoded as below:
1086
1087 0--1--2 -- ascent
1088 | |
1089 | |
1090 | 4 -+--- center
1091 -- 3 5 -- baseline
1092 | |
1093 6--7--8 -- descent
1094
1095 Now, we calculate the column offset of the new glyph
1096 from the left edge of the first glyph. This can avoid
1097 the same calculation everytime displaying this
1098 composite character. */
1099
1100 /* Reference points of global glyph and new glyph. */
1101 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1102 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1103 /* Column offset relative to the first glyph. */
1104 float left = (leftmost
1105 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1106 - (new_ref % 3) * width / 2.0);
1107
1108 cmpcharp->col_offset[i] = left;
1109 if (left < leftmost)
1110 leftmost = left;
1111 if (left + width > rightmost)
1112 rightmost = left + width;
1113 }
1114 else
1115 {
1116 if (width > rightmost)
1117 rightmost = width;
1118 }
1119 }
1120 if (embedded_rule)
1121 {
1122 /* Now col_offset[N] are relative to the left edge of the
1123 first component. Make them relative to the left edge of
1124 overall glyph. */
1125 for (i = 0; i < chars; i++)
1126 cmpcharp->col_offset[i] -= leftmost;
1127 /* Make rightmost holds width of overall glyph. */
1128 rightmost -= leftmost;
1129 }
1130
1131 cmpcharp->width = rightmost;
1132 if (cmpcharp->width < rightmost)
1133 /* To get a ceiling integer value. */
1134 cmpcharp->width++;
1135 }
1136
1137 cmpchar_table[n_cmpchars] = cmpcharp;
1138
1139 return n_cmpchars++;
1140}
1141
1142/* Return the Nth element of the composite character C. */
1143int
1144cmpchar_component (c, n)
1145 unsigned int c, n;
1146{
1147 int id = COMPOSITE_CHAR_ID (c);
1148
1149 if (id >= n_cmpchars /* C is not a valid composite character. */
1150 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1151 return -1;
1152 /* No face data is stored in glyph code. */
1153 return ((int) (cmpchar_table[id]->glyph[n]));
1154}
1155
1156DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1157 "T if CHAR is a composite character.")
1158 (ch)
1159 Lisp_Object ch;
1160{
1161 CHECK_NUMBER (ch, 0);
1162 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1163}
1164
1165DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1166 2, 2, 0,
1167 "Return the IDXth component character of composite character CHARACTER.")
1168 (character, idx)
1169 Lisp_Object character, idx;
1170{
1171 int c;
1172
1173 CHECK_NUMBER (character, 0);
1174 CHECK_NUMBER (idx, 1);
1175
1176 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1177 args_out_of_range (character, idx);
1178
1179 return make_number (c);
1180}
1181
1182DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1183 2, 2, 0,
55001746
KH
1184 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1185The returned rule is for composing the Nth component\n\
1186on the (N-1)th component. If N is 0, the returned value is always 255.")
1187 (character, n)
1188 Lisp_Object character, n;
4ed46869
KH
1189{
1190 int id, i;
1191
1192 CHECK_NUMBER (character, 0);
55001746 1193 CHECK_NUMBER (n, 1);
4ed46869
KH
1194
1195 id = COMPOSITE_CHAR_ID (XINT (character));
1196 if (id < 0 || id >= n_cmpchars)
1197 error ("Invalid composite character: %d", XINT (character));
55001746 1198 i = XINT (n);
4ed46869 1199 if (i > cmpchar_table[id]->glyph_len)
55001746 1200 args_out_of_range (character, n);
4ed46869
KH
1201
1202 return make_number (cmpchar_table[id]->cmp_rule[i]);
1203}
1204
1205DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1206 Scmpchar_cmp_rule_p, 1, 1, 0,
1207 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1208 (character)
1209 Lisp_Object character;
1210{
1211 int id;
1212
1213 CHECK_NUMBER (character, 0);
1214 id = COMPOSITE_CHAR_ID (XINT (character));
1215 if (id < 0 || id >= n_cmpchars)
1216 error ("Invalid composite character: %d", XINT (character));
1217
1218 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1219}
1220
1221DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1222 Scmpchar_cmp_count, 1, 1, 0,
1223 "Return number of compoents of composite character CHARACTER.")
1224 (character)
1225 Lisp_Object character;
1226{
1227 int id;
1228
1229 CHECK_NUMBER (character, 0);
1230 id = COMPOSITE_CHAR_ID (XINT (character));
1231 if (id < 0 || id >= n_cmpchars)
1232 error ("Invalid composite character: %d", XINT (character));
1233
1234 return (make_number (cmpchar_table[id]->glyph_len));
1235}
1236
1237DEFUN ("compose-string", Fcompose_string, Scompose_string,
1238 1, 1, 0,
1239 "Return one char string composed from all characters in STRING.")
1240 (str)
1241 Lisp_Object str;
1242{
1243 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1244 int len, i;
1245
1246 CHECK_STRING (str, 0);
1247
1248 buf[0] = LEADING_CODE_COMPOSITION;
1249 p = XSTRING (str)->data;
1250 pend = p + XSTRING (str)->size;
1251 i = 1;
1252 while (p < pend)
1253 {
1254 if (*p < 0x20 || *p == 127) /* control code */
1255 error ("Invalid component character: %d", *p);
1256 else if (*p < 0x80) /* ASCII */
1257 {
1258 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1259 error ("Too long string to be composed: %s", XSTRING (str)->data);
1260 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1261 code itself. */
1262 buf[i++] = 0xA0;
1263 buf[i++] = *p++ + 0x80;
1264 }
1265 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1266 {
1267 /* Already composed. Eliminate the heading
1268 LEADING_CODE_COMPOSITION, keep the remaining bytes
1269 unchanged. */
1270 p++;
1271 ptemp = p;
1272 while (! CHAR_HEAD_P (p)) p++;
1273 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1274 error ("Too long string to be composed: %s", XSTRING (str)->data);
1275 bcopy (ptemp, buf + i, p - ptemp);
1276 i += p - ptemp;
1277 }
1278 else /* multibyte char */
1279 {
1280 /* Add 0x20 to the base leading-code, keep the remaining
1281 bytes unchanged. */
1282 len = BYTES_BY_CHAR_HEAD (*p);
1283 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1284 error ("Too long string to be composed: %s", XSTRING (str)->data);
1285 bcopy (p, buf + i, len);
1286 buf[i] += 0x20;
1287 p += len, i += len;
1288 }
1289 }
1290
1291 if (i < 5)
1292 /* STR contains only one character, which can't be composed. */
1293 error ("Too short string to be composed: %s", XSTRING (str)->data);
1294
1295 return make_string (buf, i);
1296}
1297
1298\f
1299charset_id_internal (charset_name)
1300 char *charset_name;
1301{
1302 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1303
1304 if (!VECTORP (val))
1305 error ("Charset %s is not defined", charset_name);
1306
1307 return (XINT (XVECTOR (val)->contents[0]));
1308}
1309
1310DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1311 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1312 ()
1313{
1314 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1315 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1316 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1317 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1318 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1319 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1320 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1321 return Qnil;
1322}
1323
1324init_charset_once ()
1325{
1326 int i, j, k;
1327
1328 staticpro (&Vcharset_table);
1329 staticpro (&Vcharset_symbol_table);
1330
1331 /* This has to be done here, before we call Fmake_char_table. */
1332 Qcharset_table = intern ("charset-table");
1333 staticpro (&Qcharset_table);
1334
1335 /* Intern this now in case it isn't already done.
1336 Setting this variable twice is harmless.
1337 But don't staticpro it here--that is done in alloc.c. */
1338 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1339
1340 /* Now we are ready to set up this property, so we can
1341 create the charset table. */
1342 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1343 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1344
513ee442 1345 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
4ed46869
KH
1346
1347 /* Setup tables. */
1348 for (i = 0; i < 2; i++)
1349 for (j = 0; j < 2; j++)
1350 for (k = 0; k < 128; k++)
1351 iso_charset_table [i][j][k] = -1;
1352
1353 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1354 cmpchar_table_size = n_cmpchars = 0;
1355
1356 for (i = 0; i < 256; i++)
1357 BYTES_BY_CHAR_HEAD (i) = 1;
1358 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1359 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1360 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1361 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1362 /* The following doesn't reflect the actual bytes, but just to tell
1363 that it is a start of a multibyte character. */
1364 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1365
1366 for (i = 0; i < 128; i++)
1367 WIDTH_BY_CHAR_HEAD (i) = 1;
1368 for (; i < 256; i++)
1369 WIDTH_BY_CHAR_HEAD (i) = 4;
1370 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1371 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1372 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1373 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1374}
1375
1376#ifdef emacs
1377
1378syms_of_charset ()
1379{
1380 Qascii = intern ("ascii");
1381 staticpro (&Qascii);
1382
1383 Qcharset = intern ("charset");
1384 staticpro (&Qcharset);
1385
1386 /* Define ASCII charset now. */
1387 update_charset_table (make_number (CHARSET_ASCII),
1388 make_number (1), make_number (94),
1389 make_number (1),
1390 make_number (0),
1391 make_number ('B'),
1392 make_number (0),
1393 build_string ("ASCII"),
1394 build_string ("ASCII"),
1395 build_string ("ASCII (ISO646 IRV)"));
1396 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1397 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1398
1399 Qcomposition = intern ("composition");
1400 staticpro (&Qcomposition);
1401 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1402
1403 defsubr (&Sdefine_charset);
1404 defsubr (&Sdeclare_equiv_charset);
1405 defsubr (&Sfind_charset_region);
1406 defsubr (&Sfind_charset_string);
1407 defsubr (&Smake_char_internal);
1408 defsubr (&Ssplit_char);
1409 defsubr (&Schar_charset);
1410 defsubr (&Siso_charset);
1411 defsubr (&Schar_bytes);
1412 defsubr (&Schar_width);
1413 defsubr (&Sstring_width);
1414 defsubr (&Schar_direction);
1415 defsubr (&Schars_in_string);
1416 defsubr (&Schar_boundary_p);
1417 defsubr (&Sconcat_chars);
1418 defsubr (&Scmpcharp);
1419 defsubr (&Scmpchar_component);
1420 defsubr (&Scmpchar_cmp_rule);
1421 defsubr (&Scmpchar_cmp_rule_p);
1422 defsubr (&Scmpchar_cmp_count);
1423 defsubr (&Scompose_string);
1424 defsubr (&Ssetup_special_charsets);
1425
1426 DEFVAR_LISP ("charset-list", &Vcharset_list,
1427 "List of charsets ever defined.");
1428 Vcharset_list = Fcons (Qascii, Qnil);
1429
1430 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1431 "Leading-code of composite characters.");
1432 leading_code_composition = LEADING_CODE_COMPOSITION;
1433
1434 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1435 "Leading-code of private TYPE9N charset of column-width 1.");
1436 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1437
1438 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1439 "Leading-code of private TYPE9N charset of column-width 2.");
1440 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1441
1442 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1443 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1444 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1445
1446 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1447 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1448 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1449}
1450
1451#endif /* emacs */