(MAX_CHARSET): Definition changed to the actual
[bpt/emacs.git] / src / charset.c
CommitLineData
4ed46869
KH
1/* Multilingual characters handler.
2 Ver.1.0
4ed46869
KH
3 Copyright (C) 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
369314dc
KH
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
4ed46869 12
369314dc
KH
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
4ed46869 17
369314dc
KH
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
20the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA. */
4ed46869
KH
22
23/* At first, see the document in `charset.h' to understand the code in
24 this file. */
25
26#include <stdio.h>
27
28#ifdef emacs
29
30#include <sys/types.h>
31#include <config.h>
32#include "lisp.h"
33#include "buffer.h"
34#include "charset.h"
35#include "coding.h"
36
37#else /* not emacs */
38
39#include "mulelib.h"
40
41#endif /* emacs */
42
43Lisp_Object Qcharset, Qascii, Qcomposition;
44
45/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */
47int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52/* Declaration of special charsets. */
53int charset_ascii; /* ASCII */
54int charset_composition; /* for a composite character */
55int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63Lisp_Object Qcharset_table;
64
65/* A char-table containing information of each character set. */
66Lisp_Object Vcharset_table;
67
68/* A vector of charset symbol indexed by charset-id. This is used
69 only for returning charset symbol from C functions. */
70Lisp_Object Vcharset_symbol_table;
71
72/* A list of charset symbols ever defined. */
73Lisp_Object Vcharset_list;
74
75/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
76int bytes_by_char_head[256];
77int width_by_char_head[256];
78
79/* Mapping table from ISO2022's charset (specified by DIMENSION,
80 CHARS, and FINAL-CHAR) to Emacs' charset. */
81int iso_charset_table[2][2][128];
82
83/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
84unsigned char *_fetch_multibyte_char_p;
85int _fetch_multibyte_char_len;
86
87/* Set STR a pointer to the multi-byte form of the character C. If C
88 is not a composite character, the multi-byte form is set in WORKBUF
89 and STR points WORKBUF. The caller should allocate at least 4-byte
90 area at WORKBUF in advance. Returns the length of the multi-byte
91 form.
92
93 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
94 function directly if C can be an ASCII character. */
95
96int
97non_ascii_char_to_string (c, workbuf, str)
98 int c;
99 unsigned char *workbuf, **str;
100{
101 int charset;
102 unsigned char c1, c2;
103
104 if (COMPOSITE_CHAR_P (c))
105 {
106 int cmpchar_id = COMPOSITE_CHAR_ID (c);
107
108 if (cmpchar_id < n_cmpchars)
109 {
110 *str = cmpchar_table[cmpchar_id]->data;
111 return cmpchar_table[cmpchar_id]->len;
112 }
113 else
114 {
115 *str = workbuf;
116 return 0;
117 }
118 }
119
120 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
121
122 *str = workbuf;
123 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
124 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
125 workbuf++;
126 *workbuf++ = c1 | 0x80;
127 if (c2)
128 *workbuf++ = c2 | 0x80;
129
130 return (workbuf - *str);
131}
132
133/* Return a non-ASCII character of which multi-byte form is at STR of
134 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
135 character is set to the address ACTUAL_LEN.
136
137 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
138 directly if STR can hold an ASCII character. */
139
140string_to_non_ascii_char (str, len, actual_len)
141 unsigned char *str;
142 int len, *actual_len;
143{
144 int charset;
145 unsigned char c1, c2;
146 register int c;
147
148 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
149 {
150 if (actual_len)
151 *actual_len = 1;
152 return (int) *str;
153 }
154
155 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
156
157 if (actual_len)
158 *actual_len = (charset == CHARSET_COMPOSITION
159 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
160 : BYTES_BY_CHAR_HEAD (*str));
161 return c;
162}
163
164/* Return the length of the multi-byte form at string STR of length LEN. */
165int
166multibyte_form_length (str, len)
167 unsigned char *str;
168 int len;
169{
170 int charset;
171 unsigned char c1, c2;
172 register int c;
173
174 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
175 return 1;
176
177 return (charset == CHARSET_COMPOSITION
178 ? cmpchar_table[(c1 << 7) | c2]->len
179 : BYTES_BY_CHAR_HEAD (*str));
180}
181
182/* Check if string STR of length LEN contains valid multi-byte form of
183 a character. If valid, charset and position codes of the character
184 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
185 return -1. This should be used only in the macro SPLIT_STRING
186 which checks range of STR in advance. */
187
188split_non_ascii_string (str, len, charset, c1, c2)
189 register unsigned char *str, *c1, *c2;
190 register int len, *charset;
191{
192 register unsigned int cs = *str++;
193
194 if (cs == LEADING_CODE_COMPOSITION)
195 {
196 int cmpchar_id = str_cmpchar_id (str - 1, len);
197
198 if (cmpchar_id < 0)
199 return -1;
200 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
201 }
202 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
203 && CHARSET_DEFINED_P (cs))
204 {
205 *charset = cs;
206 if (*str < 0xA0)
207 return -1;
208 *c1 = (*str++) & 0x7F;
209 if (CHARSET_DIMENSION (cs) == 2)
210 {
211 if (*str < 0xA0)
212 return -1;
213 *c2 = (*str++) & 0x7F;
214 }
215 }
216 else
217 return -1;
218 return 0;
219}
220
221/* Update the table Vcharset_table with the given arguments (see the
222 document of `define-charset' for the meaning of each argument).
223 Several other table contents are also updated. The caller should
224 check the validity of CHARSET-ID and the remaining arguments in
225 advance. */
226
227void
228update_charset_table (charset_id, dimension, chars, width, direction,
229 iso_final_char, iso_graphic_plane,
230 short_name, long_name, description)
231 Lisp_Object charset_id, dimension, chars, width, direction;
232 Lisp_Object iso_final_char, iso_graphic_plane;
233 Lisp_Object short_name, long_name, description;
234{
235 int charset = XINT (charset_id);
236 int bytes;
237 unsigned char leading_code_base, leading_code_ext;
238
239 if (NILP (Faref (Vcharset_table, charset_id)))
240 Faset (Vcharset_table, charset_id,
241 Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil));
242
243 /* Get byte length of multibyte form, base leading-code, and
244 extended leading-code of the charset. See the comment under the
245 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
246 bytes = XINT (dimension);
247 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
248 {
249 /* Official charset, it doesn't have an extended leading-code. */
250 if (charset != CHARSET_ASCII)
251 bytes += 1; /* For a base leading-code. */
252 leading_code_base = charset;
253 leading_code_ext = 0;
254 }
255 else
256 {
257 /* Private charset. */
258 bytes += 2; /* For base and extended leading-codes. */
259 leading_code_base
260 = (charset < LEADING_CODE_EXT_12
261 ? LEADING_CODE_PRIVATE_11
262 : (charset < LEADING_CODE_EXT_21
263 ? LEADING_CODE_PRIVATE_12
264 : (charset < LEADING_CODE_EXT_22
265 ? LEADING_CODE_PRIVATE_21
266 : LEADING_CODE_PRIVATE_22)));
267 leading_code_ext = charset;
268 }
269
270 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
271 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
272 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
273 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
274 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
275 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
276 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
277 = make_number (leading_code_base);
278 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
279 = make_number (leading_code_ext);
280 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
281 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
282 = iso_graphic_plane;
283 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
284 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
285 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
286 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
287
288 {
289 /* If we have already defined a charset which has the same
290 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
291 DIRECTION, we must update the entry REVERSE-CHARSET of both
292 charsets. If there's no such charset, the value of the entry
293 is set to nil. */
294 int i;
295
296 for (i = 0; i < MAX_CHARSET; i++)
297 if (!NILP (CHARSET_TABLE_ENTRY (i)))
298 {
299 if (CHARSET_DIMENSION (i) == XINT (dimension)
300 && CHARSET_CHARS (i) == XINT (chars)
301 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
302 && CHARSET_DIRECTION (i) != XINT (direction))
303 {
304 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
305 = make_number (i);
306 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
307 break;
308 }
309 }
310 if (i >= MAX_CHARSET)
311 /* No such a charset. */
312 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
313 = make_number (-1);
314 }
315
316 if (charset != CHARSET_ASCII
317 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
318 {
319 /* Update tables bytes_by_char_head and width_by_char_head. */
320 bytes_by_char_head[leading_code_base] = bytes;
321 width_by_char_head[leading_code_base] = XINT (width);
322
323 /* Update table emacs_code_class. */
324 emacs_code_class[charset] = (bytes == 2
325 ? EMACS_leading_code_2
326 : (bytes == 3
327 ? EMACS_leading_code_3
328 : EMACS_leading_code_4));
329 }
330
331 /* Update table iso_charset_table. */
332 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
333 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
334}
335
336#ifdef emacs
337
338/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
339 is invalid. */
340int
341get_charset_id (charset_symbol)
342 Lisp_Object charset_symbol;
343{
344 Lisp_Object val;
345 int charset;
346
347 return ((SYMBOLP (charset_symbol)
348 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
349 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
350 CHARSET_VALID_P (charset)))
351 ? charset : -1);
352}
353
354/* Return an identification number for a new private charset of
355 DIMENSION and WIDTH. If there's no more room for the new charset,
356 return 0. */
357Lisp_Object
358get_new_private_charset_id (dimension, width)
359 int dimension, width;
360{
361 int charset, from, to;
362
363 if (dimension == 1)
364 {
365 if (width == 1)
366 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
367 else
368 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
369 }
370 else
371 {
372 if (width == 1)
373 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
374 else
375 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
376 }
377
378 for (charset = from; charset < to; charset++)
379 if (!CHARSET_DEFINED_P (charset)) break;
380
381 return make_number (charset < to ? charset : 0);
382}
383
384DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
385 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
386If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
387 treated as a private charset.\n\
388INFO-VECTOR is a vector of the format:\n\
389 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
390 SHORT-NAME LONG-NAME DESCRIPTION]\n\
391The meanings of each elements is as follows:\n\
392DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
393CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
394WIDTH (integer) is the number of columns a character in the charset\n\
395occupies on the screen: one of 0, 1, and 2.\n\
396\n\
397DIRECTION (integer) is the rendering direction of characters in the\n\
398charset when rendering. If 0, render from right to left, else\n\
399render from left to right.\n\
400\n\
401ISO-FINAL-CHAR (character) is the final character of the\n\
402corresponding ISO 2022 charset.\n\
403\n\
404ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
405while encoding to variants of ISO 2022 coding system, one of the\n\
406following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
407\n\
408SHORT-NAME (string) is the short name to refer to the charset.\n\
409\n\
410LONG-NAME (string) is the long name to refer to the charset.\n\
411\n\
412DESCRIPTION (string) is the description string of the charset.")
413 (charset_id, charset_symbol, info_vector)
414 Lisp_Object charset_id, charset_symbol, info_vector;
415{
416 Lisp_Object *vec;
417
418 if (!NILP (charset_id))
419 CHECK_NUMBER (charset_id, 0);
420 CHECK_SYMBOL (charset_symbol, 1);
421 CHECK_VECTOR (info_vector, 2);
422
423 if (! NILP (charset_id))
424 {
425 if (! CHARSET_VALID_P (XINT (charset_id)))
426 error ("Invalid CHARSET: %d", XINT (charset_id));
427 else if (CHARSET_DEFINED_P (XINT (charset_id)))
428 error ("Already defined charset: %d", XINT (charset_id));
429 }
430
431 vec = XVECTOR (info_vector)->contents;
432 if (XVECTOR (info_vector)->size != 9
433 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
434 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
435 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
436 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
437 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
438 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
439 || !STRINGP (vec[6])
440 || !STRINGP (vec[7])
441 || !STRINGP (vec[8]))
442 error ("Invalid info-vector argument for defining charset %s",
443 XSYMBOL (charset_symbol)->name->data);
444
445 if (NILP (charset_id))
446 {
447 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
448 if (XINT (charset_id) == 0)
449 error ("There's no room for a new private charset %s",
450 XSYMBOL (charset_symbol)->name->data);
451 }
452
453 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
454 vec[4], vec[5], vec[6], vec[7], vec[8]);
455 Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id));
456 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
457 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
458 return Qnil;
459}
460
461DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
462 4, 4, 0,
463 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
464CHARSET should be defined by `defined-charset' in advance.")
465 (dimension, chars, final_char, charset_symbol)
466 Lisp_Object dimension, chars, final_char, charset_symbol;
467{
468 int charset;
469
470 CHECK_NUMBER (dimension, 0);
471 CHECK_NUMBER (chars, 1);
472 CHECK_NUMBER (final_char, 2);
473 CHECK_SYMBOL (charset_symbol, 3);
474
475 if (XINT (dimension) != 1 && XINT (dimension) != 2)
476 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
477 if (XINT (chars) != 94 && XINT (chars) != 96)
478 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
479 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
480 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
481 if ((charset = get_charset_id (charset_symbol)) < 0)
482 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
483
484 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
485 return Qnil;
486}
487
488/* Return number of different charsets in STR of length LEN. In
489 addition, for each found charset N, CHARSETS[N] is set 1. The
490 caller should allocate CHARSETS (MAX_CHARSET bytes) in advance. */
491
492int
493find_charset_in_str (str, len, charsets)
494 unsigned char *str, *charsets;
495 int len;
496{
497 int num = 0;
498
499 while (len > 0)
500 {
501 int bytes = BYTES_BY_CHAR_HEAD (*str);
502 int charset = CHARSET_AT (str);
503
504 if (!charsets[charset])
505 {
506 charsets[charset] = 1;
507 num += 1;
508 }
509 str += bytes;
510 len -= bytes;
511 }
512 return num;
513}
514
515DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
516 2, 2, 0,
517 "Return a list of charsets in the region between BEG and END.\n\
518BEG and END are buffer positions.")
519 (beg, end)
520 Lisp_Object beg, end;
521{
522 char charsets[MAX_CHARSET];
523 int from, to, stop, i;
524 Lisp_Object val;
525
526 validate_region (&beg, &end);
527 from = XFASTINT (beg);
528 stop = to = XFASTINT (end);
529 if (from < GPT && GPT < to)
530 stop = GPT;
531 bzero (charsets, MAX_CHARSET);
532 while (1)
533 {
534 find_charset_in_str (POS_ADDR (from), stop - from, charsets);
535 if (stop < to)
536 from = stop, stop = to;
537 else
538 break;
539 }
540 val = Qnil;
541 for (i = MAX_CHARSET - 1; i >= 0; i--)
542 if (charsets[i])
543 val = Fcons (CHARSET_SYMBOL (i), val);
544 return val;
545}
546
547DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
548 1, 1, 0,
549 "Return a list of charsets in STR.")
550 (str)
551 Lisp_Object str;
552{
553 char charsets[MAX_CHARSET];
554 int i;
555 Lisp_Object val;
556
557 CHECK_STRING (str, 0);
558 bzero (charsets, MAX_CHARSET);
559 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
560 val = Qnil;
561 for (i = MAX_CHARSET - 1; i >= 0; i--)
562 if (charsets[i])
563 val = Fcons (CHARSET_SYMBOL (i), val);
564 return val;
565}
566\f
567DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
568 "Return a character of CHARSET and position-codes CODE1 and CODE2.\n\
569CODE1 and CODE2 are optional, but if you don't supply\n\
570 sufficient position-codes, return a generic character which stands for\n\
571all characters or group of characters in the character sets.\n\
572A generic character can be an argument of `modify-syntax-entry' and\n\
573`modify-category-entry'.")
574 (charset, code1, code2)
575 Lisp_Object charset, code1, code2;
576{
577 CHECK_NUMBER (charset, 0);
578
579 if (NILP (code1))
580 XSETFASTINT (code1, 0);
581 else
582 CHECK_NUMBER (code1, 1);
583 if (NILP (code2))
584 XSETFASTINT (code2, 0);
585 else
586 CHECK_NUMBER (code2, 2);
587
588 if (!CHARSET_DEFINED_P (XINT (charset)))
589 error ("Invalid charset: %d", XINT (charset));
590
591 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
592}
593
594DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
595 "Return list of charset and one or two position-codes of CHAR.")
596 (ch)
597 Lisp_Object ch;
598{
599 Lisp_Object val;
600 int charset;
601 unsigned char c1, c2;
602
603 CHECK_NUMBER (ch, 0);
604 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
605 return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2)
606 ? Fcons (CHARSET_SYMBOL (charset),
607 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
608 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
609}
610
611DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
612 "Return charset of CHAR.")
613 (ch)
614 Lisp_Object ch;
615{
616 CHECK_NUMBER (ch, 0);
617
618 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
619}
620
621DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
622 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
623 (dimension, chars, final_char)
624 Lisp_Object dimension, chars, final_char;
625{
626 int charset;
627
628 CHECK_NUMBER (dimension, 0);
629 CHECK_NUMBER (chars, 1);
630 CHECK_NUMBER (final_char, 2);
631
632 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
633 return Qnil;
634 return CHARSET_SYMBOL (charset);
635}
636
637DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
638 "Return byte length of multi-byte form of CHAR.")
639 (ch)
640 Lisp_Object ch;
641{
642 Lisp_Object val;
643 int bytes;
644
645 CHECK_NUMBER (ch, 0);
646 if (COMPOSITE_CHAR_P (XFASTINT (ch)))
647 {
648 unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
649
650 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
651 }
652 else
653 {
654 int charset = CHAR_CHARSET (XFASTINT (ch));
655
656 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
657 }
658
659 XSETFASTINT (val, bytes);
660 return val;
661}
662
663/* Return the width of character of which multi-byte form starts with
664 C. The width is measured by how many columns occupied on the
665 screen when displayed in the current buffer. */
666
667#define ONE_BYTE_CHAR_WIDTH(c) \
668 (c < 0x20 \
669 ? (c == '\t' \
670 ? current_buffer->tab_width \
671 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
672 : (c < 0x7f \
673 ? 1 \
674 : (c == 0x7F \
675 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
676 : ((! NILP (current_buffer->enable_multibyte_characters) \
677 && BASE_LEADING_CODE_P (c)) \
678 ? WIDTH_BY_CHAR_HEAD (c) \
679 : 4)))) \
680
681
682DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
683 "Return width of CHAR when displayed in the current buffer.\n\
684The width is measured by how many columns it occupies on the screen.")
685 (ch)
686 Lisp_Object ch;
687{
688 Lisp_Object val;
689 int c;
690
691 CHECK_NUMBER (ch, 0);
692
693 c = XFASTINT (ch);
694 if (SINGLE_BYTE_CHAR_P (c))
695 XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
696 else if (COMPOSITE_CHAR_P (c))
697 {
698 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
699 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
700 }
701 else
702 {
703 int charset = CHAR_CHARSET (c);
704
705 XSETFASTINT (val, CHARSET_WIDTH (charset));
706 }
707 return val;
708}
709
710/* Return width of string STR of length LEN when displayed in the
711 current buffer. The width is measured by how many columns it
712 occupies on the screen. */
713int
714strwidth (str, len)
715 unsigned char *str;
716 int len;
717{
718 unsigned char *endp = str + len;
719 int width = 0;
720
721 while (str < endp) {
722 if (*str == LEADING_CODE_COMPOSITION)
723 {
724 int id = str_cmpchar_id (str, endp - str);
725
726 if (id < 0)
727 {
728 width += 4;
729 str++;
730 }
731 else
732 {
733 width += cmpchar_table[id]->width;
734 str += cmpchar_table[id]->len;
735 }
736 }
737 else
738 {
739 width += ONE_BYTE_CHAR_WIDTH (*str);
740 str += BYTES_BY_CHAR_HEAD (*str);
741 }
742 }
743 return width;
744}
745
746DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
747 "Return width of STRING when displayed in the current buffer.\n\
748Width is measured by how many columns it occupies on the screen.\n\
749When calculating width of a multi-byte character in STRING,\n\
750 only the base leading-code is considered and the validity of\n\
751 the following bytes are not checked.")
752 (str)
753 Lisp_Object str;
754{
755 Lisp_Object val;
756
757 CHECK_STRING (str, 0);
758 XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
759 return val;
760}
761
762DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
763 "Return the direction of CHAR.\n\
764The returned value is 0 for left-to-right and 1 for right-to-left.")
765 (ch)
766 Lisp_Object ch;
767{
768 int charset;
769
770 CHECK_NUMBER (ch, 0);
771 charset = CHAR_CHARSET (XFASTINT (ch));
772 if (!CHARSET_DEFINED_P (charset))
773 error ("Invalid character: %d", XINT (ch));
774 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
775}
776
777DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
778 "Return number of characters in STRING.")
779 (str)
780 Lisp_Object str;
781{
782 Lisp_Object val;
783 unsigned char *p, *endp;
784 int chars;
785
786 CHECK_STRING (str, 0);
787
788 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
789 chars = 0;
790 while (p < endp)
791 {
792 if (*p == LEADING_CODE_COMPOSITION)
793 {
794 p++;
795 while (p < endp && ! CHAR_HEAD_P (p)) p++;
796 }
797 else
798 p += BYTES_BY_CHAR_HEAD (*p);
799 chars++;
800 }
801
802 XSETFASTINT (val, chars);
803 return val;
804}
805
806DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
807 "Return non-nil value if POS is at character boundary of multibyte form.\n\
808The return value is:\n\
809 0 if POS is at an ASCII character or at the end of range,\n\
810 1 if POS is at a head of 2-byte length multi-byte form,\n\
811 2 if POS is at a head of 3-byte length multi-byte form,\n\
812 3 if POS is at a head of 4-byte length multi-byte form,\n\
813 4 if POS is at a head of multi-byte form of a composite character.\n\
814If POS is out of range or not at character boundary, return NIL.")
815 (pos)
816 Lisp_Object pos;
817{
818 Lisp_Object val;
819 int n;
820
821 CHECK_NUMBER_COERCE_MARKER (pos, 0);
822
823 n = XINT (pos);
824 if (n < BEGV || n > ZV)
825 return Qnil;
826
827 if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
828 XSETFASTINT (val, 0);
829 else
830 {
831 unsigned char *p = POS_ADDR (n);
832
833 if (SINGLE_BYTE_CHAR_P (*p))
834 XSETFASTINT (val, 0);
835 else if (*p == LEADING_CODE_COMPOSITION)
836 XSETFASTINT (val, 4);
837 else if (BYTES_BY_CHAR_HEAD (*p) > 1)
838 XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
839 else
840 val = Qnil;
841 }
842 return val;
843}
844
845DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
846 "Concatenate all the argument characters and make the result a string.")
847 (nargs, args)
848 int nargs;
849 Lisp_Object *args;
850{
851 int i, n = XINT (nargs);
852 unsigned char *buf
853 = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
854 unsigned char *p = buf;
855 Lisp_Object val;
856
857 for (i = 0; i < n; i++)
858 {
859 int c, len;
860 unsigned char *str;
861
862 if (!INTEGERP (args[i]))
863 {
864 free (buf);
865 CHECK_NUMBER (args[i], 0);
866 }
867 c = XINT (args[i]);
868 len = CHAR_STRING (c, p, str);
869 if (p != str)
870 /* C is a composite character. */
871 bcopy (str, p, len);
872 p += len;
873 }
874
875 val = make_string (buf, p - buf);
876 free (buf);
877 return val;
878}
879
880#endif /* emacs */
881\f
882/*** Composite characters staffs ***/
883
884/* Each composite character is identified by CMPCHAR-ID which is
885 assigned when Emacs needs the character code of the composite
886 character (e.g. when displaying it on the screen). See the
887 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
888 composite character is represented in Emacs. */
889
890/* If `static' is defined, it means that it is defined to null string. */
891#ifndef static
892/* The following function is copied from lread.c. */
893static int
894hash_string (ptr, len)
895 unsigned char *ptr;
896 int len;
897{
898 register unsigned char *p = ptr;
899 register unsigned char *end = p + len;
900 register unsigned char c;
901 register int hash = 0;
902
903 while (p != end)
904 {
905 c = *p++;
906 if (c >= 0140) c -= 40;
907 hash = ((hash<<3) + (hash>>28) + c);
908 }
909 return hash & 07777777777;
910}
911#endif
912
913/* Table of pointers to the structure `cmpchar_info' indexed by
914 CMPCHAR-ID. */
915struct cmpchar_info **cmpchar_table;
916/* The current size of `cmpchar_table'. */
917static int cmpchar_table_size;
918/* Number of the current composite characters. */
919int n_cmpchars;
920
921#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
922
923static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
924
925/* Each element of `cmpchar_hash_table' is a pointer to an array of
926 integer, where the 1st element is the size of the array, the 2nd
927 element is how many elements are actually used in the array, and
928 the remaining elements are CMPCHAR-IDs of composite characters of
929 the same hash value. */
930#define CMPCHAR_HASH_SIZE(table) table[0]
931#define CMPCHAR_HASH_USED(table) table[1]
932#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
933
934/* Return CMPCHAR-ID of the composite character in STR of the length
935 LEN. If the composite character has not yet been registered,
936 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
937 is the sole function for assigning CMPCHAR-ID. */
938int
939str_cmpchar_id (str, len)
940 unsigned char *str;
941 int len;
942{
943 int hash_idx, *hashp;
944 unsigned char *buf;
945 int embedded_rule; /* 1 if composition rule is embedded. */
946 int chars; /* number of components. */
947 int i;
948 struct cmpchar_info *cmpcharp;
949
950 if (len < 5)
951 /* Any composite char have at least 3-byte length. */
952 return -1;
953
954 /* The second byte 0xFF means compostion rule is embedded. */
955 embedded_rule = (str[1] == 0xFF);
956
957 /* At first, get the actual length of the composite character. */
958 {
959 unsigned char *p, *endp = str + 1, *lastp = str + len;
960 int bytes;
961
962 while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
963 chars = 0;
964 p = str + 1 + embedded_rule;
965 while (p < endp)
966 {
967 /* No need of checking if *P is 0xA0 because
968 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
969 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
970 chars++;
971 }
972 len = (p -= embedded_rule) - str;
973 if (p > endp)
974 len -= - bytes, chars--;
975
976 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
977 /* Invalid number of components. */
978 return -1;
979 }
980 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
981 hashp = cmpchar_hash_table[hash_idx];
982
983 /* Then, look into the hash table. */
984 if (hashp != NULL)
985 /* Find the correct one among composite characters of the same
986 hash value. */
987 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
988 {
989 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
990 if (len == cmpcharp->len
991 && ! bcmp (str, cmpcharp->data, len))
992 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
993 }
994
995 /* We have to register the composite character in cmpchar_table. */
996 /* Make the entry in hash table. */
997 if (hashp == NULL)
998 {
999 /* Make a table for 8 composite characters initially. */
1000 hashp = (cmpchar_hash_table[hash_idx]
1001 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1002 CMPCHAR_HASH_SIZE (hashp) = 10;
1003 CMPCHAR_HASH_USED (hashp) = 2;
1004 }
1005 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1006 {
1007 CMPCHAR_HASH_SIZE (hashp) += 8;
1008 hashp = (cmpchar_hash_table[hash_idx]
1009 = (int *) xrealloc (hashp,
1010 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1011 }
1012 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1013 CMPCHAR_HASH_USED (hashp)++;
1014
1015 /* Set information of the composite character in cmpchar_table. */
1016 if (cmpchar_table_size == 0)
1017 {
1018 /* This is the first composite character to be registered. */
1019 cmpchar_table_size = 256;
1020 cmpchar_table
1021 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1022 * cmpchar_table_size);
1023 }
1024 else if (cmpchar_table_size <= n_cmpchars)
1025 {
1026 cmpchar_table_size += 256;
1027 cmpchar_table
1028 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1029 sizeof (cmpchar_table[0])
1030 * cmpchar_table_size);
1031 }
1032
1033 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1034
1035 cmpcharp->len = len;
1036 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1037 bcopy (str, cmpcharp->data, len);
1038 cmpcharp->data[len] = 0;
1039 cmpcharp->glyph_len = chars;
1040 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1041 if (embedded_rule)
1042 {
1043 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1044 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1045 }
1046 else
1047 {
1048 cmpcharp->cmp_rule = NULL;
1049 cmpcharp->col_offset = NULL;
1050 }
1051
1052 /* Setup GLYPH data and composition rules (if any) so as not to make
1053 them every time on displaying. */
1054 {
1055 unsigned char *bufp;
1056 int width;
1057 float leftmost = 0.0, rightmost = 1.0;
1058
1059 if (embedded_rule)
1060 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1061 cmpcharp->col_offset[0] = 0;
1062
1063 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1064 {
1065 if (embedded_rule)
1066 cmpcharp->cmp_rule[i] = *bufp++;
1067
1068 if (*bufp == 0xA0) /* This is an ASCII character. */
1069 {
1070 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1071 width = 1;
1072 bufp++;
1073 }
1074 else /* Multibyte character. */
1075 {
1076 /* Make `bufp' point normal multi-byte form temporally. */
1077 *bufp -= 0x20;
1078 cmpcharp->glyph[i]
1079 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1080 width = WIDTH_BY_CHAR_HEAD (*bufp);
1081 *bufp += 0x20;
1082 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1083 }
1084
1085 if (embedded_rule && i > 0)
1086 {
1087 /* Reference points (global_ref and new_ref) are
1088 encoded as below:
1089
1090 0--1--2 -- ascent
1091 | |
1092 | |
1093 | 4 -+--- center
1094 -- 3 5 -- baseline
1095 | |
1096 6--7--8 -- descent
1097
1098 Now, we calculate the column offset of the new glyph
1099 from the left edge of the first glyph. This can avoid
1100 the same calculation everytime displaying this
1101 composite character. */
1102
1103 /* Reference points of global glyph and new glyph. */
1104 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1105 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1106 /* Column offset relative to the first glyph. */
1107 float left = (leftmost
1108 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1109 - (new_ref % 3) * width / 2.0);
1110
1111 cmpcharp->col_offset[i] = left;
1112 if (left < leftmost)
1113 leftmost = left;
1114 if (left + width > rightmost)
1115 rightmost = left + width;
1116 }
1117 else
1118 {
1119 if (width > rightmost)
1120 rightmost = width;
1121 }
1122 }
1123 if (embedded_rule)
1124 {
1125 /* Now col_offset[N] are relative to the left edge of the
1126 first component. Make them relative to the left edge of
1127 overall glyph. */
1128 for (i = 0; i < chars; i++)
1129 cmpcharp->col_offset[i] -= leftmost;
1130 /* Make rightmost holds width of overall glyph. */
1131 rightmost -= leftmost;
1132 }
1133
1134 cmpcharp->width = rightmost;
1135 if (cmpcharp->width < rightmost)
1136 /* To get a ceiling integer value. */
1137 cmpcharp->width++;
1138 }
1139
1140 cmpchar_table[n_cmpchars] = cmpcharp;
1141
1142 return n_cmpchars++;
1143}
1144
1145/* Return the Nth element of the composite character C. */
1146int
1147cmpchar_component (c, n)
1148 unsigned int c, n;
1149{
1150 int id = COMPOSITE_CHAR_ID (c);
1151
1152 if (id >= n_cmpchars /* C is not a valid composite character. */
1153 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1154 return -1;
1155 /* No face data is stored in glyph code. */
1156 return ((int) (cmpchar_table[id]->glyph[n]));
1157}
1158
1159DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1160 "T if CHAR is a composite character.")
1161 (ch)
1162 Lisp_Object ch;
1163{
1164 CHECK_NUMBER (ch, 0);
1165 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1166}
1167
1168DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1169 2, 2, 0,
1170 "Return the IDXth component character of composite character CHARACTER.")
1171 (character, idx)
1172 Lisp_Object character, idx;
1173{
1174 int c;
1175
1176 CHECK_NUMBER (character, 0);
1177 CHECK_NUMBER (idx, 1);
1178
1179 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1180 args_out_of_range (character, idx);
1181
1182 return make_number (c);
1183}
1184
1185DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1186 2, 2, 0,
55001746
KH
1187 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1188The returned rule is for composing the Nth component\n\
1189on the (N-1)th component. If N is 0, the returned value is always 255.")
1190 (character, n)
1191 Lisp_Object character, n;
4ed46869
KH
1192{
1193 int id, i;
1194
1195 CHECK_NUMBER (character, 0);
55001746 1196 CHECK_NUMBER (n, 1);
4ed46869
KH
1197
1198 id = COMPOSITE_CHAR_ID (XINT (character));
1199 if (id < 0 || id >= n_cmpchars)
1200 error ("Invalid composite character: %d", XINT (character));
55001746 1201 i = XINT (n);
4ed46869 1202 if (i > cmpchar_table[id]->glyph_len)
55001746 1203 args_out_of_range (character, n);
4ed46869
KH
1204
1205 return make_number (cmpchar_table[id]->cmp_rule[i]);
1206}
1207
1208DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1209 Scmpchar_cmp_rule_p, 1, 1, 0,
1210 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1211 (character)
1212 Lisp_Object character;
1213{
1214 int id;
1215
1216 CHECK_NUMBER (character, 0);
1217 id = COMPOSITE_CHAR_ID (XINT (character));
1218 if (id < 0 || id >= n_cmpchars)
1219 error ("Invalid composite character: %d", XINT (character));
1220
1221 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1222}
1223
1224DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1225 Scmpchar_cmp_count, 1, 1, 0,
1226 "Return number of compoents of composite character CHARACTER.")
1227 (character)
1228 Lisp_Object character;
1229{
1230 int id;
1231
1232 CHECK_NUMBER (character, 0);
1233 id = COMPOSITE_CHAR_ID (XINT (character));
1234 if (id < 0 || id >= n_cmpchars)
1235 error ("Invalid composite character: %d", XINT (character));
1236
1237 return (make_number (cmpchar_table[id]->glyph_len));
1238}
1239
1240DEFUN ("compose-string", Fcompose_string, Scompose_string,
1241 1, 1, 0,
1242 "Return one char string composed from all characters in STRING.")
1243 (str)
1244 Lisp_Object str;
1245{
1246 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1247 int len, i;
1248
1249 CHECK_STRING (str, 0);
1250
1251 buf[0] = LEADING_CODE_COMPOSITION;
1252 p = XSTRING (str)->data;
1253 pend = p + XSTRING (str)->size;
1254 i = 1;
1255 while (p < pend)
1256 {
1257 if (*p < 0x20 || *p == 127) /* control code */
1258 error ("Invalid component character: %d", *p);
1259 else if (*p < 0x80) /* ASCII */
1260 {
1261 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1262 error ("Too long string to be composed: %s", XSTRING (str)->data);
1263 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1264 code itself. */
1265 buf[i++] = 0xA0;
1266 buf[i++] = *p++ + 0x80;
1267 }
1268 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1269 {
1270 /* Already composed. Eliminate the heading
1271 LEADING_CODE_COMPOSITION, keep the remaining bytes
1272 unchanged. */
1273 p++;
1274 ptemp = p;
1275 while (! CHAR_HEAD_P (p)) p++;
1276 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1277 error ("Too long string to be composed: %s", XSTRING (str)->data);
1278 bcopy (ptemp, buf + i, p - ptemp);
1279 i += p - ptemp;
1280 }
1281 else /* multibyte char */
1282 {
1283 /* Add 0x20 to the base leading-code, keep the remaining
1284 bytes unchanged. */
1285 len = BYTES_BY_CHAR_HEAD (*p);
1286 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1287 error ("Too long string to be composed: %s", XSTRING (str)->data);
1288 bcopy (p, buf + i, len);
1289 buf[i] += 0x20;
1290 p += len, i += len;
1291 }
1292 }
1293
1294 if (i < 5)
1295 /* STR contains only one character, which can't be composed. */
1296 error ("Too short string to be composed: %s", XSTRING (str)->data);
1297
1298 return make_string (buf, i);
1299}
1300
1301\f
1302charset_id_internal (charset_name)
1303 char *charset_name;
1304{
1305 Lisp_Object val = Fget (intern (charset_name), Qcharset);
1306
1307 if (!VECTORP (val))
1308 error ("Charset %s is not defined", charset_name);
1309
1310 return (XINT (XVECTOR (val)->contents[0]));
1311}
1312
1313DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1314 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1315 ()
1316{
1317 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1318 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1319 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1320 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1321 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1322 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1323 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1324 return Qnil;
1325}
1326
1327init_charset_once ()
1328{
1329 int i, j, k;
1330
1331 staticpro (&Vcharset_table);
1332 staticpro (&Vcharset_symbol_table);
1333
1334 /* This has to be done here, before we call Fmake_char_table. */
1335 Qcharset_table = intern ("charset-table");
1336 staticpro (&Qcharset_table);
1337
1338 /* Intern this now in case it isn't already done.
1339 Setting this variable twice is harmless.
1340 But don't staticpro it here--that is done in alloc.c. */
1341 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1342
1343 /* Now we are ready to set up this property, so we can
1344 create the charset table. */
1345 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1346 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1347
1348 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET), Qnil);
1349
1350 /* Setup tables. */
1351 for (i = 0; i < 2; i++)
1352 for (j = 0; j < 2; j++)
1353 for (k = 0; k < 128; k++)
1354 iso_charset_table [i][j][k] = -1;
1355
1356 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1357 cmpchar_table_size = n_cmpchars = 0;
1358
1359 for (i = 0; i < 256; i++)
1360 BYTES_BY_CHAR_HEAD (i) = 1;
1361 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1362 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1363 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1364 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1365 /* The following doesn't reflect the actual bytes, but just to tell
1366 that it is a start of a multibyte character. */
1367 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1368
1369 for (i = 0; i < 128; i++)
1370 WIDTH_BY_CHAR_HEAD (i) = 1;
1371 for (; i < 256; i++)
1372 WIDTH_BY_CHAR_HEAD (i) = 4;
1373 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1374 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1375 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1376 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1377}
1378
1379#ifdef emacs
1380
1381syms_of_charset ()
1382{
1383 Qascii = intern ("ascii");
1384 staticpro (&Qascii);
1385
1386 Qcharset = intern ("charset");
1387 staticpro (&Qcharset);
1388
1389 /* Define ASCII charset now. */
1390 update_charset_table (make_number (CHARSET_ASCII),
1391 make_number (1), make_number (94),
1392 make_number (1),
1393 make_number (0),
1394 make_number ('B'),
1395 make_number (0),
1396 build_string ("ASCII"),
1397 build_string ("ASCII"),
1398 build_string ("ASCII (ISO646 IRV)"));
1399 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1400 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1401
1402 Qcomposition = intern ("composition");
1403 staticpro (&Qcomposition);
1404 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1405
1406 defsubr (&Sdefine_charset);
1407 defsubr (&Sdeclare_equiv_charset);
1408 defsubr (&Sfind_charset_region);
1409 defsubr (&Sfind_charset_string);
1410 defsubr (&Smake_char_internal);
1411 defsubr (&Ssplit_char);
1412 defsubr (&Schar_charset);
1413 defsubr (&Siso_charset);
1414 defsubr (&Schar_bytes);
1415 defsubr (&Schar_width);
1416 defsubr (&Sstring_width);
1417 defsubr (&Schar_direction);
1418 defsubr (&Schars_in_string);
1419 defsubr (&Schar_boundary_p);
1420 defsubr (&Sconcat_chars);
1421 defsubr (&Scmpcharp);
1422 defsubr (&Scmpchar_component);
1423 defsubr (&Scmpchar_cmp_rule);
1424 defsubr (&Scmpchar_cmp_rule_p);
1425 defsubr (&Scmpchar_cmp_count);
1426 defsubr (&Scompose_string);
1427 defsubr (&Ssetup_special_charsets);
1428
1429 DEFVAR_LISP ("charset-list", &Vcharset_list,
1430 "List of charsets ever defined.");
1431 Vcharset_list = Fcons (Qascii, Qnil);
1432
1433 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1434 "Leading-code of composite characters.");
1435 leading_code_composition = LEADING_CODE_COMPOSITION;
1436
1437 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1438 "Leading-code of private TYPE9N charset of column-width 1.");
1439 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1440
1441 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1442 "Leading-code of private TYPE9N charset of column-width 2.");
1443 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1444
1445 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1446 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1447 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1448
1449 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1450 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1451 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1452}
1453
1454#endif /* emacs */