1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
29 #include <sys/types.h>
43 Lisp_Object Qcharset
, Qascii
, Qcomposition
;
45 /* Declaration of special leading-codes. */
46 int leading_code_composition
; /* for composite characters */
47 int leading_code_private_11
; /* for private DIMENSION1 of 1-column */
48 int leading_code_private_12
; /* for private DIMENSION1 of 2-column */
49 int leading_code_private_21
; /* for private DIMENSION2 of 1-column */
50 int leading_code_private_22
; /* for private DIMENSION2 of 2-column */
52 /* Declaration of special charsets. */
53 int charset_ascii
; /* ASCII */
54 int charset_composition
; /* for a composite character */
55 int charset_latin_iso8859_1
; /* ISO8859-1 (Latin-1) */
56 int charset_jisx0208_1978
; /* JISX0208.1978 (Japanese Kanji old set) */
57 int charset_jisx0208
; /* JISX0208.1983 (Japanese Kanji) */
58 int charset_katakana_jisx0201
; /* JISX0201.Kana (Japanese Katakana) */
59 int charset_latin_jisx0201
; /* JISX0201.Roman (Japanese Roman) */
60 int charset_big5_1
; /* Big5 Level 1 (Chinese Traditional) */
61 int charset_big5_2
; /* Big5 Level 2 (Chinese Traditional) */
63 int min_composite_char
;
65 Lisp_Object Qcharset_table
;
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table
;
70 /* A vector of charset symbol indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table
;
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list
;
77 /* Vector of unification table ever defined.
78 An ID of a unification table is an index of this vector. */
79 Lisp_Object Vcharacter_unification_table_vector
;
81 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
82 int bytes_by_char_head
[256];
83 int width_by_char_head
[256];
85 /* Mapping table from ISO2022's charset (specified by DIMENSION,
86 CHARS, and FINAL-CHAR) to Emacs' charset. */
87 int iso_charset_table
[2][2][128];
89 /* Table of pointers to the structure `cmpchar_info' indexed by
91 struct cmpchar_info
**cmpchar_table
;
92 /* The current size of `cmpchar_table'. */
93 static int cmpchar_table_size
;
94 /* Number of the current composite characters. */
97 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
98 unsigned char *_fetch_multibyte_char_p
;
99 int _fetch_multibyte_char_len
;
101 /* Offset to add to a non-ASCII value when inserting it. */
102 int nonascii_insert_offset
;
104 /* Translation table for converting non-ASCII unibyte characters
105 to multibyte codes, or nil. */
106 Lisp_Object Vnonascii_translate_table
;
108 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
109 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
112 invalid_character (c
)
115 error ("Invalid character: %o, %d, 0x%x", c
);
119 /* Set STR a pointer to the multi-byte form of the character C. If C
120 is not a composite character, the multi-byte form is set in WORKBUF
121 and STR points WORKBUF. The caller should allocate at least 4-byte
122 area at WORKBUF in advance. Returns the length of the multi-byte
123 form. If C is an invalid character to have a multi-byte form,
126 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
127 function directly if C can be an ASCII character. */
130 non_ascii_char_to_string (c
, workbuf
, str
)
132 unsigned char *workbuf
, **str
;
136 if (COMPOSITE_CHAR_P (c
))
138 int cmpchar_id
= COMPOSITE_CHAR_ID (c
);
140 if (cmpchar_id
< n_cmpchars
)
142 *str
= cmpchar_table
[cmpchar_id
]->data
;
143 return cmpchar_table
[cmpchar_id
]->len
;
147 invalid_character (c
);
151 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
153 || ! CHARSET_DEFINED_P (charset
)
154 || c1
>= 0 && c1
< 32
155 || c2
>= 0 && c2
< 32)
156 invalid_character (c
);
159 *workbuf
++ = CHARSET_LEADING_CODE_BASE (charset
);
160 if (*workbuf
= CHARSET_LEADING_CODE_EXT (charset
))
162 *workbuf
++ = c1
| 0x80;
164 *workbuf
++ = c2
| 0x80;
166 return (workbuf
- *str
);
169 /* Return a non-ASCII character of which multi-byte form is at STR of
170 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
171 character is set to the address ACTUAL_LEN.
173 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
174 directly if STR can hold an ASCII character. */
176 string_to_non_ascii_char (str
, len
, actual_len
)
177 const unsigned char *str
;
178 int len
, *actual_len
;
181 unsigned char c1
, c2
;
182 register int c
, bytes
;
187 if (BASE_LEADING_CODE_P (c
))
189 while (bytes
< len
&& ! CHAR_HEAD_P (str
[bytes
])) bytes
++;
191 if (c
== LEADING_CODE_COMPOSITION
)
193 int cmpchar_id
= str_cmpchar_id (str
, bytes
);
196 c
= MAKE_COMPOSITE_CHAR (cmpchar_id
);
200 int charset
= c
, c1
, c2
= 0;
203 if (c
>= LEADING_CODE_PRIVATE_11
)
205 if (BYTES_BY_CHAR_HEAD (c
) <= bytes
&& CHARSET_DEFINED_P (charset
))
208 if (CHARSET_DIMENSION (charset
) == 2)
210 c
= MAKE_NON_ASCII_CHAR (charset
, c1
, c2
);
220 /* Return the length of the multi-byte form at string STR of length LEN. */
222 multibyte_form_length (str
, len
)
223 const unsigned char *str
;
228 if (BASE_LEADING_CODE_P (*str
))
229 while (bytes
< len
&& ! CHAR_HEAD_P (str
[bytes
])) bytes
++;
234 /* Check if string STR of length LEN contains valid multi-byte form of
235 a character. If valid, charset and position codes of the character
236 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
237 return -1. This should be used only in the macro SPLIT_STRING
238 which checks range of STR in advance. */
240 split_non_ascii_string (str
, len
, charset
, c1
, c2
)
241 register const unsigned char *str
;
242 register unsigned char *c1
, *c2
;
243 register int len
, *charset
;
245 register unsigned int cs
= *str
++;
247 if (cs
== LEADING_CODE_COMPOSITION
)
249 int cmpchar_id
= str_cmpchar_id (str
- 1, len
);
253 *charset
= cs
, *c1
= cmpchar_id
>> 7, *c2
= cmpchar_id
& 0x7F;
255 else if ((cs
< LEADING_CODE_PRIVATE_11
|| (cs
= *str
++) >= 0xA0)
256 && CHARSET_DEFINED_P (cs
))
261 *c1
= (*str
++) & 0x7F;
262 if (CHARSET_DIMENSION (cs
) == 2)
266 *c2
= (*str
++) & 0x7F;
274 /* Return a character unified with C (or a character made of CHARSET,
275 C1, and C2) in unification table TABLE. If no unification is found
276 in TABLE, return C. */
277 unify_char (table
, c
, charset
, c1
, c2
)
279 int c
, charset
, c1
, c2
;
282 int alt_charset
, alt_c1
, alt_c2
, dimension
;
284 if (c
< 0) c
= MAKE_CHAR (charset
, c1
, c2
);
285 if (!CHAR_TABLE_P (table
)
286 || (ch
= Faref (table
, make_number (c
)), !INTEGERP (ch
))
290 SPLIT_CHAR (XFASTINT (ch
), alt_charset
, alt_c1
, alt_c2
);
291 dimension
= CHARSET_DIMENSION (alt_charset
);
292 if (dimension
== 1 && alt_c1
> 0 || dimension
== 2 && alt_c2
> 0)
293 /* CH is not a generic character, just return it. */
294 return XFASTINT (ch
);
296 /* Since CH is a generic character, we must return a specific
297 charater which has the same position codes as C from CH. */
299 SPLIT_CHAR (c
, charset
, c1
, c2
);
300 if (dimension
!= CHARSET_DIMENSION (charset
))
301 /* We can't make such a character because of dimension mismatch. */
303 return MAKE_CHAR (alt_charset
, c1
, c2
);
306 /* Convert the unibyte character C to multibyte based on
307 Vnonascii_translate_table or nonascii_insert_offset. If they can't
308 convert C to a valid multibyte character, convert it based on
309 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
311 unibyte_char_to_multibyte (c
)
314 if (c
>= 0240 && c
< 0400)
318 if (! NILP (Vnonascii_translate_table
))
319 c
= XINT (Faref (Vnonascii_translate_table
, make_number (c
)));
320 else if (nonascii_insert_offset
> 0)
321 c
+= nonascii_insert_offset
;
322 if (c
>= 0240 && (c
< 0400 || ! VALID_MULTIBYTE_CHAR_P (c
)))
323 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
328 /* Update the table Vcharset_table with the given arguments (see the
329 document of `define-charset' for the meaning of each argument).
330 Several other table contents are also updated. The caller should
331 check the validity of CHARSET-ID and the remaining arguments in
335 update_charset_table (charset_id
, dimension
, chars
, width
, direction
,
336 iso_final_char
, iso_graphic_plane
,
337 short_name
, long_name
, description
)
338 Lisp_Object charset_id
, dimension
, chars
, width
, direction
;
339 Lisp_Object iso_final_char
, iso_graphic_plane
;
340 Lisp_Object short_name
, long_name
, description
;
342 int charset
= XINT (charset_id
);
344 unsigned char leading_code_base
, leading_code_ext
;
346 if (NILP (CHARSET_TABLE_ENTRY (charset
)))
347 CHARSET_TABLE_ENTRY (charset
)
348 = Fmake_vector (make_number (CHARSET_MAX_IDX
), Qnil
);
350 /* Get byte length of multibyte form, base leading-code, and
351 extended leading-code of the charset. See the comment under the
352 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
353 bytes
= XINT (dimension
);
354 if (charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
356 /* Official charset, it doesn't have an extended leading-code. */
357 if (charset
!= CHARSET_ASCII
)
358 bytes
+= 1; /* For a base leading-code. */
359 leading_code_base
= charset
;
360 leading_code_ext
= 0;
364 /* Private charset. */
365 bytes
+= 2; /* For base and extended leading-codes. */
367 = (charset
< LEADING_CODE_EXT_12
368 ? LEADING_CODE_PRIVATE_11
369 : (charset
< LEADING_CODE_EXT_21
370 ? LEADING_CODE_PRIVATE_12
371 : (charset
< LEADING_CODE_EXT_22
372 ? LEADING_CODE_PRIVATE_21
373 : LEADING_CODE_PRIVATE_22
)));
374 leading_code_ext
= charset
;
377 CHARSET_TABLE_INFO (charset
, CHARSET_ID_IDX
) = charset_id
;
378 CHARSET_TABLE_INFO (charset
, CHARSET_BYTES_IDX
) = make_number (bytes
);
379 CHARSET_TABLE_INFO (charset
, CHARSET_DIMENSION_IDX
) = dimension
;
380 CHARSET_TABLE_INFO (charset
, CHARSET_CHARS_IDX
) = chars
;
381 CHARSET_TABLE_INFO (charset
, CHARSET_WIDTH_IDX
) = width
;
382 CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
) = direction
;
383 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_BASE_IDX
)
384 = make_number (leading_code_base
);
385 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_EXT_IDX
)
386 = make_number (leading_code_ext
);
387 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_FINAL_CHAR_IDX
) = iso_final_char
;
388 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_GRAPHIC_PLANE_IDX
)
390 CHARSET_TABLE_INFO (charset
, CHARSET_SHORT_NAME_IDX
) = short_name
;
391 CHARSET_TABLE_INFO (charset
, CHARSET_LONG_NAME_IDX
) = long_name
;
392 CHARSET_TABLE_INFO (charset
, CHARSET_DESCRIPTION_IDX
) = description
;
393 CHARSET_TABLE_INFO (charset
, CHARSET_PLIST_IDX
) = Qnil
;
396 /* If we have already defined a charset which has the same
397 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
398 DIRECTION, we must update the entry REVERSE-CHARSET of both
399 charsets. If there's no such charset, the value of the entry
403 for (i
= 0; i
<= MAX_CHARSET
; i
++)
404 if (!NILP (CHARSET_TABLE_ENTRY (i
)))
406 if (CHARSET_DIMENSION (i
) == XINT (dimension
)
407 && CHARSET_CHARS (i
) == XINT (chars
)
408 && CHARSET_ISO_FINAL_CHAR (i
) == XINT (iso_final_char
)
409 && CHARSET_DIRECTION (i
) != XINT (direction
))
411 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
413 CHARSET_TABLE_INFO (i
, CHARSET_REVERSE_CHARSET_IDX
) = charset_id
;
418 /* No such a charset. */
419 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
423 if (charset
!= CHARSET_ASCII
424 && charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
426 /* Update tables bytes_by_char_head and width_by_char_head. */
427 bytes_by_char_head
[leading_code_base
] = bytes
;
428 width_by_char_head
[leading_code_base
] = XINT (width
);
430 /* Update table emacs_code_class. */
431 emacs_code_class
[charset
] = (bytes
== 2
432 ? EMACS_leading_code_2
434 ? EMACS_leading_code_3
435 : EMACS_leading_code_4
));
438 /* Update table iso_charset_table. */
439 if (ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) < 0)
440 ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) = charset
;
445 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
448 get_charset_id (charset_symbol
)
449 Lisp_Object charset_symbol
;
454 return ((SYMBOLP (charset_symbol
)
455 && (val
= Fget (charset_symbol
, Qcharset
), VECTORP (val
))
456 && (charset
= XINT (XVECTOR (val
)->contents
[CHARSET_ID_IDX
]),
457 CHARSET_VALID_P (charset
)))
461 /* Return an identification number for a new private charset of
462 DIMENSION and WIDTH. If there's no more room for the new charset,
465 get_new_private_charset_id (dimension
, width
)
466 int dimension
, width
;
468 int charset
, from
, to
;
473 from
= LEADING_CODE_EXT_11
, to
= LEADING_CODE_EXT_12
;
475 from
= LEADING_CODE_EXT_12
, to
= LEADING_CODE_EXT_21
;
480 from
= LEADING_CODE_EXT_21
, to
= LEADING_CODE_EXT_22
;
482 from
= LEADING_CODE_EXT_22
, to
= LEADING_CODE_EXT_MAX
+ 1;
485 for (charset
= from
; charset
< to
; charset
++)
486 if (!CHARSET_DEFINED_P (charset
)) break;
488 return make_number (charset
< to
? charset
: 0);
491 DEFUN ("define-charset", Fdefine_charset
, Sdefine_charset
, 3, 3, 0,
492 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
493 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
494 treated as a private charset.\n\
495 INFO-VECTOR is a vector of the format:\n\
496 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
497 SHORT-NAME LONG-NAME DESCRIPTION]\n\
498 The meanings of each elements is as follows:\n\
499 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
500 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
501 WIDTH (integer) is the number of columns a character in the charset\n\
502 occupies on the screen: one of 0, 1, and 2.\n\
504 DIRECTION (integer) is the rendering direction of characters in the\n\
505 charset when rendering. If 0, render from right to left, else\n\
506 render from left to right.\n\
508 ISO-FINAL-CHAR (character) is the final character of the\n\
509 corresponding ISO 2022 charset.\n\
511 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
512 while encoding to variants of ISO 2022 coding system, one of the\n\
513 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
515 SHORT-NAME (string) is the short name to refer to the charset.\n\
517 LONG-NAME (string) is the long name to refer to the charset.\n\
519 DESCRIPTION (string) is the description string of the charset.")
520 (charset_id
, charset_symbol
, info_vector
)
521 Lisp_Object charset_id
, charset_symbol
, info_vector
;
525 if (!NILP (charset_id
))
526 CHECK_NUMBER (charset_id
, 0);
527 CHECK_SYMBOL (charset_symbol
, 1);
528 CHECK_VECTOR (info_vector
, 2);
530 if (! NILP (charset_id
))
532 if (! CHARSET_VALID_P (XINT (charset_id
)))
533 error ("Invalid CHARSET: %d", XINT (charset_id
));
534 else if (CHARSET_DEFINED_P (XINT (charset_id
)))
535 error ("Already defined charset: %d", XINT (charset_id
));
538 vec
= XVECTOR (info_vector
)->contents
;
539 if (XVECTOR (info_vector
)->size
!= 9
540 || !INTEGERP (vec
[0]) || !(XINT (vec
[0]) == 1 || XINT (vec
[0]) == 2)
541 || !INTEGERP (vec
[1]) || !(XINT (vec
[1]) == 94 || XINT (vec
[1]) == 96)
542 || !INTEGERP (vec
[2]) || !(XINT (vec
[2]) == 1 || XINT (vec
[2]) == 2)
543 || !INTEGERP (vec
[3]) || !(XINT (vec
[3]) == 0 || XINT (vec
[3]) == 1)
544 || !INTEGERP (vec
[4]) || !(XINT (vec
[4]) >= '0' && XINT (vec
[4]) <= '~')
545 || !INTEGERP (vec
[5]) || !(XINT (vec
[5]) == 0 || XINT (vec
[5]) == 1)
548 || !STRINGP (vec
[8]))
549 error ("Invalid info-vector argument for defining charset %s",
550 XSYMBOL (charset_symbol
)->name
->data
);
552 if (NILP (charset_id
))
554 charset_id
= get_new_private_charset_id (XINT (vec
[0]), XINT (vec
[2]));
555 if (XINT (charset_id
) == 0)
556 error ("There's no room for a new private charset %s",
557 XSYMBOL (charset_symbol
)->name
->data
);
560 update_charset_table (charset_id
, vec
[0], vec
[1], vec
[2], vec
[3],
561 vec
[4], vec
[5], vec
[6], vec
[7], vec
[8]);
562 Fput (charset_symbol
, Qcharset
, CHARSET_TABLE_ENTRY (XINT (charset_id
)));
563 CHARSET_SYMBOL (XINT (charset_id
)) = charset_symbol
;
564 Vcharset_list
= Fcons (charset_symbol
, Vcharset_list
);
568 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
569 Sget_unused_iso_final_char
, 2, 2, 0,
570 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
571 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
572 CHARS is the number of characters in a dimension: 94 or 96.\n\
574 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
575 If there's no unused final char for the specified kind of charset,\n\
578 Lisp_Object dimension
, chars
;
582 CHECK_NUMBER (dimension
, 0);
583 CHECK_NUMBER (chars
, 1);
584 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
585 error ("Invalid charset dimension %d, it should be 1 or 2",
587 if (XINT (chars
) != 94 && XINT (chars
) != 96)
588 error ("Invalid charset chars %d, it should be 94 or 96",
590 for (final_char
= '0'; final_char
<= '?'; final_char
++)
592 if (ISO_CHARSET_TABLE (dimension
, chars
, make_number (final_char
)) < 0)
595 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
598 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
600 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
601 CHARSET should be defined by `defined-charset' in advance.")
602 (dimension
, chars
, final_char
, charset_symbol
)
603 Lisp_Object dimension
, chars
, final_char
, charset_symbol
;
607 CHECK_NUMBER (dimension
, 0);
608 CHECK_NUMBER (chars
, 1);
609 CHECK_NUMBER (final_char
, 2);
610 CHECK_SYMBOL (charset_symbol
, 3);
612 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
613 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension
));
614 if (XINT (chars
) != 94 && XINT (chars
) != 96)
615 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
616 if (XINT (final_char
) < '0' || XFASTINT (final_char
) > '~')
617 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
618 if ((charset
= get_charset_id (charset_symbol
)) < 0)
619 error ("Invalid charset %s", XSYMBOL (charset_symbol
)->name
->data
);
621 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = charset
;
625 /* Return number of different charsets in STR of length LEN. In
626 addition, for each found charset N, CHARSETS[N] is set 1. The
627 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
628 It may lookup a unification table TABLE if supplied. */
631 find_charset_in_str (str
, len
, charsets
, table
)
636 register int num
= 0, c
;
638 if (! CHAR_TABLE_P (table
))
646 if (c
== LEADING_CODE_COMPOSITION
)
648 int cmpchar_id
= str_cmpchar_id (str
, len
);
653 struct cmpchar_info
*cmpcharp
= cmpchar_table
[cmpchar_id
];
656 for (i
= 0; i
< cmpcharp
->glyph_len
; i
++)
658 c
= cmpcharp
->glyph
[i
];
661 if ((c
= unify_char (table
, c
, 0, 0, 0)) < 0)
662 c
= cmpcharp
->glyph
[i
];
664 if ((charset
= CHAR_CHARSET (c
)) < 0)
665 charset
= CHARSET_ASCII
;
666 if (!charsets
[charset
])
668 charsets
[charset
] = 1;
672 str
+= cmpcharp
->len
;
673 len
-= cmpcharp
->len
;
677 charset
= CHARSET_ASCII
;
682 c
= STRING_CHAR_AND_LENGTH (str
, len
, bytes
);
685 int c1
= unify_char (table
, c
, 0, 0, 0);
689 charset
= CHAR_CHARSET (c
);
692 if (!charsets
[charset
])
694 charsets
[charset
] = 1;
703 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
705 "Return a list of charsets in the region between BEG and END.\n\
706 BEG and END are buffer positions.\n\
707 Optional arg TABLE if non-nil is a unification table to look up.")
709 Lisp_Object beg
, end
, table
;
711 int charsets
[MAX_CHARSET
+ 1];
712 int from
, from_byte
, to
, stop
, stop_byte
, i
;
715 validate_region (&beg
, &end
);
716 from
= XFASTINT (beg
);
717 stop
= to
= XFASTINT (end
);
719 if (from
< GPT
&& GPT
< to
)
722 stop_byte
= GPT_BYTE
;
725 stop_byte
= CHAR_TO_BYTE (stop
);
727 from_byte
= CHAR_TO_BYTE (from
);
729 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
732 find_charset_in_str (BYTE_POS_ADDR (from_byte
), stop_byte
- from_byte
,
736 from
= stop
, from_byte
= stop_byte
;
737 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
744 for (i
= MAX_CHARSET
; i
>= 0; i
--)
746 val
= Fcons (CHARSET_SYMBOL (i
), val
);
750 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
752 "Return a list of charsets in STR.\n\
753 Optional arg TABLE if non-nil is a unification table to look up.")
755 Lisp_Object str
, table
;
757 int charsets
[MAX_CHARSET
+ 1];
761 CHECK_STRING (str
, 0);
763 if (! STRING_MULTIBYTE (str
))
766 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
767 find_charset_in_str (XSTRING (str
)->data
, STRING_BYTES (XSTRING (str
)),
770 for (i
= MAX_CHARSET
; i
>= 0; i
--)
772 val
= Fcons (CHARSET_SYMBOL (i
), val
);
776 DEFUN ("make-char-internal", Fmake_char_internal
, Smake_char_internal
, 1, 3, 0,
778 (charset
, code1
, code2
)
779 Lisp_Object charset
, code1
, code2
;
781 CHECK_NUMBER (charset
, 0);
784 XSETFASTINT (code1
, 0);
786 CHECK_NUMBER (code1
, 1);
788 XSETFASTINT (code2
, 0);
790 CHECK_NUMBER (code2
, 2);
792 if (!CHARSET_DEFINED_P (XINT (charset
)))
793 error ("Invalid charset: %d", XINT (charset
));
795 return make_number (MAKE_CHAR (XINT (charset
), XINT (code1
), XINT (code2
)));
798 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
799 "Return list of charset and one or two position-codes of CHAR.")
806 CHECK_NUMBER (ch
, 0);
807 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
809 ? Fcons (CHARSET_SYMBOL (charset
),
810 Fcons (make_number (c1
), Fcons (make_number (c2
), Qnil
)))
811 : Fcons (CHARSET_SYMBOL (charset
), Fcons (make_number (c1
), Qnil
)));
814 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
815 "Return charset of CHAR.")
819 CHECK_NUMBER (ch
, 0);
821 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch
)));
824 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
825 "Return charset of a character in current buffer at position POS.\n\
826 If POS is nil, it defauls to the current point.")
830 register int pos_byte
, c
, charset
;
831 register unsigned char *p
;
835 else if (MARKERP (pos
))
836 pos_byte
= marker_byte_position (pos
);
839 CHECK_NUMBER (pos
, 0);
840 pos_byte
= CHAR_TO_BYTE (XINT (pos
));
842 p
= BYTE_POS_ADDR (pos_byte
);
843 c
= STRING_CHAR (p
, Z_BYTE
- pos_byte
);
844 charset
= CHAR_CHARSET (c
);
845 return CHARSET_SYMBOL (charset
);
848 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
849 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
851 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
852 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
853 where as Emacs distinguishes them by charset symbol.\n\
854 See the documentation of the function `charset-info' for the meanings of\n\
855 DIMENSION, CHARS, and FINAL-CHAR.")
856 (dimension
, chars
, final_char
)
857 Lisp_Object dimension
, chars
, final_char
;
861 CHECK_NUMBER (dimension
, 0);
862 CHECK_NUMBER (chars
, 1);
863 CHECK_NUMBER (final_char
, 2);
865 if ((charset
= ISO_CHARSET_TABLE (dimension
, chars
, final_char
)) < 0)
867 return CHARSET_SYMBOL (charset
);
870 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
871 generic character. If GENERICP is zero, return nonzero iff C is a
872 valid normal character. Do not call this function directly,
873 instead use macro CHAR_VALID_P. */
875 char_valid_p (c
, genericp
)
882 if (SINGLE_BYTE_CHAR_P (c
))
884 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
885 if (!CHARSET_VALID_P (charset
))
887 return (c
< MIN_CHAR_COMPOSITION
888 ? ((c
& CHAR_FIELD1_MASK
) /* i.e. dimension of C is two. */
889 ? (genericp
&& c1
== 0 && c2
== 0
890 || c1
>= 32 && c2
>= 32)
891 : (genericp
&& c1
== 0
893 : c
< MIN_CHAR_COMPOSITION
+ n_cmpchars
);
896 DEFUN ("char-valid-p", Fchar_valid_p
, Schar_valid_p
, 1, 2, 0,
897 "Return t if OBJECT is a valid normal character.\n\
898 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
899 a valid generic character.")
901 Lisp_Object object
, genericp
;
903 if (! NATNUMP (object
))
905 return (CHAR_VALID_P (XFASTINT (object
), !NILP (genericp
)) ? Qt
: Qnil
);
908 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
909 Sunibyte_char_to_multibyte
, 1, 1, 0,
910 "Convert the unibyte character CH to multibyte character.\n\
911 The conversion is done based on nonascii-translate-table (which see)\n\
912 or nonascii-insert-offset (which see).")
918 CHECK_NUMBER (ch
, 0);
920 if (c
< 0 || c
>= 0400)
921 error ("Invalid unibyte character: %d", c
);
922 c
= unibyte_char_to_multibyte (c
);
924 error ("Can't convert to multibyte character: %d", XINT (ch
));
925 return make_number (c
);
928 DEFUN ("char-bytes", Fchar_bytes
, Schar_bytes
, 1, 1, 0,
929 "Return byte length of multi-byte form of CHAR.")
936 CHECK_NUMBER (ch
, 0);
937 if (COMPOSITE_CHAR_P (XFASTINT (ch
)))
939 unsigned int id
= COMPOSITE_CHAR_ID (XFASTINT (ch
));
941 bytes
= (id
< n_cmpchars
? cmpchar_table
[id
]->len
: 1);
945 int charset
= CHAR_CHARSET (XFASTINT (ch
));
947 bytes
= CHARSET_DEFINED_P (charset
) ? CHARSET_BYTES (charset
) : 1;
950 XSETFASTINT (val
, bytes
);
954 /* Return the width of character of which multi-byte form starts with
955 C. The width is measured by how many columns occupied on the
956 screen when displayed in the current buffer. */
958 #define ONE_BYTE_CHAR_WIDTH(c) \
961 ? XFASTINT (current_buffer->tab_width) \
962 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
966 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
967 : ((! NILP (current_buffer->enable_multibyte_characters) \
968 && BASE_LEADING_CODE_P (c)) \
969 ? WIDTH_BY_CHAR_HEAD (c) \
973 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
974 "Return width of CHAR when displayed in the current buffer.\n\
975 The width is measured by how many columns it occupies on the screen.")
979 Lisp_Object val
, disp
;
981 struct Lisp_Char_Table
*dp
= buffer_display_table ();
983 CHECK_NUMBER (ch
, 0);
987 /* Get the way the display table would display it. */
988 disp
= dp
? DISP_CHAR_VECTOR (dp
, c
) : Qnil
;
991 XSETINT (val
, XVECTOR (disp
)->size
);
992 else if (SINGLE_BYTE_CHAR_P (c
))
993 XSETINT (val
, ONE_BYTE_CHAR_WIDTH (c
));
994 else if (COMPOSITE_CHAR_P (c
))
996 int id
= COMPOSITE_CHAR_ID (XFASTINT (ch
));
997 XSETFASTINT (val
, (id
< n_cmpchars
? cmpchar_table
[id
]->width
: 0));
1001 int charset
= CHAR_CHARSET (c
);
1003 XSETFASTINT (val
, CHARSET_WIDTH (charset
));
1008 /* Return width of string STR of length LEN when displayed in the
1009 current buffer. The width is measured by how many columns it
1010 occupies on the screen. */
1017 unsigned char *endp
= str
+ len
;
1019 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1023 if (*str
== LEADING_CODE_COMPOSITION
)
1025 int id
= str_cmpchar_id (str
, endp
- str
);
1034 width
+= cmpchar_table
[id
]->width
;
1035 str
+= cmpchar_table
[id
]->len
;
1042 int c
= STRING_CHAR_AND_LENGTH (str
, endp
- str
, thislen
);
1044 /* Get the way the display table would display it. */
1046 disp
= DISP_CHAR_VECTOR (dp
, c
);
1051 width
+= XVECTOR (disp
)->size
;
1053 width
+= ONE_BYTE_CHAR_WIDTH (*str
);
1061 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
1062 "Return width of STRING when displayed in the current buffer.\n\
1063 Width is measured by how many columns it occupies on the screen.\n\
1064 When calculating width of a multibyte character in STRING,\n\
1065 only the base leading-code is considered; the validity of\n\
1066 the following bytes is not checked.")
1072 CHECK_STRING (str
, 0);
1073 XSETFASTINT (val
, strwidth (XSTRING (str
)->data
,
1074 STRING_BYTES (XSTRING (str
))));
1078 DEFUN ("char-direction", Fchar_direction
, Schar_direction
, 1, 1, 0,
1079 "Return the direction of CHAR.\n\
1080 The returned value is 0 for left-to-right and 1 for right-to-left.")
1086 CHECK_NUMBER (ch
, 0);
1087 charset
= CHAR_CHARSET (XFASTINT (ch
));
1088 if (!CHARSET_DEFINED_P (charset
))
1089 invalid_character (XINT (ch
));
1090 return CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
);
1093 DEFUN ("chars-in-region", Fchars_in_region
, Schars_in_region
, 2, 2, 0,
1094 "Return number of characters between BEG and END.")
1096 Lisp_Object beg
, end
;
1100 from
= min (XFASTINT (beg
), XFASTINT (end
));
1101 to
= max (XFASTINT (beg
), XFASTINT (end
));
1106 /* Return the number of characters in the NBYTES bytes at PTR.
1107 This works by looking at the contents and checking for multibyte sequences.
1108 However, if the current buffer has enable-multibyte-characters = nil,
1109 we treat each byte as a character. */
1112 chars_in_text (ptr
, nbytes
)
1116 unsigned char *endp
, c
;
1119 /* current_buffer is null at early stages of Emacs initialization. */
1120 if (current_buffer
== 0
1121 || NILP (current_buffer
->enable_multibyte_characters
))
1124 endp
= ptr
+ nbytes
;
1131 if (BASE_LEADING_CODE_P (c
))
1132 while (ptr
< endp
&& ! CHAR_HEAD_P (*ptr
)) ptr
++;
1139 /* Return the number of characters in the NBYTES bytes at PTR.
1140 This works by looking at the contents and checking for multibyte sequences.
1141 It ignores enable-multibyte-characters. */
1144 multibyte_chars_in_text (ptr
, nbytes
)
1148 unsigned char *endp
, c
;
1151 endp
= ptr
+ nbytes
;
1158 if (BASE_LEADING_CODE_P (c
))
1159 while (ptr
< endp
&& ! CHAR_HEAD_P (*ptr
)) ptr
++;
1166 DEFUN ("string", Fstring
, Sstring
, 1, MANY
, 0,
1167 "Concatenate all the argument characters and make the result a string.")
1174 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM
* n
);
1175 unsigned char *p
= buf
;
1178 for (i
= 0; i
< n
; i
++)
1183 if (!INTEGERP (args
[i
]))
1184 CHECK_NUMBER (args
[i
], 0);
1186 len
= CHAR_STRING (c
, p
, str
);
1188 /* C is a composite character. */
1189 bcopy (str
, p
, len
);
1193 val
= make_string_from_bytes (buf
, n
, p
- buf
);
1199 /*** Composite characters staffs ***/
1201 /* Each composite character is identified by CMPCHAR-ID which is
1202 assigned when Emacs needs the character code of the composite
1203 character (e.g. when displaying it on the screen). See the
1204 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1205 composite character is represented in Emacs. */
1207 /* If `static' is defined, it means that it is defined to null string. */
1209 /* The following function is copied from lread.c. */
1211 hash_string (ptr
, len
)
1215 register unsigned char *p
= ptr
;
1216 register unsigned char *end
= p
+ len
;
1217 register unsigned char c
;
1218 register int hash
= 0;
1223 if (c
>= 0140) c
-= 40;
1224 hash
= ((hash
<<3) + (hash
>>28) + c
);
1226 return hash
& 07777777777;
1230 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1232 static int *cmpchar_hash_table
[CMPCHAR_HASH_TABLE_SIZE
];
1234 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1235 integer, where the 1st element is the size of the array, the 2nd
1236 element is how many elements are actually used in the array, and
1237 the remaining elements are CMPCHAR-IDs of composite characters of
1238 the same hash value. */
1239 #define CMPCHAR_HASH_SIZE(table) table[0]
1240 #define CMPCHAR_HASH_USED(table) table[1]
1241 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1243 /* Return CMPCHAR-ID of the composite character in STR of the length
1244 LEN. If the composite character has not yet been registered,
1245 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1246 is the sole function for assigning CMPCHAR-ID. */
1248 str_cmpchar_id (str
, len
)
1249 const unsigned char *str
;
1252 int hash_idx
, *hashp
;
1254 int embedded_rule
; /* 1 if composition rule is embedded. */
1255 int chars
; /* number of components. */
1257 struct cmpchar_info
*cmpcharp
;
1259 /* The second byte 0xFF means compostion rule is embedded. */
1260 embedded_rule
= (str
[1] == 0xFF);
1262 /* At first, get the actual length of the composite character. */
1264 const unsigned char *p
, *endp
= str
+ 1, *lastp
= str
+ len
;
1267 while (endp
< lastp
&& ! CHAR_HEAD_P (*endp
)) endp
++;
1269 /* Any composite char have at least 5-byte length. */
1276 if (embedded_rule
) p
++;
1277 /* No need of checking if *P is 0xA0 because
1278 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1279 p
+= BYTES_BY_CHAR_HEAD (*p
- 0x20);
1282 if (p
> endp
|| chars
< 2 || chars
> MAX_COMPONENT_COUNT
)
1283 /* Invalid components. */
1287 hash_idx
= hash_string (str
, len
) % CMPCHAR_HASH_TABLE_SIZE
;
1288 hashp
= cmpchar_hash_table
[hash_idx
];
1290 /* Then, look into the hash table. */
1292 /* Find the correct one among composite characters of the same
1294 for (i
= 2; i
< CMPCHAR_HASH_USED (hashp
); i
++)
1296 cmpcharp
= cmpchar_table
[CMPCHAR_HASH_CMPCHAR_ID (hashp
, i
)];
1297 if (len
== cmpcharp
->len
1298 && ! bcmp (str
, cmpcharp
->data
, len
))
1299 return CMPCHAR_HASH_CMPCHAR_ID (hashp
, i
);
1302 /* We have to register the composite character in cmpchar_table. */
1303 if (n_cmpchars
> (CHAR_FIELD2_MASK
| CHAR_FIELD3_MASK
))
1304 /* No, we have no more room for a new composite character. */
1307 /* Make the entry in hash table. */
1310 /* Make a table for 8 composite characters initially. */
1311 hashp
= (cmpchar_hash_table
[hash_idx
]
1312 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1313 CMPCHAR_HASH_SIZE (hashp
) = 10;
1314 CMPCHAR_HASH_USED (hashp
) = 2;
1316 else if (CMPCHAR_HASH_USED (hashp
) >= CMPCHAR_HASH_SIZE (hashp
))
1318 CMPCHAR_HASH_SIZE (hashp
) += 8;
1319 hashp
= (cmpchar_hash_table
[hash_idx
]
1320 = (int *) xrealloc (hashp
,
1321 sizeof (int) * CMPCHAR_HASH_SIZE (hashp
)));
1323 CMPCHAR_HASH_CMPCHAR_ID (hashp
, CMPCHAR_HASH_USED (hashp
)) = n_cmpchars
;
1324 CMPCHAR_HASH_USED (hashp
)++;
1326 /* Set information of the composite character in cmpchar_table. */
1327 if (cmpchar_table_size
== 0)
1329 /* This is the first composite character to be registered. */
1330 cmpchar_table_size
= 256;
1332 = (struct cmpchar_info
**) xmalloc (sizeof (cmpchar_table
[0])
1333 * cmpchar_table_size
);
1335 else if (cmpchar_table_size
<= n_cmpchars
)
1337 cmpchar_table_size
+= 256;
1339 = (struct cmpchar_info
**) xrealloc (cmpchar_table
,
1340 sizeof (cmpchar_table
[0])
1341 * cmpchar_table_size
);
1344 cmpcharp
= (struct cmpchar_info
*) xmalloc (sizeof (struct cmpchar_info
));
1346 cmpcharp
->len
= len
;
1347 cmpcharp
->data
= (unsigned char *) xmalloc (len
+ 1);
1348 bcopy (str
, cmpcharp
->data
, len
);
1349 cmpcharp
->data
[len
] = 0;
1350 cmpcharp
->glyph_len
= chars
;
1351 cmpcharp
->glyph
= (GLYPH
*) xmalloc (sizeof (GLYPH
) * chars
);
1354 cmpcharp
->cmp_rule
= (unsigned char *) xmalloc (chars
);
1355 cmpcharp
->col_offset
= (float *) xmalloc (sizeof (float) * chars
);
1359 cmpcharp
->cmp_rule
= NULL
;
1360 cmpcharp
->col_offset
= NULL
;
1363 /* Setup GLYPH data and composition rules (if any) so as not to make
1364 them every time on displaying. */
1366 unsigned char *bufp
;
1368 float leftmost
= 0.0, rightmost
= 1.0;
1371 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1372 cmpcharp
->col_offset
[0] = 0;
1374 for (i
= 0, bufp
= cmpcharp
->data
+ 1; i
< chars
; i
++)
1377 cmpcharp
->cmp_rule
[i
] = *bufp
++;
1379 if (*bufp
== 0xA0) /* This is an ASCII character. */
1381 cmpcharp
->glyph
[i
] = FAST_MAKE_GLYPH ((*++bufp
& 0x7F), 0);
1385 else /* Multibyte character. */
1387 /* Make `bufp' point normal multi-byte form temporally. */
1390 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp
, 4, 0), 0);
1391 width
= WIDTH_BY_CHAR_HEAD (*bufp
);
1393 bufp
+= BYTES_BY_CHAR_HEAD (*bufp
- 0x20);
1396 if (embedded_rule
&& i
> 0)
1398 /* Reference points (global_ref and new_ref) are
1409 Now, we calculate the column offset of the new glyph
1410 from the left edge of the first glyph. This can avoid
1411 the same calculation everytime displaying this
1412 composite character. */
1414 /* Reference points of global glyph and new glyph. */
1415 int global_ref
= (cmpcharp
->cmp_rule
[i
] - 0xA0) / 9;
1416 int new_ref
= (cmpcharp
->cmp_rule
[i
] - 0xA0) % 9;
1417 /* Column offset relative to the first glyph. */
1418 float left
= (leftmost
1419 + (global_ref
% 3) * (rightmost
- leftmost
) / 2.0
1420 - (new_ref
% 3) * width
/ 2.0);
1422 cmpcharp
->col_offset
[i
] = left
;
1423 if (left
< leftmost
)
1425 if (left
+ width
> rightmost
)
1426 rightmost
= left
+ width
;
1430 if (width
> rightmost
)
1436 /* Now col_offset[N] are relative to the left edge of the
1437 first component. Make them relative to the left edge of
1439 for (i
= 0; i
< chars
; i
++)
1440 cmpcharp
->col_offset
[i
] -= leftmost
;
1441 /* Make rightmost holds width of overall glyph. */
1442 rightmost
-= leftmost
;
1445 cmpcharp
->width
= rightmost
;
1446 if (cmpcharp
->width
< rightmost
)
1447 /* To get a ceiling integer value. */
1451 cmpchar_table
[n_cmpchars
] = cmpcharp
;
1453 return n_cmpchars
++;
1456 /* Return the Nth element of the composite character C. */
1458 cmpchar_component (c
, n
)
1461 int id
= COMPOSITE_CHAR_ID (c
);
1463 if (id
>= n_cmpchars
/* C is not a valid composite character. */
1464 || n
>= cmpchar_table
[id
]->glyph_len
) /* No such component. */
1466 /* No face data is stored in glyph code. */
1467 return ((int) (cmpchar_table
[id
]->glyph
[n
]));
1470 DEFUN ("cmpcharp", Fcmpcharp
, Scmpcharp
, 1, 1, 0,
1471 "T if CHAR is a composite character.")
1475 CHECK_NUMBER (ch
, 0);
1476 return (COMPOSITE_CHAR_P (XINT (ch
)) ? Qt
: Qnil
);
1479 DEFUN ("composite-char-component", Fcmpchar_component
, Scmpchar_component
,
1481 "Return the IDXth component character of composite character CHARACTER.")
1483 Lisp_Object character
, idx
;
1487 CHECK_NUMBER (character
, 0);
1488 CHECK_NUMBER (idx
, 1);
1490 if ((c
= cmpchar_component (XINT (character
), XINT (idx
))) < 0)
1491 args_out_of_range (character
, idx
);
1493 return make_number (c
);
1496 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule
, Scmpchar_cmp_rule
,
1498 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1499 The returned rule is for composing the Nth component\n\
1500 on the (N-1)th component. If N is 0, the returned value is always 255.")
1502 Lisp_Object character
, n
;
1506 CHECK_NUMBER (character
, 0);
1507 CHECK_NUMBER (n
, 1);
1509 id
= COMPOSITE_CHAR_ID (XINT (character
));
1510 if (id
< 0 || id
>= n_cmpchars
)
1511 error ("Invalid composite character: %d", XINT (character
));
1513 if (i
> cmpchar_table
[id
]->glyph_len
)
1514 args_out_of_range (character
, n
);
1516 return make_number (cmpchar_table
[id
]->cmp_rule
[i
]);
1519 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p
,
1520 Scmpchar_cmp_rule_p
, 1, 1, 0,
1521 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1523 Lisp_Object character
;
1527 CHECK_NUMBER (character
, 0);
1528 id
= COMPOSITE_CHAR_ID (XINT (character
));
1529 if (id
< 0 || id
>= n_cmpchars
)
1530 error ("Invalid composite character: %d", XINT (character
));
1532 return (cmpchar_table
[id
]->cmp_rule
? Qt
: Qnil
);
1535 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count
,
1536 Scmpchar_cmp_count
, 1, 1, 0,
1537 "Return number of compoents of composite character CHARACTER.")
1539 Lisp_Object character
;
1543 CHECK_NUMBER (character
, 0);
1544 id
= COMPOSITE_CHAR_ID (XINT (character
));
1545 if (id
< 0 || id
>= n_cmpchars
)
1546 error ("Invalid composite character: %d", XINT (character
));
1548 return (make_number (cmpchar_table
[id
]->glyph_len
));
1551 DEFUN ("compose-string", Fcompose_string
, Scompose_string
,
1553 "Return one char string composed from all characters in STRING.")
1557 unsigned char buf
[MAX_LENGTH_OF_MULTI_BYTE_FORM
], *p
, *pend
, *ptemp
;
1560 CHECK_STRING (str
, 0);
1562 buf
[0] = LEADING_CODE_COMPOSITION
;
1563 p
= XSTRING (str
)->data
;
1564 pend
= p
+ STRING_BYTES (XSTRING (str
));
1568 if (*p
< 0x20 || *p
== 127) /* control code */
1569 error ("Invalid component character: %d", *p
);
1570 else if (*p
< 0x80) /* ASCII */
1572 if (i
+ 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1573 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1574 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1577 buf
[i
++] = *p
++ + 0x80;
1579 else if (*p
== LEADING_CODE_COMPOSITION
) /* composite char */
1581 /* Already composed. Eliminate the heading
1582 LEADING_CODE_COMPOSITION, keep the remaining bytes
1586 while (! CHAR_HEAD_P (*p
)) p
++;
1587 if (i
+ (p
- ptemp
) >= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1588 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1589 bcopy (ptemp
, buf
+ i
, p
- ptemp
);
1592 else /* multibyte char */
1594 /* Add 0x20 to the base leading-code, keep the remaining
1596 len
= BYTES_BY_CHAR_HEAD (*p
);
1597 if (i
+ len
>= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1598 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1599 bcopy (p
, buf
+ i
, len
);
1606 /* STR contains only one character, which can't be composed. */
1607 error ("Too short string to be composed: %s", XSTRING (str
)->data
);
1609 return make_string_from_bytes (buf
, 1, i
);
1613 charset_id_internal (charset_name
)
1616 Lisp_Object val
= Fget (intern (charset_name
), Qcharset
);
1619 error ("Charset %s is not defined", charset_name
);
1621 return (XINT (XVECTOR (val
)->contents
[0]));
1624 DEFUN ("setup-special-charsets", Fsetup_special_charsets
,
1625 Ssetup_special_charsets
, 0, 0, 0, "Internal use only.")
1628 charset_latin_iso8859_1
= charset_id_internal ("latin-iso8859-1");
1629 charset_jisx0208_1978
= charset_id_internal ("japanese-jisx0208-1978");
1630 charset_jisx0208
= charset_id_internal ("japanese-jisx0208");
1631 charset_katakana_jisx0201
= charset_id_internal ("katakana-jisx0201");
1632 charset_latin_jisx0201
= charset_id_internal ("latin-jisx0201");
1633 charset_big5_1
= charset_id_internal ("chinese-big5-1");
1634 charset_big5_2
= charset_id_internal ("chinese-big5-2");
1638 init_charset_once ()
1642 staticpro (&Vcharset_table
);
1643 staticpro (&Vcharset_symbol_table
);
1645 /* This has to be done here, before we call Fmake_char_table. */
1646 Qcharset_table
= intern ("charset-table");
1647 staticpro (&Qcharset_table
);
1649 /* Intern this now in case it isn't already done.
1650 Setting this variable twice is harmless.
1651 But don't staticpro it here--that is done in alloc.c. */
1652 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1654 /* Now we are ready to set up this property, so we can
1655 create the charset table. */
1656 Fput (Qcharset_table
, Qchar_table_extra_slots
, make_number (0));
1657 Vcharset_table
= Fmake_char_table (Qcharset_table
, Qnil
);
1659 Vcharset_symbol_table
= Fmake_vector (make_number (MAX_CHARSET
+ 1), Qnil
);
1662 for (i
= 0; i
< 2; i
++)
1663 for (j
= 0; j
< 2; j
++)
1664 for (k
= 0; k
< 128; k
++)
1665 iso_charset_table
[i
][j
][k
] = -1;
1667 bzero (cmpchar_hash_table
, sizeof cmpchar_hash_table
);
1668 cmpchar_table_size
= n_cmpchars
= 0;
1670 for (i
= 0; i
< 256; i
++)
1671 BYTES_BY_CHAR_HEAD (i
) = 1;
1672 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 3;
1673 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 3;
1674 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 4;
1675 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 4;
1676 /* The following doesn't reflect the actual bytes, but just to tell
1677 that it is a start of a multibyte character. */
1678 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION
) = 2;
1680 for (i
= 0; i
< 128; i
++)
1681 WIDTH_BY_CHAR_HEAD (i
) = 1;
1682 for (; i
< 256; i
++)
1683 WIDTH_BY_CHAR_HEAD (i
) = 4;
1684 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 1;
1685 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 2;
1686 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 1;
1687 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 2;
1694 Qascii
= intern ("ascii");
1695 staticpro (&Qascii
);
1697 Qcharset
= intern ("charset");
1698 staticpro (&Qcharset
);
1700 /* Define ASCII charset now. */
1701 update_charset_table (make_number (CHARSET_ASCII
),
1702 make_number (1), make_number (94),
1707 build_string ("ASCII"),
1708 build_string ("ASCII"),
1709 build_string ("ASCII (ISO646 IRV)"));
1710 CHARSET_SYMBOL (CHARSET_ASCII
) = Qascii
;
1711 Fput (Qascii
, Qcharset
, CHARSET_TABLE_ENTRY (CHARSET_ASCII
));
1713 Qcomposition
= intern ("composition");
1714 staticpro (&Qcomposition
);
1715 CHARSET_SYMBOL (CHARSET_COMPOSITION
) = Qcomposition
;
1717 defsubr (&Sdefine_charset
);
1718 defsubr (&Sget_unused_iso_final_char
);
1719 defsubr (&Sdeclare_equiv_charset
);
1720 defsubr (&Sfind_charset_region
);
1721 defsubr (&Sfind_charset_string
);
1722 defsubr (&Smake_char_internal
);
1723 defsubr (&Ssplit_char
);
1724 defsubr (&Schar_charset
);
1725 defsubr (&Scharset_after
);
1726 defsubr (&Siso_charset
);
1727 defsubr (&Schar_valid_p
);
1728 defsubr (&Sunibyte_char_to_multibyte
);
1729 defsubr (&Schar_bytes
);
1730 defsubr (&Schar_width
);
1731 defsubr (&Sstring_width
);
1732 defsubr (&Schar_direction
);
1733 defsubr (&Schars_in_region
);
1735 defsubr (&Scmpcharp
);
1736 defsubr (&Scmpchar_component
);
1737 defsubr (&Scmpchar_cmp_rule
);
1738 defsubr (&Scmpchar_cmp_rule_p
);
1739 defsubr (&Scmpchar_cmp_count
);
1740 defsubr (&Scompose_string
);
1741 defsubr (&Ssetup_special_charsets
);
1743 DEFVAR_LISP ("charset-list", &Vcharset_list
,
1744 "List of charsets ever defined.");
1745 Vcharset_list
= Fcons (Qascii
, Qnil
);
1747 DEFVAR_LISP ("character-unification-table-vector",
1748 &Vcharacter_unification_table_vector
,
1749 "Vector of cons cell of a symbol and unification table ever defined.\n\
1750 An ID of a unification table is an index of this vector.");
1751 Vcharacter_unification_table_vector
= Fmake_vector (make_number (16), Qnil
);
1753 DEFVAR_INT ("leading-code-composition", &leading_code_composition
,
1754 "Leading-code of composite characters.");
1755 leading_code_composition
= LEADING_CODE_COMPOSITION
;
1757 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11
,
1758 "Leading-code of private TYPE9N charset of column-width 1.");
1759 leading_code_private_11
= LEADING_CODE_PRIVATE_11
;
1761 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12
,
1762 "Leading-code of private TYPE9N charset of column-width 2.");
1763 leading_code_private_12
= LEADING_CODE_PRIVATE_12
;
1765 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21
,
1766 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1767 leading_code_private_21
= LEADING_CODE_PRIVATE_21
;
1769 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22
,
1770 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1771 leading_code_private_22
= LEADING_CODE_PRIVATE_22
;
1773 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset
,
1774 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1775 This is used for converting unibyte text to multibyte,\n\
1776 and for inserting character codes specified by number.\n\n\
1777 Conversion is performed only when multibyte characters are enabled,\n\
1778 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1779 to the corresponding Emacs character code.\n\
1780 If `nonascii-translate-table' is non-nil, it overrides this variable.");
1781 nonascii_insert_offset
= 0;
1783 DEFVAR_LISP ("nonascii-translate-table", &Vnonascii_translate_table
,
1784 "Translate table for converting non-ASCII unibyte codes to multibyte.\n\
1785 This is used for converting unibyte text to multibyte,\n\
1786 and for inserting character codes specified by number.\n\n\
1787 Conversion is performed only when multibyte characters are enabled,\n\
1788 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1789 to the corresponding Emacs character code.\n\n\
1790 If this is nil, `nonascii-insert-offset' is used instead.");
1791 Vnonascii_translate_table
= Qnil
;
1793 DEFVAR_INT ("min-composite-char", &min_composite_char
,
1794 "Minimum character code of a composite character.");
1795 min_composite_char
= MIN_CHAR_COMPOSITION
;