1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
7 Copyright (C) 2003, 2004
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
11 This file is part of GNU Emacs.
13 GNU Emacs is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
18 GNU Emacs is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with GNU Emacs; see the file COPYING. If not, write to
25 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 Boston, MA 02110-1301, USA. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
78 /* The corresponding charsets. */
80 int charset_eight_bit
;
81 int charset_iso_8859_1
;
84 /* The other special charsets. */
85 int charset_jisx0201_roman
;
86 int charset_jisx0208_1978
;
89 /* Value of charset attribute `charset-iso-plane'. */
92 /* Charset of unibyte characters. */
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list
;
98 /* Incremented everytime we change Vcharset_ordered_list. This is
99 unsigned short so that it fits in Lisp_Int and never matches
101 unsigned short charset_ordered_list_tick
;
103 /* List of iso-2022 charsets. */
104 Lisp_Object Viso_2022_charset_list
;
106 /* List of emacs-mule charsets. */
107 Lisp_Object Vemacs_mule_charset_list
;
109 struct charset
*emacs_mule_charset
[256];
111 /* Mapping table from ISO2022's charset (specified by DIMENSION,
112 CHARS, and FINAL-CHAR) to Emacs' charset. */
113 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
115 Lisp_Object Vcharset_map_path
;
117 Lisp_Object Vchar_unified_charset_table
;
119 /* Defined in chartab.c */
121 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
122 Lisp_Object function
, Lisp_Object table
,
123 Lisp_Object arg
, struct charset
*charset
,
124 unsigned from
, unsigned to
));
126 #define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
144 /* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
147 #define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 /* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
166 int charset_map_loaded
;
168 struct charset_map_entries
174 struct charset_map_entries
*next
;
177 /* Load the mapping information for CHARSET from ENTRIES.
179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
189 load_charset_map (charset
, entries
, n_entries
, control_flag
)
190 struct charset
*charset
;
191 struct charset_map_entries
*entries
;
195 Lisp_Object vec
, table
;
196 unsigned max_code
= CHARSET_MAX_CODE (charset
);
197 int ascii_compatible_p
= charset
->ascii_compatible_p
;
198 int min_char
, max_char
, nonascii_min_char
;
200 unsigned char *fast_map
= charset
->fast_map
;
205 if (control_flag
> 0)
207 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
209 table
= Fmake_char_table (Qnil
, Qnil
);
210 if (control_flag
== 1)
211 vec
= Fmake_vector (make_number (n
), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table
))
213 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
215 charset_map_loaded
= 1;
218 min_char
= max_char
= entries
->entry
[0].c
;
219 nonascii_min_char
= MAX_CHAR
;
220 for (i
= 0; i
< n_entries
; i
++)
223 int from_index
, to_index
;
225 int idx
= i
% 0x10000;
227 if (i
> 0 && idx
== 0)
228 entries
= entries
->next
;
229 from
= entries
->entry
[idx
].from
;
230 to
= entries
->entry
[idx
].to
;
231 from_c
= entries
->entry
[idx
].c
;
232 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
235 to_index
= from_index
;
240 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
241 to_c
= from_c
+ (to_index
- from_index
);
243 if (from_index
< 0 || to_index
< 0)
246 if (control_flag
< 2)
252 else if (from_c
< min_char
)
254 if (ascii_compatible_p
)
256 if (! ASCII_BYTE_P (from_c
))
258 if (from_c
< nonascii_min_char
)
259 nonascii_min_char
= from_c
;
261 else if (! ASCII_BYTE_P (to_c
))
263 nonascii_min_char
= 0x80;
267 for (c
= from_c
; c
<= to_c
; c
++)
268 CHARSET_FAST_MAP_SET (c
, fast_map
);
270 if (control_flag
== 1)
272 unsigned code
= from
;
274 if (CHARSET_COMPACT_CODES_P (charset
))
277 ASET (vec
, from_index
, make_number (from_c
));
278 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
279 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
280 if (from_index
== to_index
)
282 from_index
++, from_c
++;
283 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
286 for (; from_index
<= to_index
; from_index
++, from_c
++)
288 ASET (vec
, from_index
, make_number (from_c
));
289 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
290 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
296 unsigned code
= from
;
300 int c1
= DECODE_CHAR (charset
, code
);
304 CHAR_TABLE_SET (table
, from_c
, make_number (c1
));
305 CHAR_TABLE_SET (Vchar_unify_table
, c1
, make_number (from_c
));
306 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
307 CHAR_TABLE_SET (Vchar_unified_charset_table
, c1
,
308 CHARSET_NAME (charset
));
310 if (from_index
== to_index
)
312 from_index
++, from_c
++;
313 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
318 if (control_flag
< 2)
320 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
321 ? nonascii_min_char
: min_char
);
322 CHARSET_MAX_CHAR (charset
) = max_char
;
323 if (control_flag
== 1)
325 CHARSET_DECODER (charset
) = vec
;
326 CHARSET_ENCODER (charset
) = table
;
330 CHARSET_DEUNIFIER (charset
) = table
;
334 /* Read a hexadecimal number (preceded by "0x") from the file FP while
335 paying attention to comment charcter '#'. */
337 static INLINE
unsigned
345 while ((c
= getc (fp
)) != EOF
)
349 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
353 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
365 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
367 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
369 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
370 n
= (n
* 10) + c
- '0';
377 /* Return a mapping vector for CHARSET loaded from MAPFILE.
378 Each line of MAPFILE has this form
380 where 0xAAAA is a code-point and 0xCCCC is the corresponding
381 character code, or this form
383 where 0xAAAA and 0xBBBB are code-points specifying a range, and
384 0xCCCC is the first character code of the range.
386 The returned vector has this form:
387 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
388 where CODE1 is a code-point or a cons of code-points specifying a
391 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
394 load_charset_map_from_file (charset
, mapfile
, control_flag
)
395 struct charset
*charset
;
399 unsigned min_code
= CHARSET_MIN_CODE (charset
);
400 unsigned max_code
= CHARSET_MAX_CODE (charset
);
404 Lisp_Object suffixes
;
405 struct charset_map_entries
*head
, *entries
;
408 suffixes
= Fcons (build_string (".map"),
409 Fcons (build_string (".TXT"), Qnil
));
411 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
413 || ! (fp
= fdopen (fd
, "r")))
415 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
419 head
= entries
= ((struct charset_map_entries
*)
420 alloca (sizeof (struct charset_map_entries
)));
429 from
= read_hex (fp
, &eof
);
432 if (getc (fp
) == '-')
433 to
= read_hex (fp
, &eof
);
436 c
= (int) read_hex (fp
, &eof
);
438 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
441 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
443 entries
->next
= ((struct charset_map_entries
*)
444 alloca (sizeof (struct charset_map_entries
)));
445 entries
= entries
->next
;
447 idx
= n_entries
% 0x10000;
448 entries
->entry
[idx
].from
= from
;
449 entries
->entry
[idx
].to
= to
;
450 entries
->entry
[idx
].c
= c
;
456 load_charset_map (charset
, head
, n_entries
, control_flag
);
460 load_charset_map_from_vector (charset
, vec
, control_flag
)
461 struct charset
*charset
;
465 unsigned min_code
= CHARSET_MIN_CODE (charset
);
466 unsigned max_code
= CHARSET_MAX_CODE (charset
);
467 struct charset_map_entries
*head
, *entries
;
469 int len
= ASIZE (vec
);
474 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
478 head
= entries
= ((struct charset_map_entries
*)
479 alloca (sizeof (struct charset_map_entries
)));
481 for (i
= 0; i
< len
; i
+= 2)
483 Lisp_Object val
, val2
;
495 from
= XFASTINT (val
);
496 to
= XFASTINT (val2
);
501 from
= to
= XFASTINT (val
);
503 val
= AREF (vec
, i
+ 1);
507 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
510 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
512 entries
->next
= ((struct charset_map_entries
*)
513 alloca (sizeof (struct charset_map_entries
)));
514 entries
= entries
->next
;
516 idx
= n_entries
% 0x10000;
517 entries
->entry
[idx
].from
= from
;
518 entries
->entry
[idx
].to
= to
;
519 entries
->entry
[idx
].c
= c
;
523 load_charset_map (charset
, head
, n_entries
, control_flag
);
527 load_charset (charset
)
528 struct charset
*charset
;
530 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
534 map
= CHARSET_MAP (charset
);
536 load_charset_map_from_file (charset
, map
, 1);
538 load_charset_map_from_vector (charset
, map
, 1);
539 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP
;
544 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
545 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
549 return (CHARSETP (object
) ? Qt
: Qnil
);
554 map_charset_chars (c_function
, function
, arg
,
556 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
557 Lisp_Object function
, arg
;
558 struct charset
*charset
;
564 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
565 load_charset (charset
);
567 partial
= (from
> CHARSET_MIN_CODE (charset
)
568 || to
< CHARSET_MAX_CODE (charset
));
570 if (CHARSET_UNIFIED_P (charset
)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
573 map_char_table_for_charset (c_function
, function
,
574 CHARSET_DEUNIFIER (charset
), arg
,
575 partial
? charset
: NULL
, from
, to
);
578 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
580 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
581 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
582 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
583 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
585 range
= Fcons (make_number (from_c
), make_number (to_c
));
587 (*c_function
) (arg
, range
);
589 call2 (function
, range
, arg
);
591 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
595 map_char_table_for_charset (c_function
, function
,
596 CHARSET_ENCODER (charset
), arg
,
597 partial
? charset
: NULL
, from
, to
);
599 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
601 Lisp_Object subset_info
;
604 subset_info
= CHARSET_SUBSET (charset
);
605 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
606 offset
= XINT (AREF (subset_info
, 3));
608 if (from
< XFASTINT (AREF (subset_info
, 1)))
609 from
= XFASTINT (AREF (subset_info
, 1));
611 if (to
> XFASTINT (AREF (subset_info
, 2)))
612 to
= XFASTINT (AREF (subset_info
, 2));
613 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
615 else /* i.e. CHARSET_METHOD_SUPERSET */
619 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
620 parents
= XCDR (parents
))
623 unsigned this_from
, this_to
;
625 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
626 offset
= XINT (XCDR (XCAR (parents
)));
627 this_from
= from
- offset
;
628 this_to
= to
- offset
;
629 if (this_from
< CHARSET_MIN_CODE (charset
))
630 this_from
= CHARSET_MIN_CODE (charset
);
631 if (this_to
> CHARSET_MAX_CODE (charset
))
632 this_to
= CHARSET_MAX_CODE (charset
);
633 map_charset_chars (c_function
, function
, arg
, charset
,
639 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
640 doc
: /* Call FUNCTION for all characters in CHARSET.
641 FUNCTION is called with an argument RANGE and the optional 3rd
644 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
645 characters contained in CHARSET.
647 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
648 range of code points of target characters. */)
649 (function
, charset
, arg
, from_code
, to_code
)
650 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
655 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
656 if (NILP (from_code
))
657 from
= CHARSET_MIN_CODE (cs
);
660 CHECK_NATNUM (from_code
);
661 from
= XINT (from_code
);
662 if (from
< CHARSET_MIN_CODE (cs
))
663 from
= CHARSET_MIN_CODE (cs
);
666 to
= CHARSET_MAX_CODE (cs
);
669 CHECK_NATNUM (to_code
);
671 if (to
> CHARSET_MAX_CODE (cs
))
672 to
= CHARSET_MAX_CODE (cs
);
674 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
679 /* Define a charset according to the arguments. The Nth argument is
680 the Nth attribute of the charset (the last attribute `charset-id'
681 is not included). See the docstring of `define-charset' for the
684 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
685 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
686 doc
: /* For internal use only.
687 usage: (define-charset-internal ...) */)
692 /* Charset attr vector. */
696 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
698 struct charset charset
;
701 int new_definition_p
;
704 if (nargs
!= charset_arg_max
)
705 return Fsignal (Qwrong_number_of_arguments
,
706 Fcons (intern ("define-charset-internal"),
707 make_number (nargs
)));
709 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
711 CHECK_SYMBOL (args
[charset_arg_name
]);
712 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
714 val
= args
[charset_arg_code_space
];
715 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
717 int min_byte
, max_byte
;
719 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
720 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
721 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
722 error ("Invalid :code-space value");
723 charset
.code_space
[i
* 4] = min_byte
;
724 charset
.code_space
[i
* 4 + 1] = max_byte
;
725 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
726 nchars
*= charset
.code_space
[i
* 4 + 2];
727 charset
.code_space
[i
* 4 + 3] = nchars
;
732 val
= args
[charset_arg_dimension
];
734 charset
.dimension
= dimension
;
738 charset
.dimension
= XINT (val
);
739 if (charset
.dimension
< 1 || charset
.dimension
> 4)
740 args_out_of_range_3 (val
, make_number (1), make_number (4));
743 charset
.code_linear_p
744 = (charset
.dimension
== 1
745 || (charset
.code_space
[2] == 256
746 && (charset
.dimension
== 2
747 || (charset
.code_space
[6] == 256
748 && (charset
.dimension
== 3
749 || charset
.code_space
[10] == 256)))));
751 if (! charset
.code_linear_p
)
753 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
754 bzero (charset
.code_space_mask
, 256);
755 for (i
= 0; i
< 4; i
++)
756 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
758 charset
.code_space_mask
[j
] |= (1 << i
);
761 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
763 charset
.min_code
= (charset
.code_space
[0]
764 | (charset
.code_space
[4] << 8)
765 | (charset
.code_space
[8] << 16)
766 | (charset
.code_space
[12] << 24));
767 charset
.max_code
= (charset
.code_space
[1]
768 | (charset
.code_space
[5] << 8)
769 | (charset
.code_space
[9] << 16)
770 | (charset
.code_space
[13] << 24));
771 charset
.char_index_offset
= 0;
773 val
= args
[charset_arg_min_code
];
783 CHECK_NUMBER_CAR (val
);
784 CHECK_NUMBER_CDR (val
);
785 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
787 if (code
< charset
.min_code
788 || code
> charset
.max_code
)
789 args_out_of_range_3 (make_number (charset
.min_code
),
790 make_number (charset
.max_code
), val
);
791 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
792 charset
.min_code
= code
;
795 val
= args
[charset_arg_max_code
];
805 CHECK_NUMBER_CAR (val
);
806 CHECK_NUMBER_CDR (val
);
807 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
809 if (code
< charset
.min_code
810 || code
> charset
.max_code
)
811 args_out_of_range_3 (make_number (charset
.min_code
),
812 make_number (charset
.max_code
), val
);
813 charset
.max_code
= code
;
816 charset
.compact_codes_p
= charset
.max_code
< 0x1000000;
818 val
= args
[charset_arg_invalid_code
];
821 if (charset
.min_code
> 0)
822 charset
.invalid_code
= 0;
825 XSETINT (val
, charset
.max_code
+ 1);
826 if (XINT (val
) == charset
.max_code
+ 1)
827 charset
.invalid_code
= charset
.max_code
+ 1;
829 error ("Attribute :invalid-code must be specified");
835 charset
.invalid_code
= XFASTINT (val
);
838 val
= args
[charset_arg_iso_final
];
840 charset
.iso_final
= -1;
844 if (XINT (val
) < '0' || XINT (val
) > 127)
845 error ("Invalid iso-final-char: %d", XINT (val
));
846 charset
.iso_final
= XINT (val
);
849 val
= args
[charset_arg_iso_revision
];
851 charset
.iso_revision
= -1;
856 args_out_of_range (make_number (63), val
);
857 charset
.iso_revision
= XINT (val
);
860 val
= args
[charset_arg_emacs_mule_id
];
862 charset
.emacs_mule_id
= -1;
866 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
867 error ("Invalid emacs-mule-id: %d", XINT (val
));
868 charset
.emacs_mule_id
= XINT (val
);
871 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
873 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
875 charset
.unified_p
= 0;
877 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
879 if (! NILP (args
[charset_arg_code_offset
]))
881 val
= args
[charset_arg_code_offset
];
884 charset
.method
= CHARSET_METHOD_OFFSET
;
885 charset
.code_offset
= XINT (val
);
887 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
888 charset
.min_char
= i
+ charset
.code_offset
;
889 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
890 charset
.max_char
= i
+ charset
.code_offset
;
891 if (charset
.max_char
> MAX_CHAR
)
892 error ("Unsupported max char: %d", charset
.max_char
);
894 i
= (charset
.min_char
>> 7) << 7;
895 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
896 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
898 for (; i
<= charset
.max_char
; i
+= 0x1000)
899 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
901 else if (! NILP (args
[charset_arg_map
]))
903 val
= args
[charset_arg_map
];
904 ASET (attrs
, charset_map
, val
);
906 load_charset_map_from_file (&charset
, val
, 0);
908 load_charset_map_from_vector (&charset
, val
, 0);
909 charset
.method
= CHARSET_METHOD_MAP_DEFERRED
;
911 else if (! NILP (args
[charset_arg_subset
]))
914 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
915 struct charset
*parent_charset
;
917 val
= args
[charset_arg_subset
];
919 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
920 parent_min_code
= Fnth (make_number (1), val
);
921 CHECK_NATNUM (parent_min_code
);
922 parent_max_code
= Fnth (make_number (2), val
);
923 CHECK_NATNUM (parent_max_code
);
924 parent_code_offset
= Fnth (make_number (3), val
);
925 CHECK_NUMBER (parent_code_offset
);
926 val
= Fmake_vector (make_number (4), Qnil
);
927 ASET (val
, 0, make_number (parent_charset
->id
));
928 ASET (val
, 1, parent_min_code
);
929 ASET (val
, 2, parent_max_code
);
930 ASET (val
, 3, parent_code_offset
);
931 ASET (attrs
, charset_subset
, val
);
933 charset
.method
= CHARSET_METHOD_SUBSET
;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i
= 0; i
< 190; i
++)
938 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
940 /* We also copy these for parents. */
941 charset
.min_char
= parent_charset
->min_char
;
942 charset
.max_char
= parent_charset
->max_char
;
944 else if (! NILP (args
[charset_arg_superset
]))
946 val
= args
[charset_arg_superset
];
947 charset
.method
= CHARSET_METHOD_SUPERSET
;
948 val
= Fcopy_sequence (val
);
949 ASET (attrs
, charset_superset
, val
);
951 charset
.min_char
= MAX_CHAR
;
952 charset
.max_char
= 0;
953 for (; ! NILP (val
); val
= Fcdr (val
))
955 Lisp_Object elt
, car_part
, cdr_part
;
957 struct charset
*this_charset
;
962 car_part
= XCAR (elt
);
963 cdr_part
= XCDR (elt
);
964 CHECK_CHARSET_GET_ID (car_part
, this_id
);
965 CHECK_NUMBER (cdr_part
);
966 offset
= XINT (cdr_part
);
970 CHECK_CHARSET_GET_ID (elt
, this_id
);
973 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
975 this_charset
= CHARSET_FROM_ID (this_id
);
976 if (charset
.min_char
> this_charset
->min_char
)
977 charset
.min_char
= this_charset
->min_char
;
978 if (charset
.max_char
< this_charset
->max_char
)
979 charset
.max_char
= this_charset
->max_char
;
980 for (i
= 0; i
< 190; i
++)
981 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
985 error ("None of :code-offset, :map, :parents are specified");
987 val
= args
[charset_arg_unify_map
];
988 if (! NILP (val
) && !STRINGP (val
))
990 ASET (attrs
, charset_unify_map
, val
);
992 CHECK_LIST (args
[charset_arg_plist
]);
993 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
995 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
997 if (charset
.hash_index
>= 0)
999 new_definition_p
= 0;
1000 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1001 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1005 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1007 if (charset_table_used
== charset_table_size
)
1009 struct charset
*new_table
1010 = (struct charset
*) xmalloc (sizeof (struct charset
)
1011 * (charset_table_size
+ 16));
1012 bcopy (charset_table
, new_table
,
1013 sizeof (struct charset
) * charset_table_size
);
1014 charset_table_size
+= 16;
1015 charset_table
= new_table
;
1017 id
= charset_table_used
++;
1018 new_definition_p
= 1;
1021 ASET (attrs
, charset_id
, make_number (id
));
1023 charset_table
[id
] = charset
;
1025 if (charset
.iso_final
>= 0)
1027 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1028 charset
.iso_final
) = id
;
1029 if (new_definition_p
)
1030 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1031 Fcons (make_number (id
), Qnil
));
1032 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1033 charset_jisx0201_roman
= id
;
1034 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1035 charset_jisx0208_1978
= id
;
1036 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1037 charset_jisx0208
= id
;
1040 if (charset
.emacs_mule_id
>= 0)
1042 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1043 if (charset
.emacs_mule_id
< 0xA0)
1044 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1046 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1047 if (new_definition_p
)
1048 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1049 Fcons (make_number (id
), Qnil
));
1052 if (new_definition_p
)
1054 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1055 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1056 Fcons (make_number (id
), Qnil
));
1057 charset_ordered_list_tick
++;
1064 /* Same as Fdefine_charset_internal but arguments are more convenient
1065 to call from C (typically in syms_of_charset). This can define a
1066 charset of `offset' method only. Return the ID of the new
1070 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1071 iso_final
, iso_revision
, emacs_mule_id
,
1072 ascii_compatible
, supplementary
,
1076 unsigned char *code_space
;
1077 unsigned min_code
, max_code
;
1078 int iso_final
, iso_revision
, emacs_mule_id
;
1079 int ascii_compatible
, supplementary
;
1082 Lisp_Object args
[charset_arg_max
];
1083 Lisp_Object plist
[14];
1087 args
[charset_arg_name
] = name
;
1088 args
[charset_arg_dimension
] = make_number (dimension
);
1089 val
= Fmake_vector (make_number (8), make_number (0));
1090 for (i
= 0; i
< 8; i
++)
1091 ASET (val
, i
, make_number (code_space
[i
]));
1092 args
[charset_arg_code_space
] = val
;
1093 args
[charset_arg_min_code
] = make_number (min_code
);
1094 args
[charset_arg_max_code
] = make_number (max_code
);
1095 args
[charset_arg_iso_final
]
1096 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1097 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1098 args
[charset_arg_emacs_mule_id
]
1099 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1100 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1101 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1102 args
[charset_arg_invalid_code
] = Qnil
;
1103 args
[charset_arg_code_offset
] = make_number (code_offset
);
1104 args
[charset_arg_map
] = Qnil
;
1105 args
[charset_arg_subset
] = Qnil
;
1106 args
[charset_arg_superset
] = Qnil
;
1107 args
[charset_arg_unify_map
] = Qnil
;
1109 plist
[0] = intern (":name");
1110 plist
[1] = args
[charset_arg_name
];
1111 plist
[2] = intern (":dimension");
1112 plist
[3] = args
[charset_arg_dimension
];
1113 plist
[4] = intern (":code-space");
1114 plist
[5] = args
[charset_arg_code_space
];
1115 plist
[6] = intern (":iso-final-char");
1116 plist
[7] = args
[charset_arg_iso_final
];
1117 plist
[8] = intern (":emacs-mule-id");
1118 plist
[9] = args
[charset_arg_emacs_mule_id
];
1119 plist
[10] = intern (":ascii-compatible-p");
1120 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1121 plist
[12] = intern (":code-offset");
1122 plist
[13] = args
[charset_arg_code_offset
];
1124 args
[charset_arg_plist
] = Flist (14, plist
);
1125 Fdefine_charset_internal (charset_arg_max
, args
);
1127 return XINT (CHARSET_SYMBOL_ID (name
));
1131 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1132 Sdefine_charset_alias
, 2, 2, 0,
1133 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1135 Lisp_Object alias
, charset
;
1139 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1140 Fputhash (alias
, attr
, Vcharset_hash_table
);
1141 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1146 DEFUN ("unibyte-charset", Funibyte_charset
, Sunibyte_charset
, 0, 0, 0,
1147 doc
: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1150 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte
));
1154 DEFUN ("set-unibyte-charset", Fset_unibyte_charset
, Sset_unibyte_charset
,
1156 doc
: /* Set the unibyte charset to CHARSET.
1157 This determines how unibyte/multibyte conversion is done. See also
1158 function `unibyte-charset'. */)
1160 Lisp_Object charset
;
1165 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
1166 if (! cs
->ascii_compatible_p
1167 || cs
->dimension
!= 1)
1168 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset
)));
1169 charset_unibyte
= cs
->id
;
1170 memset (unibyte_has_multibyte_table
, 1, 128);
1171 for (i
= 128; i
< 256; i
++)
1173 c
= DECODE_CHAR (cs
, i
);
1174 unibyte_to_multibyte_table
[i
] = (c
< 0 ? BYTE8_TO_CHAR (i
) : c
);
1175 unibyte_has_multibyte_table
[i
] = c
>= 0;
1182 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1183 doc
: /* Return the property list of CHARSET. */)
1185 Lisp_Object charset
;
1189 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1190 return CHARSET_ATTR_PLIST (attrs
);
1194 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1195 doc
: /* Set CHARSET's property list to PLIST. */)
1197 Lisp_Object charset
, plist
;
1201 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1202 CHARSET_ATTR_PLIST (attrs
) = plist
;
1207 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1208 doc
: /* Unify characters of CHARSET with Unicode.
1209 This means reading the relevant file and installing the table defined
1210 by CHARSET's `:unify-map' property.
1212 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1213 the same meaning as the `:unify-map' attribute in the function
1214 `define-charset' (which see).
1216 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1217 (charset
, unify_map
, deunify
)
1218 Lisp_Object charset
, unify_map
, deunify
;
1223 CHECK_CHARSET_GET_ID (charset
, id
);
1224 cs
= CHARSET_FROM_ID (id
);
1225 if (CHARSET_METHOD (cs
) == CHARSET_METHOD_MAP_DEFERRED
)
1228 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1229 : ! CHARSET_UNIFIED_P (cs
))
1232 CHARSET_UNIFIED_P (cs
) = 0;
1235 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
)
1236 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1237 if (NILP (unify_map
))
1238 unify_map
= CHARSET_UNIFY_MAP (cs
);
1239 if (STRINGP (unify_map
))
1240 load_charset_map_from_file (cs
, unify_map
, 2);
1241 else if (VECTORP (unify_map
))
1242 load_charset_map_from_vector (cs
, unify_map
, 2);
1243 else if (NILP (unify_map
))
1244 error ("No unify-map for charset");
1246 error ("Bad unify-map arg");
1247 CHARSET_UNIFIED_P (cs
) = 1;
1249 else if (CHAR_TABLE_P (Vchar_unify_table
))
1251 int min_code
= CHARSET_MIN_CODE (cs
);
1252 int max_code
= CHARSET_MAX_CODE (cs
);
1253 int min_char
= DECODE_CHAR (cs
, min_code
);
1254 int max_char
= DECODE_CHAR (cs
, max_code
);
1256 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1262 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1263 Sget_unused_iso_final_char
, 2, 2, 0,
1265 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1266 DIMENSION is the number of bytes to represent a character: 1 or 2.
1267 CHARS is the number of characters in a dimension: 94 or 96.
1269 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1270 If there's no unused final char for the specified kind of charset,
1273 Lisp_Object dimension
, chars
;
1277 CHECK_NUMBER (dimension
);
1278 CHECK_NUMBER (chars
);
1279 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1280 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1281 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1282 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1283 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1284 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1286 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1290 check_iso_charset_parameter (dimension
, chars
, final_char
)
1291 Lisp_Object dimension
, chars
, final_char
;
1293 CHECK_NATNUM (dimension
);
1294 CHECK_NATNUM (chars
);
1295 CHECK_NATNUM (final_char
);
1297 if (XINT (dimension
) > 3)
1298 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1299 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1300 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1301 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1302 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1306 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1308 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1310 On decoding by an ISO-2022 base coding system, when a charset
1311 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1312 if CHARSET is designated instead. */)
1313 (dimension
, chars
, final_char
, charset
)
1314 Lisp_Object dimension
, chars
, final_char
, charset
;
1319 CHECK_CHARSET_GET_ID (charset
, id
);
1320 check_iso_charset_parameter (dimension
, chars
, final_char
);
1321 chars_flag
= XINT (chars
) == 96;
1322 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1327 /* Return information about charsets in the text at PTR of NBYTES
1328 bytes, which are NCHARS characters. The value is:
1330 0: Each character is represented by one byte. This is always
1331 true for a unibyte string. For a multibyte string, true if
1332 it contains only ASCII characters.
1334 1: No charsets other than ascii, control-1, and latin-1 are
1341 string_xstring_p (string
)
1344 const unsigned char *p
= SDATA (string
);
1345 const unsigned char *endp
= p
+ SBYTES (string
);
1347 if (SCHARS (string
) == SBYTES (string
))
1352 int c
= STRING_CHAR_ADVANCE (p
);
1361 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1363 CHARSETS is a vector. If Nth element is non-nil, it means the
1364 charset whose id is N is already found.
1366 It may lookup a translation table TABLE if supplied. */
1369 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1370 const unsigned char *ptr
;
1371 EMACS_INT nchars
, nbytes
;
1372 Lisp_Object charsets
, table
;
1375 const unsigned char *pend
= ptr
+ nbytes
;
1377 if (nchars
== nbytes
)
1380 ASET (charsets
, charset_ascii
, Qt
);
1387 c
= translate_char (table
, c
);
1388 if (ASCII_BYTE_P (c
))
1389 ASET (charsets
, charset_ascii
, Qt
);
1391 ASET (charsets
, charset_eight_bit
, Qt
);
1398 int c
= STRING_CHAR_ADVANCE (ptr
);
1399 struct charset
*charset
;
1402 c
= translate_char (table
, c
);
1403 charset
= CHAR_CHARSET (c
);
1404 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1409 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1411 doc
: /* Return a list of charsets in the region between BEG and END.
1412 BEG and END are buffer positions.
1413 Optional arg TABLE if non-nil is a translation table to look up.
1415 If the current buffer is unibyte, the returned list may contain
1416 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1418 Lisp_Object beg
, end
, table
;
1420 Lisp_Object charsets
;
1421 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1424 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1426 validate_region (&beg
, &end
);
1427 from
= XFASTINT (beg
);
1428 stop
= to
= XFASTINT (end
);
1430 if (from
< GPT
&& GPT
< to
)
1433 stop_byte
= GPT_BYTE
;
1436 stop_byte
= CHAR_TO_BYTE (stop
);
1438 from_byte
= CHAR_TO_BYTE (from
);
1440 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1443 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1444 stop_byte
- from_byte
, charsets
, table
,
1448 from
= stop
, from_byte
= stop_byte
;
1449 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1456 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1457 if (!NILP (AREF (charsets
, i
)))
1458 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1462 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1464 doc
: /* Return a list of charsets in STR.
1465 Optional arg TABLE if non-nil is a translation table to look up.
1467 If STR is unibyte, the returned list may contain
1468 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1470 Lisp_Object str
, table
;
1472 Lisp_Object charsets
;
1478 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1479 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1481 STRING_MULTIBYTE (str
));
1483 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1484 if (!NILP (AREF (charsets
, i
)))
1485 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1491 /* Return a character correponding to the code-point CODE of
1495 decode_char (charset
, code
)
1496 struct charset
*charset
;
1500 enum charset_method method
= CHARSET_METHOD (charset
);
1502 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1505 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1507 load_charset (charset
);
1508 method
= CHARSET_METHOD (charset
);
1511 if (method
== CHARSET_METHOD_SUBSET
)
1513 Lisp_Object subset_info
;
1515 subset_info
= CHARSET_SUBSET (charset
);
1516 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1517 code
-= XINT (AREF (subset_info
, 3));
1518 if (code
< XFASTINT (AREF (subset_info
, 1))
1519 || code
> XFASTINT (AREF (subset_info
, 2)))
1522 c
= DECODE_CHAR (charset
, code
);
1524 else if (method
== CHARSET_METHOD_SUPERSET
)
1526 Lisp_Object parents
;
1528 parents
= CHARSET_SUPERSET (charset
);
1530 for (; CONSP (parents
); parents
= XCDR (parents
))
1532 int id
= XINT (XCAR (XCAR (parents
)));
1533 int code_offset
= XINT (XCDR (XCAR (parents
)));
1534 unsigned this_code
= code
- code_offset
;
1536 charset
= CHARSET_FROM_ID (id
);
1537 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1543 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1547 if (method
== CHARSET_METHOD_MAP
)
1549 Lisp_Object decoder
;
1551 decoder
= CHARSET_DECODER (charset
);
1552 if (! VECTORP (decoder
))
1554 c
= XINT (AREF (decoder
, char_index
));
1558 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1562 if (CHARSET_UNIFIED_P (charset
)
1565 MAYBE_UNIFY_CHAR (c
);
1571 /* Variable used temporarily by the macro ENCODE_CHAR. */
1572 Lisp_Object charset_work
;
1574 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1575 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1576 use CHARSET's strict_max_char instead of max_char. */
1579 encode_char (charset
, c
)
1580 struct charset
*charset
;
1584 enum charset_method method
= CHARSET_METHOD (charset
);
1586 if (CHARSET_UNIFIED_P (charset
))
1588 Lisp_Object deunifier
, deunified
;
1590 deunifier
= CHARSET_DEUNIFIER (charset
);
1591 if (! CHAR_TABLE_P (deunifier
))
1593 Funify_charset (CHARSET_NAME (charset
), Qnil
, Qnil
);
1594 deunifier
= CHARSET_DEUNIFIER (charset
);
1596 deunified
= CHAR_TABLE_REF (deunifier
, c
);
1597 if (! NILP (deunified
))
1598 c
= XINT (deunified
);
1601 if (method
== CHARSET_METHOD_SUBSET
)
1603 Lisp_Object subset_info
;
1604 struct charset
*this_charset
;
1606 subset_info
= CHARSET_SUBSET (charset
);
1607 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1608 code
= ENCODE_CHAR (this_charset
, c
);
1609 if (code
== CHARSET_INVALID_CODE (this_charset
)
1610 || code
< XFASTINT (AREF (subset_info
, 1))
1611 || code
> XFASTINT (AREF (subset_info
, 2)))
1612 return CHARSET_INVALID_CODE (charset
);
1613 code
+= XINT (AREF (subset_info
, 3));
1617 if (method
== CHARSET_METHOD_SUPERSET
)
1619 Lisp_Object parents
;
1621 parents
= CHARSET_SUPERSET (charset
);
1622 for (; CONSP (parents
); parents
= XCDR (parents
))
1624 int id
= XINT (XCAR (XCAR (parents
)));
1625 int code_offset
= XINT (XCDR (XCAR (parents
)));
1626 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1628 code
= ENCODE_CHAR (this_charset
, c
);
1629 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1630 return code
+ code_offset
;
1632 return CHARSET_INVALID_CODE (charset
);
1635 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1636 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1637 return CHARSET_INVALID_CODE (charset
);
1639 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1641 load_charset (charset
);
1642 method
= CHARSET_METHOD (charset
);
1645 if (method
== CHARSET_METHOD_MAP
)
1647 Lisp_Object encoder
;
1650 encoder
= CHARSET_ENCODER (charset
);
1651 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1652 return CHARSET_INVALID_CODE (charset
);
1653 val
= CHAR_TABLE_REF (encoder
, c
);
1655 return CHARSET_INVALID_CODE (charset
);
1657 if (! CHARSET_COMPACT_CODES_P (charset
))
1658 code
= INDEX_TO_CODE_POINT (charset
, code
);
1660 else /* method == CHARSET_METHOD_OFFSET */
1662 code
= c
- CHARSET_CODE_OFFSET (charset
);
1663 code
= INDEX_TO_CODE_POINT (charset
, code
);
1670 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1671 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1672 Return nil if CODE-POINT is not valid in CHARSET.
1674 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1676 Optional argument RESTRICTION specifies a way to map the pair of CCS
1677 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1678 (charset
, code_point
, restriction
)
1679 Lisp_Object charset
, code_point
, restriction
;
1683 struct charset
*charsetp
;
1685 CHECK_CHARSET_GET_ID (charset
, id
);
1686 if (CONSP (code_point
))
1688 CHECK_NATNUM_CAR (code_point
);
1689 CHECK_NATNUM_CDR (code_point
);
1690 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1694 CHECK_NATNUM (code_point
);
1695 code
= XINT (code_point
);
1697 charsetp
= CHARSET_FROM_ID (id
);
1698 c
= DECODE_CHAR (charsetp
, code
);
1699 return (c
>= 0 ? make_number (c
) : Qnil
);
1703 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1704 doc
: /* Encode the character CH into a code-point of CHARSET.
1705 Return nil if CHARSET doesn't include CH.
1707 Optional argument RESTRICTION specifies a way to map CHAR to a
1708 code-point in CCS. Currently not supported and just ignored. */)
1709 (ch
, charset
, restriction
)
1710 Lisp_Object ch
, charset
, restriction
;
1714 struct charset
*charsetp
;
1716 CHECK_CHARSET_GET_ID (charset
, id
);
1718 charsetp
= CHARSET_FROM_ID (id
);
1719 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1720 if (code
== CHARSET_INVALID_CODE (charsetp
))
1722 if (code
> 0x7FFFFFF)
1723 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1724 return make_number (code
);
1728 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1730 /* Return a character of CHARSET whose position codes are CODEn.
1732 CODE1 through CODE4 are optional, but if you don't supply sufficient
1733 position codes, it is assumed that the minimum code in each dimension
1735 (charset
, code1
, code2
, code3
, code4
)
1736 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1739 struct charset
*charsetp
;
1743 CHECK_CHARSET_GET_ID (charset
, id
);
1744 charsetp
= CHARSET_FROM_ID (id
);
1746 dimension
= CHARSET_DIMENSION (charsetp
);
1748 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1749 ? 0 : CHARSET_MIN_CODE (charsetp
));
1752 CHECK_NATNUM (code1
);
1753 if (XFASTINT (code1
) >= 0x100)
1754 args_out_of_range (make_number (0xFF), code1
);
1755 code
= XFASTINT (code1
);
1761 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1764 CHECK_NATNUM (code2
);
1765 if (XFASTINT (code2
) >= 0x100)
1766 args_out_of_range (make_number (0xFF), code2
);
1767 code
|= XFASTINT (code2
);
1774 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1777 CHECK_NATNUM (code3
);
1778 if (XFASTINT (code3
) >= 0x100)
1779 args_out_of_range (make_number (0xFF), code3
);
1780 code
|= XFASTINT (code3
);
1787 code
|= charsetp
->code_space
[0];
1790 CHECK_NATNUM (code4
);
1791 if (XFASTINT (code4
) >= 0x100)
1792 args_out_of_range (make_number (0xFF), code4
);
1793 code
|= XFASTINT (code4
);
1800 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1802 c
= DECODE_CHAR (charsetp
, code
);
1804 error ("Invalid code(s)");
1805 return make_number (c
);
1809 /* Return the first charset in CHARSET_LIST that contains C.
1810 CHARSET_LIST is a list of charset IDs. If it is nil, use
1811 Vcharset_ordered_list. */
1814 char_charset (c
, charset_list
, code_return
)
1816 Lisp_Object charset_list
;
1817 unsigned *code_return
;
1819 if (NILP (charset_list
))
1820 charset_list
= Vcharset_ordered_list
;
1822 while (CONSP (charset_list
))
1824 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1825 unsigned code
= ENCODE_CHAR (charset
, c
);
1827 if (code
!= CHARSET_INVALID_CODE (charset
))
1830 *code_return
= code
;
1833 charset_list
= XCDR (charset_list
);
1839 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1841 /*Return list of charset and one to four position-codes of CHAR.
1842 The charset is decided by the current priority order of charsets.
1843 A position-code is a byte value of each dimension of the code-point of
1844 CHAR in the charset. */)
1848 struct charset
*charset
;
1853 CHECK_CHARACTER (ch
);
1855 charset
= CHAR_CHARSET (c
);
1858 code
= ENCODE_CHAR (charset
, c
);
1859 if (code
== CHARSET_INVALID_CODE (charset
))
1861 dimension
= CHARSET_DIMENSION (charset
);
1862 for (val
= Qnil
; dimension
> 0; dimension
--)
1864 val
= Fcons (make_number (code
& 0xFF), val
);
1867 return Fcons (CHARSET_NAME (charset
), val
);
1871 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1872 doc
: /* Return the charset of highest priority that contains CH. */)
1876 struct charset
*charset
;
1878 CHECK_CHARACTER (ch
);
1879 charset
= CHAR_CHARSET (XINT (ch
));
1880 return (CHARSET_NAME (charset
));
1884 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1886 Return charset of a character in the current buffer at position POS.
1887 If POS is nil, it defauls to the current point.
1888 If POS is out of range, the value is nil. */)
1893 struct charset
*charset
;
1895 ch
= Fchar_after (pos
);
1896 if (! INTEGERP (ch
))
1898 charset
= CHAR_CHARSET (XINT (ch
));
1899 return (CHARSET_NAME (charset
));
1903 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1905 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1907 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1908 by their DIMENSION, CHARS, and FINAL-CHAR,
1909 where as Emacs distinguishes them by charset symbol.
1910 See the documentation of the function `charset-info' for the meanings of
1911 DIMENSION, CHARS, and FINAL-CHAR. */)
1912 (dimension
, chars
, final_char
)
1913 Lisp_Object dimension
, chars
, final_char
;
1918 check_iso_charset_parameter (dimension
, chars
, final_char
);
1919 chars_flag
= XFASTINT (chars
) == 96;
1920 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
1921 XFASTINT (final_char
));
1922 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
1926 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
1929 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1933 struct charset
*charset
;
1936 for (i
= 0; i
< charset_table_used
; i
++)
1938 charset
= CHARSET_FROM_ID (i
);
1939 attrs
= CHARSET_ATTRIBUTES (charset
);
1941 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
1943 CHARSET_ATTR_DECODER (attrs
) = Qnil
;
1944 CHARSET_ATTR_ENCODER (attrs
) = Qnil
;
1945 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP_DEFERRED
;
1948 if (CHARSET_UNIFIED_P (charset
))
1949 CHARSET_ATTR_DEUNIFIER (attrs
) = Qnil
;
1952 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
1954 Foptimize_char_table (Vchar_unified_charset_table
);
1955 Vchar_unify_table
= Vchar_unified_charset_table
;
1956 Vchar_unified_charset_table
= Qnil
;
1962 DEFUN ("charset-priority-list", Fcharset_priority_list
,
1963 Scharset_priority_list
, 0, 1, 0,
1964 doc
: /* Return the list of charsets ordered by priority.
1965 HIGHESTP non-nil means just return the highest priority one. */)
1967 Lisp_Object highestp
;
1969 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
1971 if (!NILP (highestp
))
1972 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
1974 while (!NILP (list
))
1976 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
1979 return Fnreverse (val
);
1982 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
1984 doc
: /* Assign higher priority to the charsets given as arguments.
1985 usage: (set-charset-priority &rest charsets) */)
1990 Lisp_Object new_head
, old_list
, arglist
[2];
1991 Lisp_Object list_2022
, list_emacs_mule
;
1994 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
1996 for (i
= 0; i
< nargs
; i
++)
1998 CHECK_CHARSET_GET_ID (args
[i
], id
);
1999 if (! NILP (Fmemq (make_number (id
), old_list
)))
2001 old_list
= Fdelq (make_number (id
), old_list
);
2002 new_head
= Fcons (make_number (id
), new_head
);
2005 arglist
[0] = Fnreverse (new_head
);
2006 arglist
[1] = old_list
;
2007 Vcharset_ordered_list
= Fnconc (2, arglist
);
2008 charset_ordered_list_tick
++;
2010 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2011 CONSP (old_list
); old_list
= XCDR (old_list
))
2013 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2014 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2015 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2016 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2018 Viso_2022_charset_list
= Fnreverse (list_2022
);
2019 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2024 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2026 doc
: /* Internal use only.
2027 Return charset identification number of CHARSET. */)
2029 Lisp_Object charset
;
2033 CHECK_CHARSET_GET_ID (charset
, id
);
2034 return make_number (id
);
2042 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory
),
2048 init_charset_once ()
2052 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2053 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2054 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2055 iso_charset_table
[i
][j
][k
] = -1;
2057 for (i
= 0; i
< 256; i
++)
2058 emacs_mule_charset
[i
] = NULL
;
2060 charset_jisx0201_roman
= -1;
2061 charset_jisx0208_1978
= -1;
2062 charset_jisx0208
= -1;
2064 for (i
= 0; i
< 128; i
++)
2065 unibyte_to_multibyte_table
[i
] = i
;
2066 for (; i
< 256; i
++)
2067 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2075 DEFSYM (Qcharsetp
, "charsetp");
2077 DEFSYM (Qascii
, "ascii");
2078 DEFSYM (Qunicode
, "unicode");
2079 DEFSYM (Qeight_bit
, "eight-bit");
2080 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2085 staticpro (&Vcharset_ordered_list
);
2086 Vcharset_ordered_list
= Qnil
;
2088 staticpro (&Viso_2022_charset_list
);
2089 Viso_2022_charset_list
= Qnil
;
2091 staticpro (&Vemacs_mule_charset_list
);
2092 Vemacs_mule_charset_list
= Qnil
;
2094 staticpro (&Vcharset_hash_table
);
2096 Lisp_Object args
[2];
2099 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2102 charset_table_size
= 128;
2103 charset_table
= ((struct charset
*)
2104 xmalloc (sizeof (struct charset
) * charset_table_size
));
2105 charset_table_used
= 0;
2107 staticpro (&Vchar_unified_charset_table
);
2108 Vchar_unified_charset_table
= Fmake_char_table (Qnil
, make_number (-1));
2110 defsubr (&Scharsetp
);
2111 defsubr (&Smap_charset_chars
);
2112 defsubr (&Sdefine_charset_internal
);
2113 defsubr (&Sdefine_charset_alias
);
2114 defsubr (&Sunibyte_charset
);
2115 defsubr (&Sset_unibyte_charset
);
2116 defsubr (&Scharset_plist
);
2117 defsubr (&Sset_charset_plist
);
2118 defsubr (&Sunify_charset
);
2119 defsubr (&Sget_unused_iso_final_char
);
2120 defsubr (&Sdeclare_equiv_charset
);
2121 defsubr (&Sfind_charset_region
);
2122 defsubr (&Sfind_charset_string
);
2123 defsubr (&Sdecode_char
);
2124 defsubr (&Sencode_char
);
2125 defsubr (&Ssplit_char
);
2126 defsubr (&Smake_char
);
2127 defsubr (&Schar_charset
);
2128 defsubr (&Scharset_after
);
2129 defsubr (&Siso_charset
);
2130 defsubr (&Sclear_charset_maps
);
2131 defsubr (&Scharset_priority_list
);
2132 defsubr (&Sset_charset_priority
);
2133 defsubr (&Scharset_id_internal
);
2135 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2136 doc
: /* *Lisp of directories to search for charset map files. */);
2137 Vcharset_map_path
= Qnil
;
2139 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2140 doc
: /* List of all charsets ever defined. */);
2141 Vcharset_list
= Qnil
;
2144 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2145 0, 127, 'B', -1, 0, 1, 0, 0);
2147 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2148 0, 255, -1, -1, -1, 1, 0, 0);
2150 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2151 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2153 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2154 128, 255, -1, 0, -1, 0, 0,
2155 MAX_5_BYTE_CHAR
+ 1);
2160 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2161 (do not change this comment) */