1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
36 #include "character.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
59 Lisp_Object Vcharset_list
;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table
;
65 /* Table of struct charset. */
66 struct charset
*charset_table
;
68 static int charset_table_size
;
69 static int charset_table_used
;
71 Lisp_Object Qcharsetp
;
73 /* Special charset symbols. */
75 Lisp_Object Qeight_bit
;
76 Lisp_Object Qiso_8859_1
;
80 /* The corresponding charsets. */
82 int charset_eight_bit
;
83 int charset_iso_8859_1
;
87 /* The other special charsets. */
88 int charset_jisx0201_roman
;
89 int charset_jisx0208_1978
;
93 /* Value of charset attribute `charset-iso-plane'. */
96 /* Charset of unibyte characters. */
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list
;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 Lisp_Object Vcharset_non_preferred_head
;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
109 unsigned short charset_ordered_list_tick
;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list
;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list
;
117 struct charset
*emacs_mule_charset
[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
123 Lisp_Object Vcharset_map_path
;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map
;
128 Lisp_Object Vcurrent_iso639_language
;
130 /* Defined in chartab.c */
132 map_char_table_for_charset (void (*c_function
) (Lisp_Object
, Lisp_Object
),
133 Lisp_Object function
, Lisp_Object table
,
134 Lisp_Object arg
, struct charset
*charset
,
135 unsigned from
, unsigned to
);
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
177 /* The current charset for which the following tables are setup. */
178 struct charset
*current
;
180 /* 1 iff the following table is used for encoder. */
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char
, max_char
;
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder
[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder
[0x20000];
205 } *temp_charset_work
;
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded
;
235 struct charset_map_entries
241 struct charset_map_entries
*next
;
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
260 If CONTROL_FLAG is 2, setup the following tables.
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
270 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
272 Lisp_Object vec
, table
;
273 unsigned max_code
= CHARSET_MAX_CODE (charset
);
274 int ascii_compatible_p
= charset
->ascii_compatible_p
;
275 int min_char
, max_char
, nonascii_min_char
;
277 unsigned char *fast_map
= charset
->fast_map
;
284 if (! inhibit_load_charset_map
)
286 if (control_flag
== 1)
288 if (charset
->method
== CHARSET_METHOD_MAP
)
290 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
292 vec
= CHARSET_DECODER (charset
)
293 = Fmake_vector (make_number (n
), make_number (-1));
297 char_table_set_range (Vchar_unify_table
,
298 charset
->min_char
, charset
->max_char
,
304 table
= Fmake_char_table (Qnil
, Qnil
);
305 if (charset
->method
== CHARSET_METHOD_MAP
)
306 CHARSET_ENCODER (charset
) = table
;
308 CHARSET_DEUNIFIER (charset
) = table
;
313 if (! temp_charset_work
)
314 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
315 if (control_flag
== 1)
317 memset (temp_charset_work
->table
.decoder
, -1,
318 sizeof (int) * 0x10000);
322 memset (temp_charset_work
->table
.encoder
, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work
->zero_index_char
= -1;
326 temp_charset_work
->current
= charset
;
327 temp_charset_work
->for_encoder
= (control_flag
== 2);
330 charset_map_loaded
= 1;
333 min_char
= max_char
= entries
->entry
[0].c
;
334 nonascii_min_char
= MAX_CHAR
;
335 for (i
= 0; i
< n_entries
; i
++)
338 int from_index
, to_index
;
340 int idx
= i
% 0x10000;
342 if (i
> 0 && idx
== 0)
343 entries
= entries
->next
;
344 from
= entries
->entry
[idx
].from
;
345 to
= entries
->entry
[idx
].to
;
346 from_c
= entries
->entry
[idx
].c
;
347 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
350 to_index
= from_index
;
355 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
356 to_c
= from_c
+ (to_index
- from_index
);
358 if (from_index
< 0 || to_index
< 0)
363 else if (from_c
< min_char
)
366 if (control_flag
== 1)
368 if (charset
->method
== CHARSET_METHOD_MAP
)
369 for (; from_index
<= to_index
; from_index
++, from_c
++)
370 ASET (vec
, from_index
, make_number (from_c
));
372 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 CHAR_TABLE_SET (Vchar_unify_table
,
374 CHARSET_CODE_OFFSET (charset
) + from_index
,
375 make_number (from_c
));
377 else if (control_flag
== 2)
379 if (charset
->method
== CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset
))
381 for (; from_index
<= to_index
; from_index
++, from_c
++)
383 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
385 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
386 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
389 for (; from_index
<= to_index
; from_index
++, from_c
++)
391 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
392 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
395 else if (control_flag
== 3)
396 for (; from_index
<= to_index
; from_index
++, from_c
++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
398 else if (control_flag
== 4)
399 for (; from_index
<= to_index
; from_index
++, from_c
++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
401 else /* control_flag == 0 */
403 if (ascii_compatible_p
)
405 if (! ASCII_BYTE_P (from_c
))
407 if (from_c
< nonascii_min_char
)
408 nonascii_min_char
= from_c
;
410 else if (! ASCII_BYTE_P (to_c
))
412 nonascii_min_char
= 0x80;
416 for (; from_c
<= to_c
; from_c
++)
417 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
421 if (control_flag
== 0)
423 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
424 ? nonascii_min_char
: min_char
);
425 CHARSET_MAX_CHAR (charset
) = max_char
;
427 else if (control_flag
== 4)
429 temp_charset_work
->min_char
= min_char
;
430 temp_charset_work
->max_char
= max_char
;
435 /* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
438 static INLINE
unsigned
439 read_hex (FILE *fp
, int *eof
)
444 while ((c
= getc (fp
)) != EOF
)
448 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
452 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
464 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
466 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
468 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
469 n
= (n
* 10) + c
- '0';
475 extern Lisp_Object Qfile_name_handler_alist
;
477 /* Return a mapping vector for CHARSET loaded from MAPFILE.
478 Each line of MAPFILE has this form
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
488 where CODE1 is a code-point or a cons of code-points specifying a
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
494 extern void add_to_log (char *, Lisp_Object
, Lisp_Object
);
497 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
, int control_flag
)
499 unsigned min_code
= CHARSET_MIN_CODE (charset
);
500 unsigned max_code
= CHARSET_MAX_CODE (charset
);
504 Lisp_Object suffixes
;
505 struct charset_map_entries
*head
, *entries
;
506 int n_entries
, count
;
509 suffixes
= Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil
));
512 count
= SPECPDL_INDEX ();
513 specbind (Qfile_name_handler_alist
, Qnil
);
514 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
515 unbind_to (count
, Qnil
);
517 || ! (fp
= fdopen (fd
, "r")))
518 error ("Failure in loading charset map: %S", SDATA (mapfile
));
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head
, struct charset_map_entries
*,
523 sizeof (struct charset_map_entries
));
525 memset (entries
, 0, sizeof (struct charset_map_entries
));
535 from
= read_hex (fp
, &eof
);
538 if (getc (fp
) == '-')
539 to
= read_hex (fp
, &eof
);
542 c
= (int) read_hex (fp
, &eof
);
544 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
547 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
549 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
550 sizeof (struct charset_map_entries
));
551 entries
= entries
->next
;
552 memset (entries
, 0, sizeof (struct charset_map_entries
));
554 idx
= n_entries
% 0x10000;
555 entries
->entry
[idx
].from
= from
;
556 entries
->entry
[idx
].to
= to
;
557 entries
->entry
[idx
].c
= c
;
562 load_charset_map (charset
, head
, n_entries
, control_flag
);
567 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
569 unsigned min_code
= CHARSET_MIN_CODE (charset
);
570 unsigned max_code
= CHARSET_MAX_CODE (charset
);
571 struct charset_map_entries
*head
, *entries
;
573 int len
= ASIZE (vec
);
579 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head
, struct charset_map_entries
*,
586 sizeof (struct charset_map_entries
));
588 memset (entries
, 0, sizeof (struct charset_map_entries
));
591 for (i
= 0; i
< len
; i
+= 2)
593 Lisp_Object val
, val2
;
605 from
= XFASTINT (val
);
606 to
= XFASTINT (val2
);
611 from
= to
= XFASTINT (val
);
613 val
= AREF (vec
, i
+ 1);
617 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
620 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
622 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
623 sizeof (struct charset_map_entries
));
624 entries
= entries
->next
;
625 memset (entries
, 0, sizeof (struct charset_map_entries
));
627 idx
= n_entries
% 0x10000;
628 entries
->entry
[idx
].from
= from
;
629 entries
->entry
[idx
].to
= to
;
630 entries
->entry
[idx
].c
= c
;
634 load_charset_map (charset
, head
, n_entries
, control_flag
);
639 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
643 load_charset (struct charset
*charset
, int control_flag
)
647 if (inhibit_load_charset_map
649 && charset
== temp_charset_work
->current
650 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
653 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
654 map
= CHARSET_MAP (charset
);
655 else if (CHARSET_UNIFIED_P (charset
))
656 map
= CHARSET_UNIFY_MAP (charset
);
658 load_charset_map_from_file (charset
, map
, control_flag
);
660 load_charset_map_from_vector (charset
, map
, control_flag
);
664 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
665 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
669 return (CHARSETP (object
) ? Qt
: Qnil
);
673 void map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
674 Lisp_Object function
, Lisp_Object arg
,
675 unsigned from
, unsigned to
);
678 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
), Lisp_Object function
, Lisp_Object arg
, unsigned int from
, unsigned int to
)
680 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
681 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
686 range
= Fcons (Qnil
, Qnil
);
689 c
= temp_charset_work
->min_char
;
690 stop
= (temp_charset_work
->max_char
< 0x20000
691 ? temp_charset_work
->max_char
: 0xFFFF);
695 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
697 if (index
>= from_idx
&& index
<= to_idx
)
699 if (NILP (XCAR (range
)))
700 XSETCAR (range
, make_number (c
));
702 else if (! NILP (XCAR (range
)))
704 XSETCDR (range
, make_number (c
- 1));
706 (*c_function
) (arg
, range
);
708 call2 (function
, range
, arg
);
709 XSETCAR (range
, Qnil
);
713 if (c
== temp_charset_work
->max_char
)
715 if (! NILP (XCAR (range
)))
717 XSETCDR (range
, make_number (c
));
719 (*c_function
) (arg
, range
);
721 call2 (function
, range
, arg
);
726 stop
= temp_charset_work
->max_char
;
734 map_charset_chars (c_function
, function
, arg
,
736 void (*c_function
) (Lisp_Object
, Lisp_Object
);
737 Lisp_Object function
, arg
;
738 struct charset
*charset
;
744 partial
= (from
> CHARSET_MIN_CODE (charset
)
745 || to
< CHARSET_MAX_CODE (charset
));
747 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
749 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
750 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
751 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
752 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
754 if (CHARSET_UNIFIED_P (charset
))
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
757 load_charset (charset
, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
759 map_char_table_for_charset (c_function
, function
,
760 CHARSET_DEUNIFIER (charset
), arg
,
761 partial
? charset
: NULL
, from
, to
);
763 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
766 range
= Fcons (make_number (from_c
), make_number (to_c
));
768 (*c_function
) (arg
, range
);
770 call2 (function
, range
, arg
);
772 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
775 load_charset (charset
, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
777 map_char_table_for_charset (c_function
, function
,
778 CHARSET_ENCODER (charset
), arg
,
779 partial
? charset
: NULL
, from
, to
);
781 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
783 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
785 Lisp_Object subset_info
;
788 subset_info
= CHARSET_SUBSET (charset
);
789 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
790 offset
= XINT (AREF (subset_info
, 3));
792 if (from
< XFASTINT (AREF (subset_info
, 1)))
793 from
= XFASTINT (AREF (subset_info
, 1));
795 if (to
> XFASTINT (AREF (subset_info
, 2)))
796 to
= XFASTINT (AREF (subset_info
, 2));
797 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
799 else /* i.e. CHARSET_METHOD_SUPERSET */
803 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
804 parents
= XCDR (parents
))
807 unsigned this_from
, this_to
;
809 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
810 offset
= XINT (XCDR (XCAR (parents
)));
811 this_from
= from
> offset
? from
- offset
: 0;
812 this_to
= to
> offset
? to
- offset
: 0;
813 if (this_from
< CHARSET_MIN_CODE (charset
))
814 this_from
= CHARSET_MIN_CODE (charset
);
815 if (this_to
> CHARSET_MAX_CODE (charset
))
816 this_to
= CHARSET_MAX_CODE (charset
);
817 map_charset_chars (c_function
, function
, arg
, charset
,
823 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
824 doc
: /* Call FUNCTION for all characters in CHARSET.
825 FUNCTION is called with an argument RANGE and the optional 3rd
828 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829 characters contained in CHARSET.
831 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
832 range of code points (in CHARSET) of target characters. */)
833 (function
, charset
, arg
, from_code
, to_code
)
834 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
839 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
840 if (NILP (from_code
))
841 from
= CHARSET_MIN_CODE (cs
);
844 CHECK_NATNUM (from_code
);
845 from
= XINT (from_code
);
846 if (from
< CHARSET_MIN_CODE (cs
))
847 from
= CHARSET_MIN_CODE (cs
);
850 to
= CHARSET_MAX_CODE (cs
);
853 CHECK_NATNUM (to_code
);
855 if (to
> CHARSET_MAX_CODE (cs
))
856 to
= CHARSET_MAX_CODE (cs
);
858 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
863 /* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
868 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
869 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
870 doc
: /* For internal use only.
871 usage: (define-charset-internal ...) */)
876 /* Charset attr vector. */
880 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
882 struct charset charset
;
885 int new_definition_p
;
888 if (nargs
!= charset_arg_max
)
889 return Fsignal (Qwrong_number_of_arguments
,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs
)));
893 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
895 CHECK_SYMBOL (args
[charset_arg_name
]);
896 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
898 val
= args
[charset_arg_code_space
];
899 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
901 int min_byte
, max_byte
;
903 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
904 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
905 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
906 error ("Invalid :code-space value");
907 charset
.code_space
[i
* 4] = min_byte
;
908 charset
.code_space
[i
* 4 + 1] = max_byte
;
909 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
910 nchars
*= charset
.code_space
[i
* 4 + 2];
911 charset
.code_space
[i
* 4 + 3] = nchars
;
916 val
= args
[charset_arg_dimension
];
918 charset
.dimension
= dimension
;
922 charset
.dimension
= XINT (val
);
923 if (charset
.dimension
< 1 || charset
.dimension
> 4)
924 args_out_of_range_3 (val
, make_number (1), make_number (4));
927 charset
.code_linear_p
928 = (charset
.dimension
== 1
929 || (charset
.code_space
[2] == 256
930 && (charset
.dimension
== 2
931 || (charset
.code_space
[6] == 256
932 && (charset
.dimension
== 3
933 || charset
.code_space
[10] == 256)))));
935 if (! charset
.code_linear_p
)
937 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
938 memset (charset
.code_space_mask
, 0, 256);
939 for (i
= 0; i
< 4; i
++)
940 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
942 charset
.code_space_mask
[j
] |= (1 << i
);
945 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
947 charset
.min_code
= (charset
.code_space
[0]
948 | (charset
.code_space
[4] << 8)
949 | (charset
.code_space
[8] << 16)
950 | (charset
.code_space
[12] << 24));
951 charset
.max_code
= (charset
.code_space
[1]
952 | (charset
.code_space
[5] << 8)
953 | (charset
.code_space
[9] << 16)
954 | (charset
.code_space
[13] << 24));
955 charset
.char_index_offset
= 0;
957 val
= args
[charset_arg_min_code
];
967 CHECK_NUMBER_CAR (val
);
968 CHECK_NUMBER_CDR (val
);
969 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
971 if (code
< charset
.min_code
972 || code
> charset
.max_code
)
973 args_out_of_range_3 (make_number (charset
.min_code
),
974 make_number (charset
.max_code
), val
);
975 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
976 charset
.min_code
= code
;
979 val
= args
[charset_arg_max_code
];
989 CHECK_NUMBER_CAR (val
);
990 CHECK_NUMBER_CDR (val
);
991 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
993 if (code
< charset
.min_code
994 || code
> charset
.max_code
)
995 args_out_of_range_3 (make_number (charset
.min_code
),
996 make_number (charset
.max_code
), val
);
997 charset
.max_code
= code
;
1000 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1002 val
= args
[charset_arg_invalid_code
];
1005 if (charset
.min_code
> 0)
1006 charset
.invalid_code
= 0;
1009 XSETINT (val
, charset
.max_code
+ 1);
1010 if (XINT (val
) == charset
.max_code
+ 1)
1011 charset
.invalid_code
= charset
.max_code
+ 1;
1013 error ("Attribute :invalid-code must be specified");
1019 charset
.invalid_code
= XFASTINT (val
);
1022 val
= args
[charset_arg_iso_final
];
1024 charset
.iso_final
= -1;
1028 if (XINT (val
) < '0' || XINT (val
) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val
));
1030 charset
.iso_final
= XINT (val
);
1033 val
= args
[charset_arg_iso_revision
];
1035 charset
.iso_revision
= -1;
1039 if (XINT (val
) > 63)
1040 args_out_of_range (make_number (63), val
);
1041 charset
.iso_revision
= XINT (val
);
1044 val
= args
[charset_arg_emacs_mule_id
];
1046 charset
.emacs_mule_id
= -1;
1050 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val
));
1052 charset
.emacs_mule_id
= XINT (val
);
1055 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1057 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1059 charset
.unified_p
= 0;
1061 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1063 if (! NILP (args
[charset_arg_code_offset
]))
1065 val
= args
[charset_arg_code_offset
];
1068 charset
.method
= CHARSET_METHOD_OFFSET
;
1069 charset
.code_offset
= XINT (val
);
1071 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1072 charset
.min_char
= i
+ charset
.code_offset
;
1073 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1074 charset
.max_char
= i
+ charset
.code_offset
;
1075 if (charset
.max_char
> MAX_CHAR
)
1076 error ("Unsupported max char: %d", charset
.max_char
);
1078 i
= (charset
.min_char
>> 7) << 7;
1079 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1080 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1081 i
= (i
>> 12) << 12;
1082 for (; i
<= charset
.max_char
; i
+= 0x1000)
1083 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1084 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1085 charset
.ascii_compatible_p
= 1;
1087 else if (! NILP (args
[charset_arg_map
]))
1089 val
= args
[charset_arg_map
];
1090 ASET (attrs
, charset_map
, val
);
1091 charset
.method
= CHARSET_METHOD_MAP
;
1093 else if (! NILP (args
[charset_arg_subset
]))
1096 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1097 struct charset
*parent_charset
;
1099 val
= args
[charset_arg_subset
];
1100 parent
= Fcar (val
);
1101 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1102 parent_min_code
= Fnth (make_number (1), val
);
1103 CHECK_NATNUM (parent_min_code
);
1104 parent_max_code
= Fnth (make_number (2), val
);
1105 CHECK_NATNUM (parent_max_code
);
1106 parent_code_offset
= Fnth (make_number (3), val
);
1107 CHECK_NUMBER (parent_code_offset
);
1108 val
= Fmake_vector (make_number (4), Qnil
);
1109 ASET (val
, 0, make_number (parent_charset
->id
));
1110 ASET (val
, 1, parent_min_code
);
1111 ASET (val
, 2, parent_max_code
);
1112 ASET (val
, 3, parent_code_offset
);
1113 ASET (attrs
, charset_subset
, val
);
1115 charset
.method
= CHARSET_METHOD_SUBSET
;
1116 /* Here, we just copy the parent's fast_map. It's not accurate,
1117 but at least it works for quickly detecting which character
1118 DOESN'T belong to this charset. */
1119 for (i
= 0; i
< 190; i
++)
1120 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1122 /* We also copy these for parents. */
1123 charset
.min_char
= parent_charset
->min_char
;
1124 charset
.max_char
= parent_charset
->max_char
;
1126 else if (! NILP (args
[charset_arg_superset
]))
1128 val
= args
[charset_arg_superset
];
1129 charset
.method
= CHARSET_METHOD_SUPERSET
;
1130 val
= Fcopy_sequence (val
);
1131 ASET (attrs
, charset_superset
, val
);
1133 charset
.min_char
= MAX_CHAR
;
1134 charset
.max_char
= 0;
1135 for (; ! NILP (val
); val
= Fcdr (val
))
1137 Lisp_Object elt
, car_part
, cdr_part
;
1138 int this_id
, offset
;
1139 struct charset
*this_charset
;
1144 car_part
= XCAR (elt
);
1145 cdr_part
= XCDR (elt
);
1146 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1147 CHECK_NUMBER (cdr_part
);
1148 offset
= XINT (cdr_part
);
1152 CHECK_CHARSET_GET_ID (elt
, this_id
);
1155 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1157 this_charset
= CHARSET_FROM_ID (this_id
);
1158 if (charset
.min_char
> this_charset
->min_char
)
1159 charset
.min_char
= this_charset
->min_char
;
1160 if (charset
.max_char
< this_charset
->max_char
)
1161 charset
.max_char
= this_charset
->max_char
;
1162 for (i
= 0; i
< 190; i
++)
1163 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1167 error ("None of :code-offset, :map, :parents are specified");
1169 val
= args
[charset_arg_unify_map
];
1170 if (! NILP (val
) && !STRINGP (val
))
1172 ASET (attrs
, charset_unify_map
, val
);
1174 CHECK_LIST (args
[charset_arg_plist
]);
1175 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1177 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1179 if (charset
.hash_index
>= 0)
1181 new_definition_p
= 0;
1182 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1183 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1187 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1189 if (charset_table_used
== charset_table_size
)
1191 struct charset
*new_table
1192 = (struct charset
*) xmalloc (sizeof (struct charset
)
1193 * (charset_table_size
+ 16));
1194 memcpy (new_table
, charset_table
,
1195 sizeof (struct charset
) * charset_table_size
);
1196 charset_table_size
+= 16;
1197 charset_table
= new_table
;
1199 id
= charset_table_used
++;
1200 new_definition_p
= 1;
1203 ASET (attrs
, charset_id
, make_number (id
));
1205 charset_table
[id
] = charset
;
1207 if (charset
.method
== CHARSET_METHOD_MAP
)
1209 load_charset (&charset
, 0);
1210 charset_table
[id
] = charset
;
1213 if (charset
.iso_final
>= 0)
1215 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1216 charset
.iso_final
) = id
;
1217 if (new_definition_p
)
1218 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1219 Fcons (make_number (id
), Qnil
));
1220 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1221 charset_jisx0201_roman
= id
;
1222 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1223 charset_jisx0208_1978
= id
;
1224 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1225 charset_jisx0208
= id
;
1226 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1227 charset_ksc5601
= id
;
1230 if (charset
.emacs_mule_id
>= 0)
1232 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1233 if (charset
.emacs_mule_id
< 0xA0)
1234 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1236 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1237 if (new_definition_p
)
1238 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1239 Fcons (make_number (id
), Qnil
));
1242 if (new_definition_p
)
1244 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1245 if (charset
.supplementary_p
)
1246 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1247 Fcons (make_number (id
), Qnil
));
1252 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1254 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1256 if (cs
->supplementary_p
)
1259 if (EQ (tail
, Vcharset_ordered_list
))
1260 Vcharset_ordered_list
= Fcons (make_number (id
),
1261 Vcharset_ordered_list
);
1262 else if (NILP (tail
))
1263 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1264 Fcons (make_number (id
), Qnil
));
1267 val
= Fcons (XCAR (tail
), XCDR (tail
));
1268 XSETCDR (tail
, val
);
1269 XSETCAR (tail
, make_number (id
));
1272 charset_ordered_list_tick
++;
1279 /* Same as Fdefine_charset_internal but arguments are more convenient
1280 to call from C (typically in syms_of_charset). This can define a
1281 charset of `offset' method only. Return the ID of the new
1285 define_charset_internal (Lisp_Object name
,
1287 unsigned char *code_space
,
1288 unsigned min_code
, unsigned max_code
,
1289 int iso_final
, int iso_revision
, int emacs_mule_id
,
1290 int ascii_compatible
, int supplementary
,
1293 Lisp_Object args
[charset_arg_max
];
1294 Lisp_Object plist
[14];
1298 args
[charset_arg_name
] = name
;
1299 args
[charset_arg_dimension
] = make_number (dimension
);
1300 val
= Fmake_vector (make_number (8), make_number (0));
1301 for (i
= 0; i
< 8; i
++)
1302 ASET (val
, i
, make_number (code_space
[i
]));
1303 args
[charset_arg_code_space
] = val
;
1304 args
[charset_arg_min_code
] = make_number (min_code
);
1305 args
[charset_arg_max_code
] = make_number (max_code
);
1306 args
[charset_arg_iso_final
]
1307 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1308 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1309 args
[charset_arg_emacs_mule_id
]
1310 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1311 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1312 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1313 args
[charset_arg_invalid_code
] = Qnil
;
1314 args
[charset_arg_code_offset
] = make_number (code_offset
);
1315 args
[charset_arg_map
] = Qnil
;
1316 args
[charset_arg_subset
] = Qnil
;
1317 args
[charset_arg_superset
] = Qnil
;
1318 args
[charset_arg_unify_map
] = Qnil
;
1320 plist
[0] = intern_c_string (":name");
1321 plist
[1] = args
[charset_arg_name
];
1322 plist
[2] = intern_c_string (":dimension");
1323 plist
[3] = args
[charset_arg_dimension
];
1324 plist
[4] = intern_c_string (":code-space");
1325 plist
[5] = args
[charset_arg_code_space
];
1326 plist
[6] = intern_c_string (":iso-final-char");
1327 plist
[7] = args
[charset_arg_iso_final
];
1328 plist
[8] = intern_c_string (":emacs-mule-id");
1329 plist
[9] = args
[charset_arg_emacs_mule_id
];
1330 plist
[10] = intern_c_string (":ascii-compatible-p");
1331 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1332 plist
[12] = intern_c_string (":code-offset");
1333 plist
[13] = args
[charset_arg_code_offset
];
1335 args
[charset_arg_plist
] = Flist (14, plist
);
1336 Fdefine_charset_internal (charset_arg_max
, args
);
1338 return XINT (CHARSET_SYMBOL_ID (name
));
1342 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1343 Sdefine_charset_alias
, 2, 2, 0,
1344 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1346 Lisp_Object alias
, charset
;
1350 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1351 Fputhash (alias
, attr
, Vcharset_hash_table
);
1352 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1357 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1358 doc
: /* Return the property list of CHARSET. */)
1360 Lisp_Object charset
;
1364 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1365 return CHARSET_ATTR_PLIST (attrs
);
1369 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1370 doc
: /* Set CHARSET's property list to PLIST. */)
1372 Lisp_Object charset
, plist
;
1376 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1377 CHARSET_ATTR_PLIST (attrs
) = plist
;
1382 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1383 doc
: /* Unify characters of CHARSET with Unicode.
1384 This means reading the relevant file and installing the table defined
1385 by CHARSET's `:unify-map' property.
1387 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1388 the same meaning as the `:unify-map' attribute in the function
1389 `define-charset' (which see).
1391 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1392 (charset
, unify_map
, deunify
)
1393 Lisp_Object charset
, unify_map
, deunify
;
1398 CHECK_CHARSET_GET_ID (charset
, id
);
1399 cs
= CHARSET_FROM_ID (id
);
1401 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1402 : ! CHARSET_UNIFIED_P (cs
))
1405 CHARSET_UNIFIED_P (cs
) = 0;
1408 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1409 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1410 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1411 if (NILP (unify_map
))
1412 unify_map
= CHARSET_UNIFY_MAP (cs
);
1415 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1416 signal_error ("Bad unify-map", unify_map
);
1417 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1419 if (NILP (Vchar_unify_table
))
1420 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1421 char_table_set_range (Vchar_unify_table
,
1422 cs
->min_char
, cs
->max_char
, charset
);
1423 CHARSET_UNIFIED_P (cs
) = 1;
1425 else if (CHAR_TABLE_P (Vchar_unify_table
))
1427 int min_code
= CHARSET_MIN_CODE (cs
);
1428 int max_code
= CHARSET_MAX_CODE (cs
);
1429 int min_char
= DECODE_CHAR (cs
, min_code
);
1430 int max_char
= DECODE_CHAR (cs
, max_code
);
1432 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1438 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1439 Sget_unused_iso_final_char
, 2, 2, 0,
1441 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1442 DIMENSION is the number of bytes to represent a character: 1 or 2.
1443 CHARS is the number of characters in a dimension: 94 or 96.
1445 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1446 If there's no unused final char for the specified kind of charset,
1449 Lisp_Object dimension
, chars
;
1453 CHECK_NUMBER (dimension
);
1454 CHECK_NUMBER (chars
);
1455 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1456 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1457 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1458 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1459 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1460 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1462 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1466 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
1468 CHECK_NATNUM (dimension
);
1469 CHECK_NATNUM (chars
);
1470 CHECK_NATNUM (final_char
);
1472 if (XINT (dimension
) > 3)
1473 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1474 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1475 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1476 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1477 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1481 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1483 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1485 On decoding by an ISO-2022 base coding system, when a charset
1486 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1487 if CHARSET is designated instead. */)
1488 (dimension
, chars
, final_char
, charset
)
1489 Lisp_Object dimension
, chars
, final_char
, charset
;
1494 CHECK_CHARSET_GET_ID (charset
, id
);
1495 check_iso_charset_parameter (dimension
, chars
, final_char
);
1496 chars_flag
= XINT (chars
) == 96;
1497 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1502 /* Return information about charsets in the text at PTR of NBYTES
1503 bytes, which are NCHARS characters. The value is:
1505 0: Each character is represented by one byte. This is always
1506 true for a unibyte string. For a multibyte string, true if
1507 it contains only ASCII characters.
1509 1: No charsets other than ascii, control-1, and latin-1 are
1516 string_xstring_p (Lisp_Object string
)
1518 const unsigned char *p
= SDATA (string
);
1519 const unsigned char *endp
= p
+ SBYTES (string
);
1521 if (SCHARS (string
) == SBYTES (string
))
1526 int c
= STRING_CHAR_ADVANCE (p
);
1535 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1537 CHARSETS is a vector. If Nth element is non-nil, it means the
1538 charset whose id is N is already found.
1540 It may lookup a translation table TABLE if supplied. */
1543 find_charsets_in_text (const unsigned char *ptr
, EMACS_INT nchars
, EMACS_INT nbytes
, Lisp_Object charsets
, Lisp_Object table
, int multibyte
)
1545 const unsigned char *pend
= ptr
+ nbytes
;
1547 if (nchars
== nbytes
)
1550 ASET (charsets
, charset_ascii
, Qt
);
1557 c
= translate_char (table
, c
);
1558 if (ASCII_BYTE_P (c
))
1559 ASET (charsets
, charset_ascii
, Qt
);
1561 ASET (charsets
, charset_eight_bit
, Qt
);
1568 int c
= STRING_CHAR_ADVANCE (ptr
);
1569 struct charset
*charset
;
1572 c
= translate_char (table
, c
);
1573 charset
= CHAR_CHARSET (c
);
1574 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1579 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1581 doc
: /* Return a list of charsets in the region between BEG and END.
1582 BEG and END are buffer positions.
1583 Optional arg TABLE if non-nil is a translation table to look up.
1585 If the current buffer is unibyte, the returned list may contain
1586 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1588 Lisp_Object beg
, end
, table
;
1590 Lisp_Object charsets
;
1591 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1594 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1596 validate_region (&beg
, &end
);
1597 from
= XFASTINT (beg
);
1598 stop
= to
= XFASTINT (end
);
1600 if (from
< GPT
&& GPT
< to
)
1603 stop_byte
= GPT_BYTE
;
1606 stop_byte
= CHAR_TO_BYTE (stop
);
1608 from_byte
= CHAR_TO_BYTE (from
);
1610 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1613 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1614 stop_byte
- from_byte
, charsets
, table
,
1618 from
= stop
, from_byte
= stop_byte
;
1619 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1626 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1627 if (!NILP (AREF (charsets
, i
)))
1628 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1632 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1634 doc
: /* Return a list of charsets in STR.
1635 Optional arg TABLE if non-nil is a translation table to look up.
1637 If STR is unibyte, the returned list may contain
1638 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1640 Lisp_Object str
, table
;
1642 Lisp_Object charsets
;
1648 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1649 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1651 STRING_MULTIBYTE (str
));
1653 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1654 if (!NILP (AREF (charsets
, i
)))
1655 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1661 /* Return a unified character code for C (>= 0x110000). VAL is a
1662 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1665 maybe_unify_char (int c
, Lisp_Object val
)
1667 struct charset
*charset
;
1674 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1675 load_charset (charset
, 1);
1676 if (! inhibit_load_charset_map
)
1678 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1684 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1685 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1694 /* Return a character correponding to the code-point CODE of
1698 decode_char (struct charset
*charset
, unsigned int code
)
1701 enum charset_method method
= CHARSET_METHOD (charset
);
1703 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1706 if (method
== CHARSET_METHOD_SUBSET
)
1708 Lisp_Object subset_info
;
1710 subset_info
= CHARSET_SUBSET (charset
);
1711 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1712 code
-= XINT (AREF (subset_info
, 3));
1713 if (code
< XFASTINT (AREF (subset_info
, 1))
1714 || code
> XFASTINT (AREF (subset_info
, 2)))
1717 c
= DECODE_CHAR (charset
, code
);
1719 else if (method
== CHARSET_METHOD_SUPERSET
)
1721 Lisp_Object parents
;
1723 parents
= CHARSET_SUPERSET (charset
);
1725 for (; CONSP (parents
); parents
= XCDR (parents
))
1727 int id
= XINT (XCAR (XCAR (parents
)));
1728 int code_offset
= XINT (XCDR (XCAR (parents
)));
1729 unsigned this_code
= code
- code_offset
;
1731 charset
= CHARSET_FROM_ID (id
);
1732 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1738 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1742 if (method
== CHARSET_METHOD_MAP
)
1744 Lisp_Object decoder
;
1746 decoder
= CHARSET_DECODER (charset
);
1747 if (! VECTORP (decoder
))
1749 load_charset (charset
, 1);
1750 decoder
= CHARSET_DECODER (charset
);
1752 if (VECTORP (decoder
))
1753 c
= XINT (AREF (decoder
, char_index
));
1755 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1757 else /* method == CHARSET_METHOD_OFFSET */
1759 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1760 if (CHARSET_UNIFIED_P (charset
)
1761 && c
> MAX_UNICODE_CHAR
)
1762 MAYBE_UNIFY_CHAR (c
);
1769 /* Variable used temporarily by the macro ENCODE_CHAR. */
1770 Lisp_Object charset_work
;
1772 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1773 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1774 use CHARSET's strict_max_char instead of max_char. */
1777 encode_char (struct charset
*charset
, int c
)
1780 enum charset_method method
= CHARSET_METHOD (charset
);
1782 if (CHARSET_UNIFIED_P (charset
))
1784 Lisp_Object deunifier
;
1785 int code_index
= -1;
1787 deunifier
= CHARSET_DEUNIFIER (charset
);
1788 if (! CHAR_TABLE_P (deunifier
))
1790 load_charset (charset
, 2);
1791 deunifier
= CHARSET_DEUNIFIER (charset
);
1793 if (CHAR_TABLE_P (deunifier
))
1795 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1797 if (INTEGERP (deunified
))
1798 code_index
= XINT (deunified
);
1802 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1804 if (code_index
>= 0)
1805 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1808 if (method
== CHARSET_METHOD_SUBSET
)
1810 Lisp_Object subset_info
;
1811 struct charset
*this_charset
;
1813 subset_info
= CHARSET_SUBSET (charset
);
1814 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1815 code
= ENCODE_CHAR (this_charset
, c
);
1816 if (code
== CHARSET_INVALID_CODE (this_charset
)
1817 || code
< XFASTINT (AREF (subset_info
, 1))
1818 || code
> XFASTINT (AREF (subset_info
, 2)))
1819 return CHARSET_INVALID_CODE (charset
);
1820 code
+= XINT (AREF (subset_info
, 3));
1824 if (method
== CHARSET_METHOD_SUPERSET
)
1826 Lisp_Object parents
;
1828 parents
= CHARSET_SUPERSET (charset
);
1829 for (; CONSP (parents
); parents
= XCDR (parents
))
1831 int id
= XINT (XCAR (XCAR (parents
)));
1832 int code_offset
= XINT (XCDR (XCAR (parents
)));
1833 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1835 code
= ENCODE_CHAR (this_charset
, c
);
1836 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1837 return code
+ code_offset
;
1839 return CHARSET_INVALID_CODE (charset
);
1842 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1843 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1844 return CHARSET_INVALID_CODE (charset
);
1846 if (method
== CHARSET_METHOD_MAP
)
1848 Lisp_Object encoder
;
1851 encoder
= CHARSET_ENCODER (charset
);
1852 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1854 load_charset (charset
, 2);
1855 encoder
= CHARSET_ENCODER (charset
);
1857 if (CHAR_TABLE_P (encoder
))
1859 val
= CHAR_TABLE_REF (encoder
, c
);
1861 return CHARSET_INVALID_CODE (charset
);
1863 if (! CHARSET_COMPACT_CODES_P (charset
))
1864 code
= INDEX_TO_CODE_POINT (charset
, code
);
1868 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1869 code
= INDEX_TO_CODE_POINT (charset
, code
);
1872 else /* method == CHARSET_METHOD_OFFSET */
1874 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1876 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1883 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1884 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1885 Return nil if CODE-POINT is not valid in CHARSET.
1887 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1889 Optional argument RESTRICTION specifies a way to map the pair of CCS
1890 and CODE-POINT to a character. Currently not supported and just ignored. */)
1891 (charset
, code_point
, restriction
)
1892 Lisp_Object charset
, code_point
, restriction
;
1896 struct charset
*charsetp
;
1898 CHECK_CHARSET_GET_ID (charset
, id
);
1899 if (CONSP (code_point
))
1901 CHECK_NATNUM_CAR (code_point
);
1902 CHECK_NATNUM_CDR (code_point
);
1903 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1907 CHECK_NATNUM (code_point
);
1908 code
= XINT (code_point
);
1910 charsetp
= CHARSET_FROM_ID (id
);
1911 c
= DECODE_CHAR (charsetp
, code
);
1912 return (c
>= 0 ? make_number (c
) : Qnil
);
1916 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1917 doc
: /* Encode the character CH into a code-point of CHARSET.
1918 Return nil if CHARSET doesn't include CH.
1920 Optional argument RESTRICTION specifies a way to map CH to a
1921 code-point in CCS. Currently not supported and just ignored. */)
1922 (ch
, charset
, restriction
)
1923 Lisp_Object ch
, charset
, restriction
;
1927 struct charset
*charsetp
;
1929 CHECK_CHARSET_GET_ID (charset
, id
);
1931 charsetp
= CHARSET_FROM_ID (id
);
1932 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1933 if (code
== CHARSET_INVALID_CODE (charsetp
))
1935 if (code
> 0x7FFFFFF)
1936 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1937 return make_number (code
);
1941 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1943 /* Return a character of CHARSET whose position codes are CODEn.
1945 CODE1 through CODE4 are optional, but if you don't supply sufficient
1946 position codes, it is assumed that the minimum code in each dimension
1948 (charset
, code1
, code2
, code3
, code4
)
1949 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1952 struct charset
*charsetp
;
1956 CHECK_CHARSET_GET_ID (charset
, id
);
1957 charsetp
= CHARSET_FROM_ID (id
);
1959 dimension
= CHARSET_DIMENSION (charsetp
);
1961 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1962 ? 0 : CHARSET_MIN_CODE (charsetp
));
1965 CHECK_NATNUM (code1
);
1966 if (XFASTINT (code1
) >= 0x100)
1967 args_out_of_range (make_number (0xFF), code1
);
1968 code
= XFASTINT (code1
);
1974 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1977 CHECK_NATNUM (code2
);
1978 if (XFASTINT (code2
) >= 0x100)
1979 args_out_of_range (make_number (0xFF), code2
);
1980 code
|= XFASTINT (code2
);
1987 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1990 CHECK_NATNUM (code3
);
1991 if (XFASTINT (code3
) >= 0x100)
1992 args_out_of_range (make_number (0xFF), code3
);
1993 code
|= XFASTINT (code3
);
2000 code
|= charsetp
->code_space
[0];
2003 CHECK_NATNUM (code4
);
2004 if (XFASTINT (code4
) >= 0x100)
2005 args_out_of_range (make_number (0xFF), code4
);
2006 code
|= XFASTINT (code4
);
2013 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2015 c
= DECODE_CHAR (charsetp
, code
);
2017 error ("Invalid code(s)");
2018 return make_number (c
);
2022 /* Return the first charset in CHARSET_LIST that contains C.
2023 CHARSET_LIST is a list of charset IDs. If it is nil, use
2024 Vcharset_ordered_list. */
2027 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
2031 if (NILP (charset_list
))
2032 charset_list
= Vcharset_ordered_list
;
2036 while (CONSP (charset_list
))
2038 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2039 unsigned code
= ENCODE_CHAR (charset
, c
);
2041 if (code
!= CHARSET_INVALID_CODE (charset
))
2044 *code_return
= code
;
2047 charset_list
= XCDR (charset_list
);
2049 && c
<= MAX_UNICODE_CHAR
2050 && EQ (charset_list
, Vcharset_non_preferred_head
))
2051 return CHARSET_FROM_ID (charset_unicode
);
2053 return (maybe_null
? NULL
2054 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2055 : CHARSET_FROM_ID (charset_eight_bit
));
2059 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2061 /*Return list of charset and one to four position-codes of CH.
2062 The charset is decided by the current priority order of charsets.
2063 A position-code is a byte value of each dimension of the code-point of
2064 CH in the charset. */)
2068 struct charset
*charset
;
2073 CHECK_CHARACTER (ch
);
2075 charset
= CHAR_CHARSET (c
);
2078 code
= ENCODE_CHAR (charset
, c
);
2079 if (code
== CHARSET_INVALID_CODE (charset
))
2081 dimension
= CHARSET_DIMENSION (charset
);
2082 for (val
= Qnil
; dimension
> 0; dimension
--)
2084 val
= Fcons (make_number (code
& 0xFF), val
);
2087 return Fcons (CHARSET_NAME (charset
), val
);
2091 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2092 doc
: /* Return the charset of highest priority that contains CH.
2093 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2094 from which to find the charset. It may also be a coding system. In
2095 that case, find the charset from what supported by that coding system. */)
2097 Lisp_Object ch
, restriction
;
2099 struct charset
*charset
;
2101 CHECK_CHARACTER (ch
);
2102 if (NILP (restriction
))
2103 charset
= CHAR_CHARSET (XINT (ch
));
2106 Lisp_Object charset_list
;
2108 if (CONSP (restriction
))
2110 for (charset_list
= Qnil
; CONSP (restriction
);
2111 restriction
= XCDR (restriction
))
2115 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2116 charset_list
= Fcons (make_number (id
), charset_list
);
2118 charset_list
= Fnreverse (charset_list
);
2121 charset_list
= coding_system_charset_list (restriction
);
2122 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2126 return (CHARSET_NAME (charset
));
2130 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2132 Return charset of a character in the current buffer at position POS.
2133 If POS is nil, it defauls to the current point.
2134 If POS is out of range, the value is nil. */)
2139 struct charset
*charset
;
2141 ch
= Fchar_after (pos
);
2142 if (! INTEGERP (ch
))
2144 charset
= CHAR_CHARSET (XINT (ch
));
2145 return (CHARSET_NAME (charset
));
2149 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2151 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2153 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2154 by their DIMENSION, CHARS, and FINAL-CHAR,
2155 whereas Emacs distinguishes them by charset symbol.
2156 See the documentation of the function `charset-info' for the meanings of
2157 DIMENSION, CHARS, and FINAL-CHAR. */)
2158 (dimension
, chars
, final_char
)
2159 Lisp_Object dimension
, chars
, final_char
;
2164 check_iso_charset_parameter (dimension
, chars
, final_char
);
2165 chars_flag
= XFASTINT (chars
) == 96;
2166 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2167 XFASTINT (final_char
));
2168 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2172 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2176 Clear temporary charset mapping tables.
2177 It should be called only from temacs invoked for dumping. */)
2180 if (temp_charset_work
)
2182 free (temp_charset_work
);
2183 temp_charset_work
= NULL
;
2186 if (CHAR_TABLE_P (Vchar_unify_table
))
2187 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2192 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2193 Scharset_priority_list
, 0, 1, 0,
2194 doc
: /* Return the list of charsets ordered by priority.
2195 HIGHESTP non-nil means just return the highest priority one. */)
2197 Lisp_Object highestp
;
2199 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2201 if (!NILP (highestp
))
2202 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2204 while (!NILP (list
))
2206 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2209 return Fnreverse (val
);
2212 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2214 doc
: /* Assign higher priority to the charsets given as arguments.
2215 usage: (set-charset-priority &rest charsets) */)
2220 Lisp_Object new_head
, old_list
, arglist
[2];
2221 Lisp_Object list_2022
, list_emacs_mule
;
2224 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2226 for (i
= 0; i
< nargs
; i
++)
2228 CHECK_CHARSET_GET_ID (args
[i
], id
);
2229 if (! NILP (Fmemq (make_number (id
), old_list
)))
2231 old_list
= Fdelq (make_number (id
), old_list
);
2232 new_head
= Fcons (make_number (id
), new_head
);
2235 arglist
[0] = Fnreverse (new_head
);
2236 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2237 Vcharset_ordered_list
= Fnconc (2, arglist
);
2238 charset_ordered_list_tick
++;
2240 charset_unibyte
= -1;
2241 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2242 CONSP (old_list
); old_list
= XCDR (old_list
))
2244 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2245 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2246 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2247 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2248 if (charset_unibyte
< 0)
2250 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2252 if (CHARSET_DIMENSION (charset
) == 1
2253 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2254 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2255 charset_unibyte
= CHARSET_ID (charset
);
2258 Viso_2022_charset_list
= Fnreverse (list_2022
);
2259 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2260 if (charset_unibyte
< 0)
2261 charset_unibyte
= charset_iso_8859_1
;
2266 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2268 doc
: /* Internal use only.
2269 Return charset identification number of CHARSET. */)
2271 Lisp_Object charset
;
2275 CHECK_CHARSET_GET_ID (charset
, id
);
2276 return make_number (id
);
2283 Lisp_Object tempdir
;
2284 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2285 if (access ((char *) SDATA (tempdir
), 0) < 0)
2287 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2288 Emacs will not function correctly without the character map files.\n\
2289 Please check your installation!\n",
2291 /* TODO should this be a fatal error? (Bug#909) */
2294 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2299 init_charset_once (void)
2303 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2304 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2305 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2306 iso_charset_table
[i
][j
][k
] = -1;
2308 for (i
= 0; i
< 256; i
++)
2309 emacs_mule_charset
[i
] = NULL
;
2311 charset_jisx0201_roman
= -1;
2312 charset_jisx0208_1978
= -1;
2313 charset_jisx0208
= -1;
2314 charset_ksc5601
= -1;
2320 syms_of_charset (void)
2322 DEFSYM (Qcharsetp
, "charsetp");
2324 DEFSYM (Qascii
, "ascii");
2325 DEFSYM (Qunicode
, "unicode");
2326 DEFSYM (Qemacs
, "emacs");
2327 DEFSYM (Qeight_bit
, "eight-bit");
2328 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2333 staticpro (&Vcharset_ordered_list
);
2334 Vcharset_ordered_list
= Qnil
;
2336 staticpro (&Viso_2022_charset_list
);
2337 Viso_2022_charset_list
= Qnil
;
2339 staticpro (&Vemacs_mule_charset_list
);
2340 Vemacs_mule_charset_list
= Qnil
;
2342 /* Don't staticpro them here. It's done in syms_of_fns. */
2343 QCtest
= intern (":test");
2344 Qeq
= intern ("eq");
2346 staticpro (&Vcharset_hash_table
);
2348 Lisp_Object args
[2];
2351 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2354 charset_table_size
= 128;
2355 charset_table
= ((struct charset
*)
2356 xmalloc (sizeof (struct charset
) * charset_table_size
));
2357 charset_table_used
= 0;
2359 defsubr (&Scharsetp
);
2360 defsubr (&Smap_charset_chars
);
2361 defsubr (&Sdefine_charset_internal
);
2362 defsubr (&Sdefine_charset_alias
);
2363 defsubr (&Scharset_plist
);
2364 defsubr (&Sset_charset_plist
);
2365 defsubr (&Sunify_charset
);
2366 defsubr (&Sget_unused_iso_final_char
);
2367 defsubr (&Sdeclare_equiv_charset
);
2368 defsubr (&Sfind_charset_region
);
2369 defsubr (&Sfind_charset_string
);
2370 defsubr (&Sdecode_char
);
2371 defsubr (&Sencode_char
);
2372 defsubr (&Ssplit_char
);
2373 defsubr (&Smake_char
);
2374 defsubr (&Schar_charset
);
2375 defsubr (&Scharset_after
);
2376 defsubr (&Siso_charset
);
2377 defsubr (&Sclear_charset_maps
);
2378 defsubr (&Scharset_priority_list
);
2379 defsubr (&Sset_charset_priority
);
2380 defsubr (&Scharset_id_internal
);
2382 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2383 doc
: /* *List of directories to search for charset map files. */);
2384 Vcharset_map_path
= Qnil
;
2386 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2387 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2388 inhibit_load_charset_map
= 0;
2390 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2391 doc
: /* List of all charsets ever defined. */);
2392 Vcharset_list
= Qnil
;
2394 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2395 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2396 If the current language environment is for multiple languages (e.g. "Latin-1"),
2397 the value may be a list of mnemonics. */);
2398 Vcurrent_iso639_language
= Qnil
;
2401 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2402 0, 127, 'B', -1, 0, 1, 0, 0);
2404 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2405 0, 255, -1, -1, -1, 1, 0, 0);
2407 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2408 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2410 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2411 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2413 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2414 128, 255, -1, 0, -1, 0, 1,
2415 MAX_5_BYTE_CHAR
+ 1);
2416 charset_unibyte
= charset_iso_8859_1
;
2421 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2422 (do not change this comment) */