1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
36 #include "character.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
59 Lisp_Object Vcharset_list
;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table
;
65 /* Table of struct charset. */
66 struct charset
*charset_table
;
68 static int charset_table_size
;
69 static int charset_table_used
;
71 Lisp_Object Qcharsetp
;
73 /* Special charset symbols. */
75 Lisp_Object Qeight_bit
;
76 Lisp_Object Qiso_8859_1
;
80 /* The corresponding charsets. */
82 int charset_eight_bit
;
83 int charset_iso_8859_1
;
87 /* The other special charsets. */
88 int charset_jisx0201_roman
;
89 int charset_jisx0208_1978
;
93 /* Value of charset attribute `charset-iso-plane'. */
96 /* Charset of unibyte characters. */
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list
;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 Lisp_Object Vcharset_non_preferred_head
;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
109 unsigned short charset_ordered_list_tick
;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list
;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list
;
117 struct charset
*emacs_mule_charset
[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
123 Lisp_Object Vcharset_map_path
;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map
;
128 Lisp_Object Vcurrent_iso639_language
;
130 /* Defined in chartab.c */
132 map_char_table_for_charset (void (*c_function
) (Lisp_Object
, Lisp_Object
),
133 Lisp_Object function
, Lisp_Object table
,
134 Lisp_Object arg
, struct charset
*charset
,
135 unsigned from
, unsigned to
);
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
177 /* The current charset for which the following tables are setup. */
178 struct charset
*current
;
180 /* 1 iff the following table is used for encoder. */
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char
, max_char
;
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder
[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder
[0x20000];
205 } *temp_charset_work
;
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded
;
235 struct charset_map_entries
241 struct charset_map_entries
*next
;
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
260 If CONTROL_FLAG is 2, setup the following tables.
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
270 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
272 Lisp_Object vec
, table
;
273 unsigned max_code
= CHARSET_MAX_CODE (charset
);
274 int ascii_compatible_p
= charset
->ascii_compatible_p
;
275 int min_char
, max_char
, nonascii_min_char
;
277 unsigned char *fast_map
= charset
->fast_map
;
284 if (! inhibit_load_charset_map
)
286 if (control_flag
== 1)
288 if (charset
->method
== CHARSET_METHOD_MAP
)
290 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
292 vec
= CHARSET_DECODER (charset
)
293 = Fmake_vector (make_number (n
), make_number (-1));
297 char_table_set_range (Vchar_unify_table
,
298 charset
->min_char
, charset
->max_char
,
304 table
= Fmake_char_table (Qnil
, Qnil
);
305 if (charset
->method
== CHARSET_METHOD_MAP
)
306 CHARSET_ENCODER (charset
) = table
;
308 CHARSET_DEUNIFIER (charset
) = table
;
313 if (! temp_charset_work
)
314 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
315 if (control_flag
== 1)
317 memset (temp_charset_work
->table
.decoder
, -1,
318 sizeof (int) * 0x10000);
322 memset (temp_charset_work
->table
.encoder
, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work
->zero_index_char
= -1;
326 temp_charset_work
->current
= charset
;
327 temp_charset_work
->for_encoder
= (control_flag
== 2);
330 charset_map_loaded
= 1;
333 min_char
= max_char
= entries
->entry
[0].c
;
334 nonascii_min_char
= MAX_CHAR
;
335 for (i
= 0; i
< n_entries
; i
++)
338 int from_index
, to_index
;
340 int idx
= i
% 0x10000;
342 if (i
> 0 && idx
== 0)
343 entries
= entries
->next
;
344 from
= entries
->entry
[idx
].from
;
345 to
= entries
->entry
[idx
].to
;
346 from_c
= entries
->entry
[idx
].c
;
347 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
350 to_index
= from_index
;
355 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
356 to_c
= from_c
+ (to_index
- from_index
);
358 if (from_index
< 0 || to_index
< 0)
363 else if (from_c
< min_char
)
366 if (control_flag
== 1)
368 if (charset
->method
== CHARSET_METHOD_MAP
)
369 for (; from_index
<= to_index
; from_index
++, from_c
++)
370 ASET (vec
, from_index
, make_number (from_c
));
372 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 CHAR_TABLE_SET (Vchar_unify_table
,
374 CHARSET_CODE_OFFSET (charset
) + from_index
,
375 make_number (from_c
));
377 else if (control_flag
== 2)
379 if (charset
->method
== CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset
))
381 for (; from_index
<= to_index
; from_index
++, from_c
++)
383 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
385 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
386 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
389 for (; from_index
<= to_index
; from_index
++, from_c
++)
391 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
392 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
395 else if (control_flag
== 3)
396 for (; from_index
<= to_index
; from_index
++, from_c
++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
398 else if (control_flag
== 4)
399 for (; from_index
<= to_index
; from_index
++, from_c
++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
401 else /* control_flag == 0 */
403 if (ascii_compatible_p
)
405 if (! ASCII_BYTE_P (from_c
))
407 if (from_c
< nonascii_min_char
)
408 nonascii_min_char
= from_c
;
410 else if (! ASCII_BYTE_P (to_c
))
412 nonascii_min_char
= 0x80;
416 for (; from_c
<= to_c
; from_c
++)
417 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
421 if (control_flag
== 0)
423 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
424 ? nonascii_min_char
: min_char
);
425 CHARSET_MAX_CHAR (charset
) = max_char
;
427 else if (control_flag
== 4)
429 temp_charset_work
->min_char
= min_char
;
430 temp_charset_work
->max_char
= max_char
;
435 /* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
438 static INLINE
unsigned
439 read_hex (FILE *fp
, int *eof
)
444 while ((c
= getc (fp
)) != EOF
)
448 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
452 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
464 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
466 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
468 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
469 n
= (n
* 10) + c
- '0';
475 extern Lisp_Object Qfile_name_handler_alist
;
477 /* Return a mapping vector for CHARSET loaded from MAPFILE.
478 Each line of MAPFILE has this form
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
488 where CODE1 is a code-point or a cons of code-points specifying a
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
494 extern void add_to_log (char *, Lisp_Object
, Lisp_Object
);
497 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
, int control_flag
)
499 unsigned min_code
= CHARSET_MIN_CODE (charset
);
500 unsigned max_code
= CHARSET_MAX_CODE (charset
);
504 Lisp_Object suffixes
;
505 struct charset_map_entries
*head
, *entries
;
506 int n_entries
, count
;
509 suffixes
= Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil
));
512 count
= SPECPDL_INDEX ();
513 specbind (Qfile_name_handler_alist
, Qnil
);
514 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
515 unbind_to (count
, Qnil
);
517 || ! (fp
= fdopen (fd
, "r")))
518 error ("Failure in loading charset map: %S", SDATA (mapfile
));
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head
, struct charset_map_entries
*,
523 sizeof (struct charset_map_entries
));
525 bzero (entries
, sizeof (struct charset_map_entries
));
535 from
= read_hex (fp
, &eof
);
538 if (getc (fp
) == '-')
539 to
= read_hex (fp
, &eof
);
542 c
= (int) read_hex (fp
, &eof
);
544 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
547 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
549 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
550 sizeof (struct charset_map_entries
));
551 entries
= entries
->next
;
552 bzero (entries
, sizeof (struct charset_map_entries
));
554 idx
= n_entries
% 0x10000;
555 entries
->entry
[idx
].from
= from
;
556 entries
->entry
[idx
].to
= to
;
557 entries
->entry
[idx
].c
= c
;
562 load_charset_map (charset
, head
, n_entries
, control_flag
);
567 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
569 unsigned min_code
= CHARSET_MIN_CODE (charset
);
570 unsigned max_code
= CHARSET_MAX_CODE (charset
);
571 struct charset_map_entries
*head
, *entries
;
573 int len
= ASIZE (vec
);
579 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head
, struct charset_map_entries
*,
586 sizeof (struct charset_map_entries
));
588 bzero (entries
, sizeof (struct charset_map_entries
));
591 for (i
= 0; i
< len
; i
+= 2)
593 Lisp_Object val
, val2
;
605 from
= XFASTINT (val
);
606 to
= XFASTINT (val2
);
611 from
= to
= XFASTINT (val
);
613 val
= AREF (vec
, i
+ 1);
617 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
620 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
622 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
623 sizeof (struct charset_map_entries
));
624 entries
= entries
->next
;
625 bzero (entries
, sizeof (struct charset_map_entries
));
627 idx
= n_entries
% 0x10000;
628 entries
->entry
[idx
].from
= from
;
629 entries
->entry
[idx
].to
= to
;
630 entries
->entry
[idx
].c
= c
;
634 load_charset_map (charset
, head
, n_entries
, control_flag
);
639 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
643 load_charset (struct charset
*charset
, int control_flag
)
647 if (inhibit_load_charset_map
649 && charset
== temp_charset_work
->current
650 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
653 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
654 map
= CHARSET_MAP (charset
);
655 else if (CHARSET_UNIFIED_P (charset
))
656 map
= CHARSET_UNIFY_MAP (charset
);
658 load_charset_map_from_file (charset
, map
, control_flag
);
660 load_charset_map_from_vector (charset
, map
, control_flag
);
664 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
665 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
669 return (CHARSETP (object
) ? Qt
: Qnil
);
673 void map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
674 Lisp_Object function
, Lisp_Object arg
,
675 unsigned from
, unsigned to
);
678 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
), Lisp_Object function
, Lisp_Object arg
, unsigned int from
, unsigned int to
)
680 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
681 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
686 range
= Fcons (Qnil
, Qnil
);
689 c
= temp_charset_work
->min_char
;
690 stop
= (temp_charset_work
->max_char
< 0x20000
691 ? temp_charset_work
->max_char
: 0xFFFF);
695 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
697 if (index
>= from_idx
&& index
<= to_idx
)
699 if (NILP (XCAR (range
)))
700 XSETCAR (range
, make_number (c
));
702 else if (! NILP (XCAR (range
)))
704 XSETCDR (range
, make_number (c
- 1));
706 (*c_function
) (arg
, range
);
708 call2 (function
, range
, arg
);
709 XSETCAR (range
, Qnil
);
713 if (c
== temp_charset_work
->max_char
)
715 if (! NILP (XCAR (range
)))
717 XSETCDR (range
, make_number (c
));
719 (*c_function
) (arg
, range
);
721 call2 (function
, range
, arg
);
726 stop
= temp_charset_work
->max_char
;
734 map_charset_chars (c_function
, function
, arg
,
736 void (*c_function
) (Lisp_Object
, Lisp_Object
);
737 Lisp_Object function
, arg
;
738 struct charset
*charset
;
744 partial
= (from
> CHARSET_MIN_CODE (charset
)
745 || to
< CHARSET_MAX_CODE (charset
));
747 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
749 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
750 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
751 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
752 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
754 if (CHARSET_UNIFIED_P (charset
))
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
757 load_charset (charset
, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
759 map_char_table_for_charset (c_function
, function
,
760 CHARSET_DEUNIFIER (charset
), arg
,
761 partial
? charset
: NULL
, from
, to
);
763 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
766 range
= Fcons (make_number (from_c
), make_number (to_c
));
768 (*c_function
) (arg
, range
);
770 call2 (function
, range
, arg
);
772 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
775 load_charset (charset
, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
777 map_char_table_for_charset (c_function
, function
,
778 CHARSET_ENCODER (charset
), arg
,
779 partial
? charset
: NULL
, from
, to
);
781 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
783 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
785 Lisp_Object subset_info
;
788 subset_info
= CHARSET_SUBSET (charset
);
789 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
790 offset
= XINT (AREF (subset_info
, 3));
792 if (from
< XFASTINT (AREF (subset_info
, 1)))
793 from
= XFASTINT (AREF (subset_info
, 1));
795 if (to
> XFASTINT (AREF (subset_info
, 2)))
796 to
= XFASTINT (AREF (subset_info
, 2));
797 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
799 else /* i.e. CHARSET_METHOD_SUPERSET */
803 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
804 parents
= XCDR (parents
))
807 unsigned this_from
, this_to
;
809 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
810 offset
= XINT (XCDR (XCAR (parents
)));
811 this_from
= from
> offset
? from
- offset
: 0;
812 this_to
= to
> offset
? to
- offset
: 0;
813 if (this_from
< CHARSET_MIN_CODE (charset
))
814 this_from
= CHARSET_MIN_CODE (charset
);
815 if (this_to
> CHARSET_MAX_CODE (charset
))
816 this_to
= CHARSET_MAX_CODE (charset
);
817 map_charset_chars (c_function
, function
, arg
, charset
,
823 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
824 doc
: /* Call FUNCTION for all characters in CHARSET.
825 FUNCTION is called with an argument RANGE and the optional 3rd
828 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829 characters contained in CHARSET.
831 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
832 range of code points (in CHARSET) of target characters. */)
833 (function
, charset
, arg
, from_code
, to_code
)
834 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
839 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
840 if (NILP (from_code
))
841 from
= CHARSET_MIN_CODE (cs
);
844 CHECK_NATNUM (from_code
);
845 from
= XINT (from_code
);
846 if (from
< CHARSET_MIN_CODE (cs
))
847 from
= CHARSET_MIN_CODE (cs
);
850 to
= CHARSET_MAX_CODE (cs
);
853 CHECK_NATNUM (to_code
);
855 if (to
> CHARSET_MAX_CODE (cs
))
856 to
= CHARSET_MAX_CODE (cs
);
858 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
863 /* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
868 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
869 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
870 doc
: /* For internal use only.
871 usage: (define-charset-internal ...) */)
876 /* Charset attr vector. */
880 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
882 struct charset charset
;
885 int new_definition_p
;
888 if (nargs
!= charset_arg_max
)
889 return Fsignal (Qwrong_number_of_arguments
,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs
)));
893 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
895 CHECK_SYMBOL (args
[charset_arg_name
]);
896 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
898 val
= args
[charset_arg_code_space
];
899 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
901 int min_byte
, max_byte
;
903 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
904 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
905 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
906 error ("Invalid :code-space value");
907 charset
.code_space
[i
* 4] = min_byte
;
908 charset
.code_space
[i
* 4 + 1] = max_byte
;
909 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
910 nchars
*= charset
.code_space
[i
* 4 + 2];
911 charset
.code_space
[i
* 4 + 3] = nchars
;
916 val
= args
[charset_arg_dimension
];
918 charset
.dimension
= dimension
;
922 charset
.dimension
= XINT (val
);
923 if (charset
.dimension
< 1 || charset
.dimension
> 4)
924 args_out_of_range_3 (val
, make_number (1), make_number (4));
927 charset
.code_linear_p
928 = (charset
.dimension
== 1
929 || (charset
.code_space
[2] == 256
930 && (charset
.dimension
== 2
931 || (charset
.code_space
[6] == 256
932 && (charset
.dimension
== 3
933 || charset
.code_space
[10] == 256)))));
935 if (! charset
.code_linear_p
)
937 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
938 bzero (charset
.code_space_mask
, 256);
939 for (i
= 0; i
< 4; i
++)
940 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
942 charset
.code_space_mask
[j
] |= (1 << i
);
945 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
947 charset
.min_code
= (charset
.code_space
[0]
948 | (charset
.code_space
[4] << 8)
949 | (charset
.code_space
[8] << 16)
950 | (charset
.code_space
[12] << 24));
951 charset
.max_code
= (charset
.code_space
[1]
952 | (charset
.code_space
[5] << 8)
953 | (charset
.code_space
[9] << 16)
954 | (charset
.code_space
[13] << 24));
955 charset
.char_index_offset
= 0;
957 val
= args
[charset_arg_min_code
];
967 CHECK_NUMBER_CAR (val
);
968 CHECK_NUMBER_CDR (val
);
969 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
971 if (code
< charset
.min_code
972 || code
> charset
.max_code
)
973 args_out_of_range_3 (make_number (charset
.min_code
),
974 make_number (charset
.max_code
), val
);
975 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
976 charset
.min_code
= code
;
979 val
= args
[charset_arg_max_code
];
989 CHECK_NUMBER_CAR (val
);
990 CHECK_NUMBER_CDR (val
);
991 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
993 if (code
< charset
.min_code
994 || code
> charset
.max_code
)
995 args_out_of_range_3 (make_number (charset
.min_code
),
996 make_number (charset
.max_code
), val
);
997 charset
.max_code
= code
;
1000 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1002 val
= args
[charset_arg_invalid_code
];
1005 if (charset
.min_code
> 0)
1006 charset
.invalid_code
= 0;
1009 XSETINT (val
, charset
.max_code
+ 1);
1010 if (XINT (val
) == charset
.max_code
+ 1)
1011 charset
.invalid_code
= charset
.max_code
+ 1;
1013 error ("Attribute :invalid-code must be specified");
1019 charset
.invalid_code
= XFASTINT (val
);
1022 val
= args
[charset_arg_iso_final
];
1024 charset
.iso_final
= -1;
1028 if (XINT (val
) < '0' || XINT (val
) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val
));
1030 charset
.iso_final
= XINT (val
);
1033 val
= args
[charset_arg_iso_revision
];
1035 charset
.iso_revision
= -1;
1039 if (XINT (val
) > 63)
1040 args_out_of_range (make_number (63), val
);
1041 charset
.iso_revision
= XINT (val
);
1044 val
= args
[charset_arg_emacs_mule_id
];
1046 charset
.emacs_mule_id
= -1;
1050 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val
));
1052 charset
.emacs_mule_id
= XINT (val
);
1055 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1057 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1059 charset
.unified_p
= 0;
1061 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1063 if (! NILP (args
[charset_arg_code_offset
]))
1065 val
= args
[charset_arg_code_offset
];
1068 charset
.method
= CHARSET_METHOD_OFFSET
;
1069 charset
.code_offset
= XINT (val
);
1071 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1072 charset
.min_char
= i
+ charset
.code_offset
;
1073 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1074 charset
.max_char
= i
+ charset
.code_offset
;
1075 if (charset
.max_char
> MAX_CHAR
)
1076 error ("Unsupported max char: %d", charset
.max_char
);
1078 i
= (charset
.min_char
>> 7) << 7;
1079 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1080 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1081 i
= (i
>> 12) << 12;
1082 for (; i
<= charset
.max_char
; i
+= 0x1000)
1083 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1084 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1085 charset
.ascii_compatible_p
= 1;
1087 else if (! NILP (args
[charset_arg_map
]))
1089 val
= args
[charset_arg_map
];
1090 ASET (attrs
, charset_map
, val
);
1091 charset
.method
= CHARSET_METHOD_MAP
;
1093 else if (! NILP (args
[charset_arg_subset
]))
1096 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1097 struct charset
*parent_charset
;
1099 val
= args
[charset_arg_subset
];
1100 parent
= Fcar (val
);
1101 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1102 parent_min_code
= Fnth (make_number (1), val
);
1103 CHECK_NATNUM (parent_min_code
);
1104 parent_max_code
= Fnth (make_number (2), val
);
1105 CHECK_NATNUM (parent_max_code
);
1106 parent_code_offset
= Fnth (make_number (3), val
);
1107 CHECK_NUMBER (parent_code_offset
);
1108 val
= Fmake_vector (make_number (4), Qnil
);
1109 ASET (val
, 0, make_number (parent_charset
->id
));
1110 ASET (val
, 1, parent_min_code
);
1111 ASET (val
, 2, parent_max_code
);
1112 ASET (val
, 3, parent_code_offset
);
1113 ASET (attrs
, charset_subset
, val
);
1115 charset
.method
= CHARSET_METHOD_SUBSET
;
1116 /* Here, we just copy the parent's fast_map. It's not accurate,
1117 but at least it works for quickly detecting which character
1118 DOESN'T belong to this charset. */
1119 for (i
= 0; i
< 190; i
++)
1120 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1122 /* We also copy these for parents. */
1123 charset
.min_char
= parent_charset
->min_char
;
1124 charset
.max_char
= parent_charset
->max_char
;
1126 else if (! NILP (args
[charset_arg_superset
]))
1128 val
= args
[charset_arg_superset
];
1129 charset
.method
= CHARSET_METHOD_SUPERSET
;
1130 val
= Fcopy_sequence (val
);
1131 ASET (attrs
, charset_superset
, val
);
1133 charset
.min_char
= MAX_CHAR
;
1134 charset
.max_char
= 0;
1135 for (; ! NILP (val
); val
= Fcdr (val
))
1137 Lisp_Object elt
, car_part
, cdr_part
;
1138 int this_id
, offset
;
1139 struct charset
*this_charset
;
1144 car_part
= XCAR (elt
);
1145 cdr_part
= XCDR (elt
);
1146 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1147 CHECK_NUMBER (cdr_part
);
1148 offset
= XINT (cdr_part
);
1152 CHECK_CHARSET_GET_ID (elt
, this_id
);
1155 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1157 this_charset
= CHARSET_FROM_ID (this_id
);
1158 if (charset
.min_char
> this_charset
->min_char
)
1159 charset
.min_char
= this_charset
->min_char
;
1160 if (charset
.max_char
< this_charset
->max_char
)
1161 charset
.max_char
= this_charset
->max_char
;
1162 for (i
= 0; i
< 190; i
++)
1163 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1167 error ("None of :code-offset, :map, :parents are specified");
1169 val
= args
[charset_arg_unify_map
];
1170 if (! NILP (val
) && !STRINGP (val
))
1172 ASET (attrs
, charset_unify_map
, val
);
1174 CHECK_LIST (args
[charset_arg_plist
]);
1175 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1177 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1179 if (charset
.hash_index
>= 0)
1181 new_definition_p
= 0;
1182 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1183 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1187 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1189 if (charset_table_used
== charset_table_size
)
1191 struct charset
*new_table
1192 = (struct charset
*) xmalloc (sizeof (struct charset
)
1193 * (charset_table_size
+ 16));
1194 bcopy (charset_table
, new_table
,
1195 sizeof (struct charset
) * charset_table_size
);
1196 charset_table_size
+= 16;
1197 charset_table
= new_table
;
1199 id
= charset_table_used
++;
1200 new_definition_p
= 1;
1203 ASET (attrs
, charset_id
, make_number (id
));
1205 charset_table
[id
] = charset
;
1207 if (charset
.method
== CHARSET_METHOD_MAP
)
1209 load_charset (&charset
, 0);
1210 charset_table
[id
] = charset
;
1213 if (charset
.iso_final
>= 0)
1215 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1216 charset
.iso_final
) = id
;
1217 if (new_definition_p
)
1218 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1219 Fcons (make_number (id
), Qnil
));
1220 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1221 charset_jisx0201_roman
= id
;
1222 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1223 charset_jisx0208_1978
= id
;
1224 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1225 charset_jisx0208
= id
;
1226 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1227 charset_ksc5601
= id
;
1230 if (charset
.emacs_mule_id
>= 0)
1232 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1233 if (charset
.emacs_mule_id
< 0xA0)
1234 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1236 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1237 if (new_definition_p
)
1238 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1239 Fcons (make_number (id
), Qnil
));
1242 if (new_definition_p
)
1244 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1245 if (charset
.supplementary_p
)
1246 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1247 Fcons (make_number (id
), Qnil
));
1252 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1254 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1256 if (cs
->supplementary_p
)
1259 if (EQ (tail
, Vcharset_ordered_list
))
1260 Vcharset_ordered_list
= Fcons (make_number (id
),
1261 Vcharset_ordered_list
);
1262 else if (NILP (tail
))
1263 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1264 Fcons (make_number (id
), Qnil
));
1267 val
= Fcons (XCAR (tail
), XCDR (tail
));
1268 XSETCDR (tail
, val
);
1269 XSETCAR (tail
, make_number (id
));
1272 charset_ordered_list_tick
++;
1279 /* Same as Fdefine_charset_internal but arguments are more convenient
1280 to call from C (typically in syms_of_charset). This can define a
1281 charset of `offset' method only. Return the ID of the new
1285 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1286 iso_final
, iso_revision
, emacs_mule_id
,
1287 ascii_compatible
, supplementary
,
1291 unsigned char *code_space
;
1292 unsigned min_code
, max_code
;
1293 int iso_final
, iso_revision
, emacs_mule_id
;
1294 int ascii_compatible
, supplementary
;
1297 Lisp_Object args
[charset_arg_max
];
1298 Lisp_Object plist
[14];
1302 args
[charset_arg_name
] = name
;
1303 args
[charset_arg_dimension
] = make_number (dimension
);
1304 val
= Fmake_vector (make_number (8), make_number (0));
1305 for (i
= 0; i
< 8; i
++)
1306 ASET (val
, i
, make_number (code_space
[i
]));
1307 args
[charset_arg_code_space
] = val
;
1308 args
[charset_arg_min_code
] = make_number (min_code
);
1309 args
[charset_arg_max_code
] = make_number (max_code
);
1310 args
[charset_arg_iso_final
]
1311 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1312 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1313 args
[charset_arg_emacs_mule_id
]
1314 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1315 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1316 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1317 args
[charset_arg_invalid_code
] = Qnil
;
1318 args
[charset_arg_code_offset
] = make_number (code_offset
);
1319 args
[charset_arg_map
] = Qnil
;
1320 args
[charset_arg_subset
] = Qnil
;
1321 args
[charset_arg_superset
] = Qnil
;
1322 args
[charset_arg_unify_map
] = Qnil
;
1324 plist
[0] = intern_c_string (":name");
1325 plist
[1] = args
[charset_arg_name
];
1326 plist
[2] = intern_c_string (":dimension");
1327 plist
[3] = args
[charset_arg_dimension
];
1328 plist
[4] = intern_c_string (":code-space");
1329 plist
[5] = args
[charset_arg_code_space
];
1330 plist
[6] = intern_c_string (":iso-final-char");
1331 plist
[7] = args
[charset_arg_iso_final
];
1332 plist
[8] = intern_c_string (":emacs-mule-id");
1333 plist
[9] = args
[charset_arg_emacs_mule_id
];
1334 plist
[10] = intern_c_string (":ascii-compatible-p");
1335 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1336 plist
[12] = intern_c_string (":code-offset");
1337 plist
[13] = args
[charset_arg_code_offset
];
1339 args
[charset_arg_plist
] = Flist (14, plist
);
1340 Fdefine_charset_internal (charset_arg_max
, args
);
1342 return XINT (CHARSET_SYMBOL_ID (name
));
1346 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1347 Sdefine_charset_alias
, 2, 2, 0,
1348 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1350 Lisp_Object alias
, charset
;
1354 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1355 Fputhash (alias
, attr
, Vcharset_hash_table
);
1356 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1361 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1362 doc
: /* Return the property list of CHARSET. */)
1364 Lisp_Object charset
;
1368 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1369 return CHARSET_ATTR_PLIST (attrs
);
1373 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1374 doc
: /* Set CHARSET's property list to PLIST. */)
1376 Lisp_Object charset
, plist
;
1380 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1381 CHARSET_ATTR_PLIST (attrs
) = plist
;
1386 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1387 doc
: /* Unify characters of CHARSET with Unicode.
1388 This means reading the relevant file and installing the table defined
1389 by CHARSET's `:unify-map' property.
1391 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1392 the same meaning as the `:unify-map' attribute in the function
1393 `define-charset' (which see).
1395 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1396 (charset
, unify_map
, deunify
)
1397 Lisp_Object charset
, unify_map
, deunify
;
1402 CHECK_CHARSET_GET_ID (charset
, id
);
1403 cs
= CHARSET_FROM_ID (id
);
1405 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1406 : ! CHARSET_UNIFIED_P (cs
))
1409 CHARSET_UNIFIED_P (cs
) = 0;
1412 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1413 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1414 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1415 if (NILP (unify_map
))
1416 unify_map
= CHARSET_UNIFY_MAP (cs
);
1419 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1420 signal_error ("Bad unify-map", unify_map
);
1421 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1423 if (NILP (Vchar_unify_table
))
1424 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1425 char_table_set_range (Vchar_unify_table
,
1426 cs
->min_char
, cs
->max_char
, charset
);
1427 CHARSET_UNIFIED_P (cs
) = 1;
1429 else if (CHAR_TABLE_P (Vchar_unify_table
))
1431 int min_code
= CHARSET_MIN_CODE (cs
);
1432 int max_code
= CHARSET_MAX_CODE (cs
);
1433 int min_char
= DECODE_CHAR (cs
, min_code
);
1434 int max_char
= DECODE_CHAR (cs
, max_code
);
1436 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1442 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1443 Sget_unused_iso_final_char
, 2, 2, 0,
1445 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1446 DIMENSION is the number of bytes to represent a character: 1 or 2.
1447 CHARS is the number of characters in a dimension: 94 or 96.
1449 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1450 If there's no unused final char for the specified kind of charset,
1453 Lisp_Object dimension
, chars
;
1457 CHECK_NUMBER (dimension
);
1458 CHECK_NUMBER (chars
);
1459 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1460 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1461 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1462 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1463 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1464 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1466 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1470 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
1472 CHECK_NATNUM (dimension
);
1473 CHECK_NATNUM (chars
);
1474 CHECK_NATNUM (final_char
);
1476 if (XINT (dimension
) > 3)
1477 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1478 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1479 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1480 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1481 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1485 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1487 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1489 On decoding by an ISO-2022 base coding system, when a charset
1490 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1491 if CHARSET is designated instead. */)
1492 (dimension
, chars
, final_char
, charset
)
1493 Lisp_Object dimension
, chars
, final_char
, charset
;
1498 CHECK_CHARSET_GET_ID (charset
, id
);
1499 check_iso_charset_parameter (dimension
, chars
, final_char
);
1500 chars_flag
= XINT (chars
) == 96;
1501 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1506 /* Return information about charsets in the text at PTR of NBYTES
1507 bytes, which are NCHARS characters. The value is:
1509 0: Each character is represented by one byte. This is always
1510 true for a unibyte string. For a multibyte string, true if
1511 it contains only ASCII characters.
1513 1: No charsets other than ascii, control-1, and latin-1 are
1520 string_xstring_p (Lisp_Object string
)
1522 const unsigned char *p
= SDATA (string
);
1523 const unsigned char *endp
= p
+ SBYTES (string
);
1525 if (SCHARS (string
) == SBYTES (string
))
1530 int c
= STRING_CHAR_ADVANCE (p
);
1539 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1541 CHARSETS is a vector. If Nth element is non-nil, it means the
1542 charset whose id is N is already found.
1544 It may lookup a translation table TABLE if supplied. */
1547 find_charsets_in_text (const unsigned char *ptr
, EMACS_INT nchars
, EMACS_INT nbytes
, Lisp_Object charsets
, Lisp_Object table
, int multibyte
)
1549 const unsigned char *pend
= ptr
+ nbytes
;
1551 if (nchars
== nbytes
)
1554 ASET (charsets
, charset_ascii
, Qt
);
1561 c
= translate_char (table
, c
);
1562 if (ASCII_BYTE_P (c
))
1563 ASET (charsets
, charset_ascii
, Qt
);
1565 ASET (charsets
, charset_eight_bit
, Qt
);
1572 int c
= STRING_CHAR_ADVANCE (ptr
);
1573 struct charset
*charset
;
1576 c
= translate_char (table
, c
);
1577 charset
= CHAR_CHARSET (c
);
1578 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1583 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1585 doc
: /* Return a list of charsets in the region between BEG and END.
1586 BEG and END are buffer positions.
1587 Optional arg TABLE if non-nil is a translation table to look up.
1589 If the current buffer is unibyte, the returned list may contain
1590 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1592 Lisp_Object beg
, end
, table
;
1594 Lisp_Object charsets
;
1595 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1598 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1600 validate_region (&beg
, &end
);
1601 from
= XFASTINT (beg
);
1602 stop
= to
= XFASTINT (end
);
1604 if (from
< GPT
&& GPT
< to
)
1607 stop_byte
= GPT_BYTE
;
1610 stop_byte
= CHAR_TO_BYTE (stop
);
1612 from_byte
= CHAR_TO_BYTE (from
);
1614 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1617 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1618 stop_byte
- from_byte
, charsets
, table
,
1622 from
= stop
, from_byte
= stop_byte
;
1623 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1630 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1631 if (!NILP (AREF (charsets
, i
)))
1632 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1636 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1638 doc
: /* Return a list of charsets in STR.
1639 Optional arg TABLE if non-nil is a translation table to look up.
1641 If STR is unibyte, the returned list may contain
1642 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1644 Lisp_Object str
, table
;
1646 Lisp_Object charsets
;
1652 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1653 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1655 STRING_MULTIBYTE (str
));
1657 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1658 if (!NILP (AREF (charsets
, i
)))
1659 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1665 /* Return a unified character code for C (>= 0x110000). VAL is a
1666 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1669 maybe_unify_char (int c
, Lisp_Object val
)
1671 struct charset
*charset
;
1678 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1679 load_charset (charset
, 1);
1680 if (! inhibit_load_charset_map
)
1682 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1688 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1689 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1698 /* Return a character correponding to the code-point CODE of
1702 decode_char (struct charset
*charset
, unsigned int code
)
1705 enum charset_method method
= CHARSET_METHOD (charset
);
1707 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1710 if (method
== CHARSET_METHOD_SUBSET
)
1712 Lisp_Object subset_info
;
1714 subset_info
= CHARSET_SUBSET (charset
);
1715 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1716 code
-= XINT (AREF (subset_info
, 3));
1717 if (code
< XFASTINT (AREF (subset_info
, 1))
1718 || code
> XFASTINT (AREF (subset_info
, 2)))
1721 c
= DECODE_CHAR (charset
, code
);
1723 else if (method
== CHARSET_METHOD_SUPERSET
)
1725 Lisp_Object parents
;
1727 parents
= CHARSET_SUPERSET (charset
);
1729 for (; CONSP (parents
); parents
= XCDR (parents
))
1731 int id
= XINT (XCAR (XCAR (parents
)));
1732 int code_offset
= XINT (XCDR (XCAR (parents
)));
1733 unsigned this_code
= code
- code_offset
;
1735 charset
= CHARSET_FROM_ID (id
);
1736 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1742 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1746 if (method
== CHARSET_METHOD_MAP
)
1748 Lisp_Object decoder
;
1750 decoder
= CHARSET_DECODER (charset
);
1751 if (! VECTORP (decoder
))
1753 load_charset (charset
, 1);
1754 decoder
= CHARSET_DECODER (charset
);
1756 if (VECTORP (decoder
))
1757 c
= XINT (AREF (decoder
, char_index
));
1759 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1761 else /* method == CHARSET_METHOD_OFFSET */
1763 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1764 if (CHARSET_UNIFIED_P (charset
)
1765 && c
> MAX_UNICODE_CHAR
)
1766 MAYBE_UNIFY_CHAR (c
);
1773 /* Variable used temporarily by the macro ENCODE_CHAR. */
1774 Lisp_Object charset_work
;
1776 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1777 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1778 use CHARSET's strict_max_char instead of max_char. */
1781 encode_char (struct charset
*charset
, int c
)
1784 enum charset_method method
= CHARSET_METHOD (charset
);
1786 if (CHARSET_UNIFIED_P (charset
))
1788 Lisp_Object deunifier
;
1789 int code_index
= -1;
1791 deunifier
= CHARSET_DEUNIFIER (charset
);
1792 if (! CHAR_TABLE_P (deunifier
))
1794 load_charset (charset
, 2);
1795 deunifier
= CHARSET_DEUNIFIER (charset
);
1797 if (CHAR_TABLE_P (deunifier
))
1799 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1801 if (INTEGERP (deunified
))
1802 code_index
= XINT (deunified
);
1806 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1808 if (code_index
>= 0)
1809 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1812 if (method
== CHARSET_METHOD_SUBSET
)
1814 Lisp_Object subset_info
;
1815 struct charset
*this_charset
;
1817 subset_info
= CHARSET_SUBSET (charset
);
1818 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1819 code
= ENCODE_CHAR (this_charset
, c
);
1820 if (code
== CHARSET_INVALID_CODE (this_charset
)
1821 || code
< XFASTINT (AREF (subset_info
, 1))
1822 || code
> XFASTINT (AREF (subset_info
, 2)))
1823 return CHARSET_INVALID_CODE (charset
);
1824 code
+= XINT (AREF (subset_info
, 3));
1828 if (method
== CHARSET_METHOD_SUPERSET
)
1830 Lisp_Object parents
;
1832 parents
= CHARSET_SUPERSET (charset
);
1833 for (; CONSP (parents
); parents
= XCDR (parents
))
1835 int id
= XINT (XCAR (XCAR (parents
)));
1836 int code_offset
= XINT (XCDR (XCAR (parents
)));
1837 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1839 code
= ENCODE_CHAR (this_charset
, c
);
1840 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1841 return code
+ code_offset
;
1843 return CHARSET_INVALID_CODE (charset
);
1846 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1847 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1848 return CHARSET_INVALID_CODE (charset
);
1850 if (method
== CHARSET_METHOD_MAP
)
1852 Lisp_Object encoder
;
1855 encoder
= CHARSET_ENCODER (charset
);
1856 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1858 load_charset (charset
, 2);
1859 encoder
= CHARSET_ENCODER (charset
);
1861 if (CHAR_TABLE_P (encoder
))
1863 val
= CHAR_TABLE_REF (encoder
, c
);
1865 return CHARSET_INVALID_CODE (charset
);
1867 if (! CHARSET_COMPACT_CODES_P (charset
))
1868 code
= INDEX_TO_CODE_POINT (charset
, code
);
1872 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1873 code
= INDEX_TO_CODE_POINT (charset
, code
);
1876 else /* method == CHARSET_METHOD_OFFSET */
1878 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1880 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1887 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1888 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1889 Return nil if CODE-POINT is not valid in CHARSET.
1891 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1893 Optional argument RESTRICTION specifies a way to map the pair of CCS
1894 and CODE-POINT to a character. Currently not supported and just ignored. */)
1895 (charset
, code_point
, restriction
)
1896 Lisp_Object charset
, code_point
, restriction
;
1900 struct charset
*charsetp
;
1902 CHECK_CHARSET_GET_ID (charset
, id
);
1903 if (CONSP (code_point
))
1905 CHECK_NATNUM_CAR (code_point
);
1906 CHECK_NATNUM_CDR (code_point
);
1907 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1911 CHECK_NATNUM (code_point
);
1912 code
= XINT (code_point
);
1914 charsetp
= CHARSET_FROM_ID (id
);
1915 c
= DECODE_CHAR (charsetp
, code
);
1916 return (c
>= 0 ? make_number (c
) : Qnil
);
1920 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1921 doc
: /* Encode the character CH into a code-point of CHARSET.
1922 Return nil if CHARSET doesn't include CH.
1924 Optional argument RESTRICTION specifies a way to map CH to a
1925 code-point in CCS. Currently not supported and just ignored. */)
1926 (ch
, charset
, restriction
)
1927 Lisp_Object ch
, charset
, restriction
;
1931 struct charset
*charsetp
;
1933 CHECK_CHARSET_GET_ID (charset
, id
);
1935 charsetp
= CHARSET_FROM_ID (id
);
1936 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1937 if (code
== CHARSET_INVALID_CODE (charsetp
))
1939 if (code
> 0x7FFFFFF)
1940 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1941 return make_number (code
);
1945 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1947 /* Return a character of CHARSET whose position codes are CODEn.
1949 CODE1 through CODE4 are optional, but if you don't supply sufficient
1950 position codes, it is assumed that the minimum code in each dimension
1952 (charset
, code1
, code2
, code3
, code4
)
1953 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1956 struct charset
*charsetp
;
1960 CHECK_CHARSET_GET_ID (charset
, id
);
1961 charsetp
= CHARSET_FROM_ID (id
);
1963 dimension
= CHARSET_DIMENSION (charsetp
);
1965 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1966 ? 0 : CHARSET_MIN_CODE (charsetp
));
1969 CHECK_NATNUM (code1
);
1970 if (XFASTINT (code1
) >= 0x100)
1971 args_out_of_range (make_number (0xFF), code1
);
1972 code
= XFASTINT (code1
);
1978 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1981 CHECK_NATNUM (code2
);
1982 if (XFASTINT (code2
) >= 0x100)
1983 args_out_of_range (make_number (0xFF), code2
);
1984 code
|= XFASTINT (code2
);
1991 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1994 CHECK_NATNUM (code3
);
1995 if (XFASTINT (code3
) >= 0x100)
1996 args_out_of_range (make_number (0xFF), code3
);
1997 code
|= XFASTINT (code3
);
2004 code
|= charsetp
->code_space
[0];
2007 CHECK_NATNUM (code4
);
2008 if (XFASTINT (code4
) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code4
);
2010 code
|= XFASTINT (code4
);
2017 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2019 c
= DECODE_CHAR (charsetp
, code
);
2021 error ("Invalid code(s)");
2022 return make_number (c
);
2026 /* Return the first charset in CHARSET_LIST that contains C.
2027 CHARSET_LIST is a list of charset IDs. If it is nil, use
2028 Vcharset_ordered_list. */
2031 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
2035 if (NILP (charset_list
))
2036 charset_list
= Vcharset_ordered_list
;
2040 while (CONSP (charset_list
))
2042 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2043 unsigned code
= ENCODE_CHAR (charset
, c
);
2045 if (code
!= CHARSET_INVALID_CODE (charset
))
2048 *code_return
= code
;
2051 charset_list
= XCDR (charset_list
);
2053 && c
<= MAX_UNICODE_CHAR
2054 && EQ (charset_list
, Vcharset_non_preferred_head
))
2055 return CHARSET_FROM_ID (charset_unicode
);
2057 return (maybe_null
? NULL
2058 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2059 : CHARSET_FROM_ID (charset_eight_bit
));
2063 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2065 /*Return list of charset and one to four position-codes of CH.
2066 The charset is decided by the current priority order of charsets.
2067 A position-code is a byte value of each dimension of the code-point of
2068 CH in the charset. */)
2072 struct charset
*charset
;
2077 CHECK_CHARACTER (ch
);
2079 charset
= CHAR_CHARSET (c
);
2082 code
= ENCODE_CHAR (charset
, c
);
2083 if (code
== CHARSET_INVALID_CODE (charset
))
2085 dimension
= CHARSET_DIMENSION (charset
);
2086 for (val
= Qnil
; dimension
> 0; dimension
--)
2088 val
= Fcons (make_number (code
& 0xFF), val
);
2091 return Fcons (CHARSET_NAME (charset
), val
);
2095 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2096 doc
: /* Return the charset of highest priority that contains CH.
2097 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2098 from which to find the charset. It may also be a coding system. In
2099 that case, find the charset from what supported by that coding system. */)
2101 Lisp_Object ch
, restriction
;
2103 struct charset
*charset
;
2105 CHECK_CHARACTER (ch
);
2106 if (NILP (restriction
))
2107 charset
= CHAR_CHARSET (XINT (ch
));
2110 Lisp_Object charset_list
;
2112 if (CONSP (restriction
))
2114 for (charset_list
= Qnil
; CONSP (restriction
);
2115 restriction
= XCDR (restriction
))
2119 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2120 charset_list
= Fcons (make_number (id
), charset_list
);
2122 charset_list
= Fnreverse (charset_list
);
2125 charset_list
= coding_system_charset_list (restriction
);
2126 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2130 return (CHARSET_NAME (charset
));
2134 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2136 Return charset of a character in the current buffer at position POS.
2137 If POS is nil, it defauls to the current point.
2138 If POS is out of range, the value is nil. */)
2143 struct charset
*charset
;
2145 ch
= Fchar_after (pos
);
2146 if (! INTEGERP (ch
))
2148 charset
= CHAR_CHARSET (XINT (ch
));
2149 return (CHARSET_NAME (charset
));
2153 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2155 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2157 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2158 by their DIMENSION, CHARS, and FINAL-CHAR,
2159 whereas Emacs distinguishes them by charset symbol.
2160 See the documentation of the function `charset-info' for the meanings of
2161 DIMENSION, CHARS, and FINAL-CHAR. */)
2162 (dimension
, chars
, final_char
)
2163 Lisp_Object dimension
, chars
, final_char
;
2168 check_iso_charset_parameter (dimension
, chars
, final_char
);
2169 chars_flag
= XFASTINT (chars
) == 96;
2170 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2171 XFASTINT (final_char
));
2172 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2176 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2180 Clear temporary charset mapping tables.
2181 It should be called only from temacs invoked for dumping. */)
2184 if (temp_charset_work
)
2186 free (temp_charset_work
);
2187 temp_charset_work
= NULL
;
2190 if (CHAR_TABLE_P (Vchar_unify_table
))
2191 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2196 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2197 Scharset_priority_list
, 0, 1, 0,
2198 doc
: /* Return the list of charsets ordered by priority.
2199 HIGHESTP non-nil means just return the highest priority one. */)
2201 Lisp_Object highestp
;
2203 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2205 if (!NILP (highestp
))
2206 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2208 while (!NILP (list
))
2210 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2213 return Fnreverse (val
);
2216 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2218 doc
: /* Assign higher priority to the charsets given as arguments.
2219 usage: (set-charset-priority &rest charsets) */)
2224 Lisp_Object new_head
, old_list
, arglist
[2];
2225 Lisp_Object list_2022
, list_emacs_mule
;
2228 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2230 for (i
= 0; i
< nargs
; i
++)
2232 CHECK_CHARSET_GET_ID (args
[i
], id
);
2233 if (! NILP (Fmemq (make_number (id
), old_list
)))
2235 old_list
= Fdelq (make_number (id
), old_list
);
2236 new_head
= Fcons (make_number (id
), new_head
);
2239 arglist
[0] = Fnreverse (new_head
);
2240 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2241 Vcharset_ordered_list
= Fnconc (2, arglist
);
2242 charset_ordered_list_tick
++;
2244 charset_unibyte
= -1;
2245 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2246 CONSP (old_list
); old_list
= XCDR (old_list
))
2248 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2249 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2250 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2251 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2252 if (charset_unibyte
< 0)
2254 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2256 if (CHARSET_DIMENSION (charset
) == 1
2257 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2258 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2259 charset_unibyte
= CHARSET_ID (charset
);
2262 Viso_2022_charset_list
= Fnreverse (list_2022
);
2263 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2264 if (charset_unibyte
< 0)
2265 charset_unibyte
= charset_iso_8859_1
;
2270 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2272 doc
: /* Internal use only.
2273 Return charset identification number of CHARSET. */)
2275 Lisp_Object charset
;
2279 CHECK_CHARSET_GET_ID (charset
, id
);
2280 return make_number (id
);
2287 Lisp_Object tempdir
;
2288 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2289 if (access ((char *) SDATA (tempdir
), 0) < 0)
2291 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2292 Emacs will not function correctly without the character map files.\n\
2293 Please check your installation!\n",
2295 /* TODO should this be a fatal error? (Bug#909) */
2298 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2303 init_charset_once (void)
2307 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2308 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2309 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2310 iso_charset_table
[i
][j
][k
] = -1;
2312 for (i
= 0; i
< 256; i
++)
2313 emacs_mule_charset
[i
] = NULL
;
2315 charset_jisx0201_roman
= -1;
2316 charset_jisx0208_1978
= -1;
2317 charset_jisx0208
= -1;
2318 charset_ksc5601
= -1;
2324 syms_of_charset (void)
2326 DEFSYM (Qcharsetp
, "charsetp");
2328 DEFSYM (Qascii
, "ascii");
2329 DEFSYM (Qunicode
, "unicode");
2330 DEFSYM (Qemacs
, "emacs");
2331 DEFSYM (Qeight_bit
, "eight-bit");
2332 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2337 staticpro (&Vcharset_ordered_list
);
2338 Vcharset_ordered_list
= Qnil
;
2340 staticpro (&Viso_2022_charset_list
);
2341 Viso_2022_charset_list
= Qnil
;
2343 staticpro (&Vemacs_mule_charset_list
);
2344 Vemacs_mule_charset_list
= Qnil
;
2346 /* Don't staticpro them here. It's done in syms_of_fns. */
2347 QCtest
= intern (":test");
2348 Qeq
= intern ("eq");
2350 staticpro (&Vcharset_hash_table
);
2352 Lisp_Object args
[2];
2355 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2358 charset_table_size
= 128;
2359 charset_table
= ((struct charset
*)
2360 xmalloc (sizeof (struct charset
) * charset_table_size
));
2361 charset_table_used
= 0;
2363 defsubr (&Scharsetp
);
2364 defsubr (&Smap_charset_chars
);
2365 defsubr (&Sdefine_charset_internal
);
2366 defsubr (&Sdefine_charset_alias
);
2367 defsubr (&Scharset_plist
);
2368 defsubr (&Sset_charset_plist
);
2369 defsubr (&Sunify_charset
);
2370 defsubr (&Sget_unused_iso_final_char
);
2371 defsubr (&Sdeclare_equiv_charset
);
2372 defsubr (&Sfind_charset_region
);
2373 defsubr (&Sfind_charset_string
);
2374 defsubr (&Sdecode_char
);
2375 defsubr (&Sencode_char
);
2376 defsubr (&Ssplit_char
);
2377 defsubr (&Smake_char
);
2378 defsubr (&Schar_charset
);
2379 defsubr (&Scharset_after
);
2380 defsubr (&Siso_charset
);
2381 defsubr (&Sclear_charset_maps
);
2382 defsubr (&Scharset_priority_list
);
2383 defsubr (&Sset_charset_priority
);
2384 defsubr (&Scharset_id_internal
);
2386 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2387 doc
: /* *List of directories to search for charset map files. */);
2388 Vcharset_map_path
= Qnil
;
2390 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2391 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2392 inhibit_load_charset_map
= 0;
2394 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2395 doc
: /* List of all charsets ever defined. */);
2396 Vcharset_list
= Qnil
;
2398 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2399 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2400 If the current language environment is for multiple languages (e.g. "Latin-1"),
2401 the value may be a list of mnemonics. */);
2402 Vcurrent_iso639_language
= Qnil
;
2405 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2406 0, 127, 'B', -1, 0, 1, 0, 0);
2408 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2409 0, 255, -1, -1, -1, 1, 0, 0);
2411 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2412 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2414 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2415 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2417 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2418 128, 255, -1, 0, -1, 0, 1,
2419 MAX_5_BYTE_CHAR
+ 1);
2420 charset_unibyte
= charset_iso_8859_1
;
2425 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2426 (do not change this comment) */