1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
36 #include "character.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* List of all charsets. This variable is used only from Emacs
59 Lisp_Object Vcharset_list
;
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table
;
65 /* Table of struct charset. */
66 struct charset
*charset_table
;
68 static int charset_table_size
;
69 static int charset_table_used
;
71 Lisp_Object Qcharsetp
;
73 /* Special charset symbols. */
75 Lisp_Object Qeight_bit
;
76 Lisp_Object Qiso_8859_1
;
80 /* The corresponding charsets. */
82 int charset_eight_bit
;
83 int charset_iso_8859_1
;
87 /* The other special charsets. */
88 int charset_jisx0201_roman
;
89 int charset_jisx0208_1978
;
93 /* Value of charset attribute `charset-iso-plane'. */
96 /* Charset of unibyte characters. */
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list
;
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 Lisp_Object Vcharset_non_preferred_head
;
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
109 unsigned short charset_ordered_list_tick
;
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list
;
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list
;
117 struct charset
*emacs_mule_charset
[256];
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
123 Lisp_Object Vcharset_map_path
;
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map
;
128 Lisp_Object Vcurrent_iso639_language
;
130 /* Defined in chartab.c */
132 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
133 Lisp_Object function
, Lisp_Object table
,
134 Lisp_Object arg
, struct charset
*charset
,
135 unsigned from
, unsigned to
));
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
177 /* The current charset for which the following tables are setup. */
178 struct charset
*current
;
180 /* 1 iff the following table is used for encoder. */
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char
, max_char
;
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder
[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder
[0x20000];
205 } *temp_charset_work
;
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded
;
235 struct charset_map_entries
241 struct charset_map_entries
*next
;
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
260 If CONTROL_FLAG is 2, setup the following tables.
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
270 load_charset_map (charset
, entries
, n_entries
, control_flag
)
271 struct charset
*charset
;
272 struct charset_map_entries
*entries
;
276 Lisp_Object vec
, table
;
277 unsigned max_code
= CHARSET_MAX_CODE (charset
);
278 int ascii_compatible_p
= charset
->ascii_compatible_p
;
279 int min_char
, max_char
, nonascii_min_char
;
281 unsigned char *fast_map
= charset
->fast_map
;
288 if (! inhibit_load_charset_map
)
290 if (control_flag
== 1)
292 if (charset
->method
== CHARSET_METHOD_MAP
)
294 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
296 vec
= CHARSET_DECODER (charset
)
297 = Fmake_vector (make_number (n
), make_number (-1));
301 char_table_set_range (Vchar_unify_table
,
302 charset
->min_char
, charset
->max_char
,
308 table
= Fmake_char_table (Qnil
, Qnil
);
309 if (charset
->method
== CHARSET_METHOD_MAP
)
310 CHARSET_ENCODER (charset
) = table
;
312 CHARSET_DEUNIFIER (charset
) = table
;
317 if (! temp_charset_work
)
318 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
319 if (control_flag
== 1)
321 memset (temp_charset_work
->table
.decoder
, -1,
322 sizeof (int) * 0x10000);
326 memset (temp_charset_work
->table
.encoder
, 0,
327 sizeof (unsigned short) * 0x20000);
328 temp_charset_work
->zero_index_char
= -1;
330 temp_charset_work
->current
= charset
;
331 temp_charset_work
->for_encoder
= (control_flag
== 2);
334 charset_map_loaded
= 1;
337 min_char
= max_char
= entries
->entry
[0].c
;
338 nonascii_min_char
= MAX_CHAR
;
339 for (i
= 0; i
< n_entries
; i
++)
342 int from_index
, to_index
;
344 int idx
= i
% 0x10000;
346 if (i
> 0 && idx
== 0)
347 entries
= entries
->next
;
348 from
= entries
->entry
[idx
].from
;
349 to
= entries
->entry
[idx
].to
;
350 from_c
= entries
->entry
[idx
].c
;
351 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
354 to_index
= from_index
;
359 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
360 to_c
= from_c
+ (to_index
- from_index
);
362 if (from_index
< 0 || to_index
< 0)
367 else if (from_c
< min_char
)
370 if (control_flag
== 1)
372 if (charset
->method
== CHARSET_METHOD_MAP
)
373 for (; from_index
<= to_index
; from_index
++, from_c
++)
374 ASET (vec
, from_index
, make_number (from_c
));
376 for (; from_index
<= to_index
; from_index
++, from_c
++)
377 CHAR_TABLE_SET (Vchar_unify_table
,
378 CHARSET_CODE_OFFSET (charset
) + from_index
,
379 make_number (from_c
));
381 else if (control_flag
== 2)
383 if (charset
->method
== CHARSET_METHOD_MAP
384 && CHARSET_COMPACT_CODES_P (charset
))
385 for (; from_index
<= to_index
; from_index
++, from_c
++)
387 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
389 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
390 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
393 for (; from_index
<= to_index
; from_index
++, from_c
++)
395 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
396 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
399 else if (control_flag
== 3)
400 for (; from_index
<= to_index
; from_index
++, from_c
++)
401 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
402 else if (control_flag
== 4)
403 for (; from_index
<= to_index
; from_index
++, from_c
++)
404 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
405 else /* control_flag == 0 */
407 if (ascii_compatible_p
)
409 if (! ASCII_BYTE_P (from_c
))
411 if (from_c
< nonascii_min_char
)
412 nonascii_min_char
= from_c
;
414 else if (! ASCII_BYTE_P (to_c
))
416 nonascii_min_char
= 0x80;
420 for (; from_c
<= to_c
; from_c
++)
421 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
425 if (control_flag
== 0)
427 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
428 ? nonascii_min_char
: min_char
);
429 CHARSET_MAX_CHAR (charset
) = max_char
;
431 else if (control_flag
== 4)
433 temp_charset_work
->min_char
= min_char
;
434 temp_charset_work
->max_char
= max_char
;
439 /* Read a hexadecimal number (preceded by "0x") from the file FP while
440 paying attention to comment charcter '#'. */
442 static INLINE
unsigned
450 while ((c
= getc (fp
)) != EOF
)
454 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
458 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
470 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
472 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
474 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
475 n
= (n
* 10) + c
- '0';
481 extern Lisp_Object Qfile_name_handler_alist
;
483 /* Return a mapping vector for CHARSET loaded from MAPFILE.
484 Each line of MAPFILE has this form
486 where 0xAAAA is a code-point and 0xCCCC is the corresponding
487 character code, or this form
489 where 0xAAAA and 0xBBBB are code-points specifying a range, and
490 0xCCCC is the first character code of the range.
492 The returned vector has this form:
493 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
494 where CODE1 is a code-point or a cons of code-points specifying a
497 Note that this function uses `openp' to open MAPFILE but ignores
498 `file-name-handler-alist' to avoid running any Lisp code. */
500 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
503 load_charset_map_from_file (charset
, mapfile
, control_flag
)
504 struct charset
*charset
;
508 unsigned min_code
= CHARSET_MIN_CODE (charset
);
509 unsigned max_code
= CHARSET_MAX_CODE (charset
);
513 Lisp_Object suffixes
;
514 struct charset_map_entries
*head
, *entries
;
515 int n_entries
, count
;
518 suffixes
= Fcons (build_string (".map"),
519 Fcons (build_string (".TXT"), Qnil
));
521 count
= SPECPDL_INDEX ();
522 specbind (Qfile_name_handler_alist
, Qnil
);
523 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
524 unbind_to (count
, Qnil
);
526 || ! (fp
= fdopen (fd
, "r")))
527 error ("Failure in loading charset map: %S", SDATA (mapfile
));
529 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
530 large (larger than MAX_ALLOCA). */
531 SAFE_ALLOCA (head
, struct charset_map_entries
*,
532 sizeof (struct charset_map_entries
));
543 from
= read_hex (fp
, &eof
);
546 if (getc (fp
) == '-')
547 to
= read_hex (fp
, &eof
);
550 c
= (int) read_hex (fp
, &eof
);
552 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
555 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
557 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
558 sizeof (struct charset_map_entries
));
559 entries
= entries
->next
;
561 idx
= n_entries
% 0x10000;
562 entries
->entry
[idx
].from
= from
;
563 entries
->entry
[idx
].to
= to
;
564 entries
->entry
[idx
].c
= c
;
570 load_charset_map (charset
, head
, n_entries
, control_flag
);
575 load_charset_map_from_vector (charset
, vec
, control_flag
)
576 struct charset
*charset
;
580 unsigned min_code
= CHARSET_MIN_CODE (charset
);
581 unsigned max_code
= CHARSET_MAX_CODE (charset
);
582 struct charset_map_entries
*head
, *entries
;
584 int len
= ASIZE (vec
);
590 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
594 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
595 large (larger than MAX_ALLOCA). */
596 SAFE_ALLOCA (head
, struct charset_map_entries
*,
597 sizeof (struct charset_map_entries
));
601 for (i
= 0; i
< len
; i
+= 2)
603 Lisp_Object val
, val2
;
615 from
= XFASTINT (val
);
616 to
= XFASTINT (val2
);
621 from
= to
= XFASTINT (val
);
623 val
= AREF (vec
, i
+ 1);
627 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
630 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
632 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
633 sizeof (struct charset_map_entries
));
634 entries
= entries
->next
;
636 idx
= n_entries
% 0x10000;
637 entries
->entry
[idx
].from
= from
;
638 entries
->entry
[idx
].to
= to
;
639 entries
->entry
[idx
].c
= c
;
643 load_charset_map (charset
, head
, n_entries
, control_flag
);
648 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
649 map it is (see the comment of load_charset_map for the detail). */
652 load_charset (charset
, control_flag
)
653 struct charset
*charset
;
658 if (inhibit_load_charset_map
660 && charset
== temp_charset_work
->current
661 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
664 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
665 map
= CHARSET_MAP (charset
);
666 else if (CHARSET_UNIFIED_P (charset
))
667 map
= CHARSET_UNIFY_MAP (charset
);
669 load_charset_map_from_file (charset
, map
, control_flag
);
671 load_charset_map_from_vector (charset
, map
, control_flag
);
675 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
676 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
680 return (CHARSETP (object
) ? Qt
: Qnil
);
684 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
685 Lisp_Object function
, Lisp_Object arg
,
686 unsigned from
, unsigned to
));
689 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
690 void (*c_function
) (Lisp_Object
, Lisp_Object
);
691 Lisp_Object function
, arg
;
694 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
695 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
700 range
= Fcons (Qnil
, Qnil
);
703 c
= temp_charset_work
->min_char
;
704 stop
= (temp_charset_work
->max_char
< 0x20000
705 ? temp_charset_work
->max_char
: 0xFFFF);
709 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
711 if (index
>= from_idx
&& index
<= to_idx
)
713 if (NILP (XCAR (range
)))
714 XSETCAR (range
, make_number (c
));
716 else if (! NILP (XCAR (range
)))
718 XSETCDR (range
, make_number (c
- 1));
720 (*c_function
) (arg
, range
);
722 call2 (function
, range
, arg
);
723 XSETCAR (range
, Qnil
);
727 if (c
== temp_charset_work
->max_char
)
729 if (! NILP (XCAR (range
)))
731 XSETCDR (range
, make_number (c
));
733 (*c_function
) (arg
, range
);
735 call2 (function
, range
, arg
);
740 stop
= temp_charset_work
->max_char
;
748 map_charset_chars (c_function
, function
, arg
,
750 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
751 Lisp_Object function
, arg
;
752 struct charset
*charset
;
758 partial
= (from
> CHARSET_MIN_CODE (charset
)
759 || to
< CHARSET_MAX_CODE (charset
));
761 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
763 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
764 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
765 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
766 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
768 if (CHARSET_UNIFIED_P (charset
))
770 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
771 load_charset (charset
, 2);
772 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
773 map_char_table_for_charset (c_function
, function
,
774 CHARSET_DEUNIFIER (charset
), arg
,
775 partial
? charset
: NULL
, from
, to
);
777 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
780 range
= Fcons (make_number (from_c
), make_number (to_c
));
782 (*c_function
) (arg
, range
);
784 call2 (function
, range
, arg
);
786 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
788 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
789 load_charset (charset
, 2);
790 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
791 map_char_table_for_charset (c_function
, function
,
792 CHARSET_ENCODER (charset
), arg
,
793 partial
? charset
: NULL
, from
, to
);
795 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
797 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
799 Lisp_Object subset_info
;
802 subset_info
= CHARSET_SUBSET (charset
);
803 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
804 offset
= XINT (AREF (subset_info
, 3));
806 if (from
< XFASTINT (AREF (subset_info
, 1)))
807 from
= XFASTINT (AREF (subset_info
, 1));
809 if (to
> XFASTINT (AREF (subset_info
, 2)))
810 to
= XFASTINT (AREF (subset_info
, 2));
811 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
813 else /* i.e. CHARSET_METHOD_SUPERSET */
817 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
818 parents
= XCDR (parents
))
821 unsigned this_from
, this_to
;
823 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
824 offset
= XINT (XCDR (XCAR (parents
)));
825 this_from
= from
> offset
? from
- offset
: 0;
826 this_to
= to
> offset
? to
- offset
: 0;
827 if (this_from
< CHARSET_MIN_CODE (charset
))
828 this_from
= CHARSET_MIN_CODE (charset
);
829 if (this_to
> CHARSET_MAX_CODE (charset
))
830 this_to
= CHARSET_MAX_CODE (charset
);
831 map_charset_chars (c_function
, function
, arg
, charset
,
837 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
838 doc
: /* Call FUNCTION for all characters in CHARSET.
839 FUNCTION is called with an argument RANGE and the optional 3rd
842 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
843 characters contained in CHARSET.
845 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
846 range of code points (in CHARSET) of target characters. */)
847 (function
, charset
, arg
, from_code
, to_code
)
848 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
853 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
854 if (NILP (from_code
))
855 from
= CHARSET_MIN_CODE (cs
);
858 CHECK_NATNUM (from_code
);
859 from
= XINT (from_code
);
860 if (from
< CHARSET_MIN_CODE (cs
))
861 from
= CHARSET_MIN_CODE (cs
);
864 to
= CHARSET_MAX_CODE (cs
);
867 CHECK_NATNUM (to_code
);
869 if (to
> CHARSET_MAX_CODE (cs
))
870 to
= CHARSET_MAX_CODE (cs
);
872 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
877 /* Define a charset according to the arguments. The Nth argument is
878 the Nth attribute of the charset (the last attribute `charset-id'
879 is not included). See the docstring of `define-charset' for the
882 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
883 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
884 doc
: /* For internal use only.
885 usage: (define-charset-internal ...) */)
890 /* Charset attr vector. */
894 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
896 struct charset charset
;
899 int new_definition_p
;
902 if (nargs
!= charset_arg_max
)
903 return Fsignal (Qwrong_number_of_arguments
,
904 Fcons (intern ("define-charset-internal"),
905 make_number (nargs
)));
907 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
909 CHECK_SYMBOL (args
[charset_arg_name
]);
910 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
912 val
= args
[charset_arg_code_space
];
913 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
915 int min_byte
, max_byte
;
917 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
918 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
919 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
920 error ("Invalid :code-space value");
921 charset
.code_space
[i
* 4] = min_byte
;
922 charset
.code_space
[i
* 4 + 1] = max_byte
;
923 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
924 nchars
*= charset
.code_space
[i
* 4 + 2];
925 charset
.code_space
[i
* 4 + 3] = nchars
;
930 val
= args
[charset_arg_dimension
];
932 charset
.dimension
= dimension
;
936 charset
.dimension
= XINT (val
);
937 if (charset
.dimension
< 1 || charset
.dimension
> 4)
938 args_out_of_range_3 (val
, make_number (1), make_number (4));
941 charset
.code_linear_p
942 = (charset
.dimension
== 1
943 || (charset
.code_space
[2] == 256
944 && (charset
.dimension
== 2
945 || (charset
.code_space
[6] == 256
946 && (charset
.dimension
== 3
947 || charset
.code_space
[10] == 256)))));
949 if (! charset
.code_linear_p
)
951 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
952 bzero (charset
.code_space_mask
, 256);
953 for (i
= 0; i
< 4; i
++)
954 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
956 charset
.code_space_mask
[j
] |= (1 << i
);
959 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
961 charset
.min_code
= (charset
.code_space
[0]
962 | (charset
.code_space
[4] << 8)
963 | (charset
.code_space
[8] << 16)
964 | (charset
.code_space
[12] << 24));
965 charset
.max_code
= (charset
.code_space
[1]
966 | (charset
.code_space
[5] << 8)
967 | (charset
.code_space
[9] << 16)
968 | (charset
.code_space
[13] << 24));
969 charset
.char_index_offset
= 0;
971 val
= args
[charset_arg_min_code
];
981 CHECK_NUMBER_CAR (val
);
982 CHECK_NUMBER_CDR (val
);
983 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
985 if (code
< charset
.min_code
986 || code
> charset
.max_code
)
987 args_out_of_range_3 (make_number (charset
.min_code
),
988 make_number (charset
.max_code
), val
);
989 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
990 charset
.min_code
= code
;
993 val
= args
[charset_arg_max_code
];
1003 CHECK_NUMBER_CAR (val
);
1004 CHECK_NUMBER_CDR (val
);
1005 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
1007 if (code
< charset
.min_code
1008 || code
> charset
.max_code
)
1009 args_out_of_range_3 (make_number (charset
.min_code
),
1010 make_number (charset
.max_code
), val
);
1011 charset
.max_code
= code
;
1014 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1016 val
= args
[charset_arg_invalid_code
];
1019 if (charset
.min_code
> 0)
1020 charset
.invalid_code
= 0;
1023 XSETINT (val
, charset
.max_code
+ 1);
1024 if (XINT (val
) == charset
.max_code
+ 1)
1025 charset
.invalid_code
= charset
.max_code
+ 1;
1027 error ("Attribute :invalid-code must be specified");
1033 charset
.invalid_code
= XFASTINT (val
);
1036 val
= args
[charset_arg_iso_final
];
1038 charset
.iso_final
= -1;
1042 if (XINT (val
) < '0' || XINT (val
) > 127)
1043 error ("Invalid iso-final-char: %d", XINT (val
));
1044 charset
.iso_final
= XINT (val
);
1047 val
= args
[charset_arg_iso_revision
];
1049 charset
.iso_revision
= -1;
1053 if (XINT (val
) > 63)
1054 args_out_of_range (make_number (63), val
);
1055 charset
.iso_revision
= XINT (val
);
1058 val
= args
[charset_arg_emacs_mule_id
];
1060 charset
.emacs_mule_id
= -1;
1064 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1065 error ("Invalid emacs-mule-id: %d", XINT (val
));
1066 charset
.emacs_mule_id
= XINT (val
);
1069 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1071 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1073 charset
.unified_p
= 0;
1075 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1077 if (! NILP (args
[charset_arg_code_offset
]))
1079 val
= args
[charset_arg_code_offset
];
1082 charset
.method
= CHARSET_METHOD_OFFSET
;
1083 charset
.code_offset
= XINT (val
);
1085 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1086 charset
.min_char
= i
+ charset
.code_offset
;
1087 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1088 charset
.max_char
= i
+ charset
.code_offset
;
1089 if (charset
.max_char
> MAX_CHAR
)
1090 error ("Unsupported max char: %d", charset
.max_char
);
1092 i
= (charset
.min_char
>> 7) << 7;
1093 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1094 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1095 i
= (i
>> 12) << 12;
1096 for (; i
<= charset
.max_char
; i
+= 0x1000)
1097 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1098 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1099 charset
.ascii_compatible_p
= 1;
1101 else if (! NILP (args
[charset_arg_map
]))
1103 val
= args
[charset_arg_map
];
1104 ASET (attrs
, charset_map
, val
);
1105 charset
.method
= CHARSET_METHOD_MAP
;
1107 else if (! NILP (args
[charset_arg_subset
]))
1110 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1111 struct charset
*parent_charset
;
1113 val
= args
[charset_arg_subset
];
1114 parent
= Fcar (val
);
1115 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1116 parent_min_code
= Fnth (make_number (1), val
);
1117 CHECK_NATNUM (parent_min_code
);
1118 parent_max_code
= Fnth (make_number (2), val
);
1119 CHECK_NATNUM (parent_max_code
);
1120 parent_code_offset
= Fnth (make_number (3), val
);
1121 CHECK_NUMBER (parent_code_offset
);
1122 val
= Fmake_vector (make_number (4), Qnil
);
1123 ASET (val
, 0, make_number (parent_charset
->id
));
1124 ASET (val
, 1, parent_min_code
);
1125 ASET (val
, 2, parent_max_code
);
1126 ASET (val
, 3, parent_code_offset
);
1127 ASET (attrs
, charset_subset
, val
);
1129 charset
.method
= CHARSET_METHOD_SUBSET
;
1130 /* Here, we just copy the parent's fast_map. It's not accurate,
1131 but at least it works for quickly detecting which character
1132 DOESN'T belong to this charset. */
1133 for (i
= 0; i
< 190; i
++)
1134 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1136 /* We also copy these for parents. */
1137 charset
.min_char
= parent_charset
->min_char
;
1138 charset
.max_char
= parent_charset
->max_char
;
1140 else if (! NILP (args
[charset_arg_superset
]))
1142 val
= args
[charset_arg_superset
];
1143 charset
.method
= CHARSET_METHOD_SUPERSET
;
1144 val
= Fcopy_sequence (val
);
1145 ASET (attrs
, charset_superset
, val
);
1147 charset
.min_char
= MAX_CHAR
;
1148 charset
.max_char
= 0;
1149 for (; ! NILP (val
); val
= Fcdr (val
))
1151 Lisp_Object elt
, car_part
, cdr_part
;
1152 int this_id
, offset
;
1153 struct charset
*this_charset
;
1158 car_part
= XCAR (elt
);
1159 cdr_part
= XCDR (elt
);
1160 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1161 CHECK_NUMBER (cdr_part
);
1162 offset
= XINT (cdr_part
);
1166 CHECK_CHARSET_GET_ID (elt
, this_id
);
1169 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1171 this_charset
= CHARSET_FROM_ID (this_id
);
1172 if (charset
.min_char
> this_charset
->min_char
)
1173 charset
.min_char
= this_charset
->min_char
;
1174 if (charset
.max_char
< this_charset
->max_char
)
1175 charset
.max_char
= this_charset
->max_char
;
1176 for (i
= 0; i
< 190; i
++)
1177 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1181 error ("None of :code-offset, :map, :parents are specified");
1183 val
= args
[charset_arg_unify_map
];
1184 if (! NILP (val
) && !STRINGP (val
))
1186 ASET (attrs
, charset_unify_map
, val
);
1188 CHECK_LIST (args
[charset_arg_plist
]);
1189 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1191 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1193 if (charset
.hash_index
>= 0)
1195 new_definition_p
= 0;
1196 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1197 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1201 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1203 if (charset_table_used
== charset_table_size
)
1205 struct charset
*new_table
1206 = (struct charset
*) xmalloc (sizeof (struct charset
)
1207 * (charset_table_size
+ 16));
1208 bcopy (charset_table
, new_table
,
1209 sizeof (struct charset
) * charset_table_size
);
1210 charset_table_size
+= 16;
1211 charset_table
= new_table
;
1213 id
= charset_table_used
++;
1214 new_definition_p
= 1;
1217 ASET (attrs
, charset_id
, make_number (id
));
1219 charset_table
[id
] = charset
;
1221 if (charset
.method
== CHARSET_METHOD_MAP
)
1223 load_charset (&charset
, 0);
1224 charset_table
[id
] = charset
;
1227 if (charset
.iso_final
>= 0)
1229 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1230 charset
.iso_final
) = id
;
1231 if (new_definition_p
)
1232 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1233 Fcons (make_number (id
), Qnil
));
1234 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1235 charset_jisx0201_roman
= id
;
1236 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1237 charset_jisx0208_1978
= id
;
1238 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1239 charset_jisx0208
= id
;
1240 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1241 charset_ksc5601
= id
;
1244 if (charset
.emacs_mule_id
>= 0)
1246 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1247 if (charset
.emacs_mule_id
< 0xA0)
1248 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1250 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1251 if (new_definition_p
)
1252 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1253 Fcons (make_number (id
), Qnil
));
1256 if (new_definition_p
)
1258 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1259 if (charset
.supplementary_p
)
1260 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1261 Fcons (make_number (id
), Qnil
));
1266 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1268 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1270 if (cs
->supplementary_p
)
1273 if (EQ (tail
, Vcharset_ordered_list
))
1274 Vcharset_ordered_list
= Fcons (make_number (id
),
1275 Vcharset_ordered_list
);
1276 else if (NILP (tail
))
1277 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1278 Fcons (make_number (id
), Qnil
));
1281 val
= Fcons (XCAR (tail
), XCDR (tail
));
1282 XSETCDR (tail
, val
);
1283 XSETCAR (tail
, make_number (id
));
1286 charset_ordered_list_tick
++;
1293 /* Same as Fdefine_charset_internal but arguments are more convenient
1294 to call from C (typically in syms_of_charset). This can define a
1295 charset of `offset' method only. Return the ID of the new
1299 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1300 iso_final
, iso_revision
, emacs_mule_id
,
1301 ascii_compatible
, supplementary
,
1305 unsigned char *code_space
;
1306 unsigned min_code
, max_code
;
1307 int iso_final
, iso_revision
, emacs_mule_id
;
1308 int ascii_compatible
, supplementary
;
1311 Lisp_Object args
[charset_arg_max
];
1312 Lisp_Object plist
[14];
1316 args
[charset_arg_name
] = name
;
1317 args
[charset_arg_dimension
] = make_number (dimension
);
1318 val
= Fmake_vector (make_number (8), make_number (0));
1319 for (i
= 0; i
< 8; i
++)
1320 ASET (val
, i
, make_number (code_space
[i
]));
1321 args
[charset_arg_code_space
] = val
;
1322 args
[charset_arg_min_code
] = make_number (min_code
);
1323 args
[charset_arg_max_code
] = make_number (max_code
);
1324 args
[charset_arg_iso_final
]
1325 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1326 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1327 args
[charset_arg_emacs_mule_id
]
1328 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1329 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1330 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1331 args
[charset_arg_invalid_code
] = Qnil
;
1332 args
[charset_arg_code_offset
] = make_number (code_offset
);
1333 args
[charset_arg_map
] = Qnil
;
1334 args
[charset_arg_subset
] = Qnil
;
1335 args
[charset_arg_superset
] = Qnil
;
1336 args
[charset_arg_unify_map
] = Qnil
;
1338 plist
[0] = intern_c_string (":name");
1339 plist
[1] = args
[charset_arg_name
];
1340 plist
[2] = intern_c_string (":dimension");
1341 plist
[3] = args
[charset_arg_dimension
];
1342 plist
[4] = intern_c_string (":code-space");
1343 plist
[5] = args
[charset_arg_code_space
];
1344 plist
[6] = intern_c_string (":iso-final-char");
1345 plist
[7] = args
[charset_arg_iso_final
];
1346 plist
[8] = intern_c_string (":emacs-mule-id");
1347 plist
[9] = args
[charset_arg_emacs_mule_id
];
1348 plist
[10] = intern_c_string (":ascii-compatible-p");
1349 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1350 plist
[12] = intern_c_string (":code-offset");
1351 plist
[13] = args
[charset_arg_code_offset
];
1353 args
[charset_arg_plist
] = Flist (14, plist
);
1354 Fdefine_charset_internal (charset_arg_max
, args
);
1356 return XINT (CHARSET_SYMBOL_ID (name
));
1360 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1361 Sdefine_charset_alias
, 2, 2, 0,
1362 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1364 Lisp_Object alias
, charset
;
1368 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1369 Fputhash (alias
, attr
, Vcharset_hash_table
);
1370 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1375 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1376 doc
: /* Return the property list of CHARSET. */)
1378 Lisp_Object charset
;
1382 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1383 return CHARSET_ATTR_PLIST (attrs
);
1387 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1388 doc
: /* Set CHARSET's property list to PLIST. */)
1390 Lisp_Object charset
, plist
;
1394 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1395 CHARSET_ATTR_PLIST (attrs
) = plist
;
1400 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1401 doc
: /* Unify characters of CHARSET with Unicode.
1402 This means reading the relevant file and installing the table defined
1403 by CHARSET's `:unify-map' property.
1405 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1406 the same meaning as the `:unify-map' attribute in the function
1407 `define-charset' (which see).
1409 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1410 (charset
, unify_map
, deunify
)
1411 Lisp_Object charset
, unify_map
, deunify
;
1416 CHECK_CHARSET_GET_ID (charset
, id
);
1417 cs
= CHARSET_FROM_ID (id
);
1419 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1420 : ! CHARSET_UNIFIED_P (cs
))
1423 CHARSET_UNIFIED_P (cs
) = 0;
1426 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1427 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1428 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1429 if (NILP (unify_map
))
1430 unify_map
= CHARSET_UNIFY_MAP (cs
);
1433 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1434 signal_error ("Bad unify-map", unify_map
);
1435 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1437 if (NILP (Vchar_unify_table
))
1438 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1439 char_table_set_range (Vchar_unify_table
,
1440 cs
->min_char
, cs
->max_char
, charset
);
1441 CHARSET_UNIFIED_P (cs
) = 1;
1443 else if (CHAR_TABLE_P (Vchar_unify_table
))
1445 int min_code
= CHARSET_MIN_CODE (cs
);
1446 int max_code
= CHARSET_MAX_CODE (cs
);
1447 int min_char
= DECODE_CHAR (cs
, min_code
);
1448 int max_char
= DECODE_CHAR (cs
, max_code
);
1450 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1456 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1457 Sget_unused_iso_final_char
, 2, 2, 0,
1459 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1460 DIMENSION is the number of bytes to represent a character: 1 or 2.
1461 CHARS is the number of characters in a dimension: 94 or 96.
1463 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1464 If there's no unused final char for the specified kind of charset,
1467 Lisp_Object dimension
, chars
;
1471 CHECK_NUMBER (dimension
);
1472 CHECK_NUMBER (chars
);
1473 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1474 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1475 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1476 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1477 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1478 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1480 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1484 check_iso_charset_parameter (dimension
, chars
, final_char
)
1485 Lisp_Object dimension
, chars
, final_char
;
1487 CHECK_NATNUM (dimension
);
1488 CHECK_NATNUM (chars
);
1489 CHECK_NATNUM (final_char
);
1491 if (XINT (dimension
) > 3)
1492 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1493 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1494 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1495 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1496 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1500 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1502 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1504 On decoding by an ISO-2022 base coding system, when a charset
1505 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1506 if CHARSET is designated instead. */)
1507 (dimension
, chars
, final_char
, charset
)
1508 Lisp_Object dimension
, chars
, final_char
, charset
;
1513 CHECK_CHARSET_GET_ID (charset
, id
);
1514 check_iso_charset_parameter (dimension
, chars
, final_char
);
1515 chars_flag
= XINT (chars
) == 96;
1516 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1521 /* Return information about charsets in the text at PTR of NBYTES
1522 bytes, which are NCHARS characters. The value is:
1524 0: Each character is represented by one byte. This is always
1525 true for a unibyte string. For a multibyte string, true if
1526 it contains only ASCII characters.
1528 1: No charsets other than ascii, control-1, and latin-1 are
1535 string_xstring_p (string
)
1538 const unsigned char *p
= SDATA (string
);
1539 const unsigned char *endp
= p
+ SBYTES (string
);
1541 if (SCHARS (string
) == SBYTES (string
))
1546 int c
= STRING_CHAR_ADVANCE (p
);
1555 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1557 CHARSETS is a vector. If Nth element is non-nil, it means the
1558 charset whose id is N is already found.
1560 It may lookup a translation table TABLE if supplied. */
1563 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1564 const unsigned char *ptr
;
1565 EMACS_INT nchars
, nbytes
;
1566 Lisp_Object charsets
, table
;
1569 const unsigned char *pend
= ptr
+ nbytes
;
1571 if (nchars
== nbytes
)
1574 ASET (charsets
, charset_ascii
, Qt
);
1581 c
= translate_char (table
, c
);
1582 if (ASCII_BYTE_P (c
))
1583 ASET (charsets
, charset_ascii
, Qt
);
1585 ASET (charsets
, charset_eight_bit
, Qt
);
1592 int c
= STRING_CHAR_ADVANCE (ptr
);
1593 struct charset
*charset
;
1596 c
= translate_char (table
, c
);
1597 charset
= CHAR_CHARSET (c
);
1598 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1603 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1605 doc
: /* Return a list of charsets in the region between BEG and END.
1606 BEG and END are buffer positions.
1607 Optional arg TABLE if non-nil is a translation table to look up.
1609 If the current buffer is unibyte, the returned list may contain
1610 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1612 Lisp_Object beg
, end
, table
;
1614 Lisp_Object charsets
;
1615 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1618 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1620 validate_region (&beg
, &end
);
1621 from
= XFASTINT (beg
);
1622 stop
= to
= XFASTINT (end
);
1624 if (from
< GPT
&& GPT
< to
)
1627 stop_byte
= GPT_BYTE
;
1630 stop_byte
= CHAR_TO_BYTE (stop
);
1632 from_byte
= CHAR_TO_BYTE (from
);
1634 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1637 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1638 stop_byte
- from_byte
, charsets
, table
,
1642 from
= stop
, from_byte
= stop_byte
;
1643 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1650 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1651 if (!NILP (AREF (charsets
, i
)))
1652 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1656 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1658 doc
: /* Return a list of charsets in STR.
1659 Optional arg TABLE if non-nil is a translation table to look up.
1661 If STR is unibyte, the returned list may contain
1662 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1664 Lisp_Object str
, table
;
1666 Lisp_Object charsets
;
1672 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1673 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1675 STRING_MULTIBYTE (str
));
1677 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1678 if (!NILP (AREF (charsets
, i
)))
1679 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1685 /* Return a unified character code for C (>= 0x110000). VAL is a
1686 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1689 maybe_unify_char (c
, val
)
1693 struct charset
*charset
;
1700 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1701 load_charset (charset
, 1);
1702 if (! inhibit_load_charset_map
)
1704 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1710 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1711 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1720 /* Return a character correponding to the code-point CODE of
1724 decode_char (charset
, code
)
1725 struct charset
*charset
;
1729 enum charset_method method
= CHARSET_METHOD (charset
);
1731 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1734 if (method
== CHARSET_METHOD_SUBSET
)
1736 Lisp_Object subset_info
;
1738 subset_info
= CHARSET_SUBSET (charset
);
1739 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1740 code
-= XINT (AREF (subset_info
, 3));
1741 if (code
< XFASTINT (AREF (subset_info
, 1))
1742 || code
> XFASTINT (AREF (subset_info
, 2)))
1745 c
= DECODE_CHAR (charset
, code
);
1747 else if (method
== CHARSET_METHOD_SUPERSET
)
1749 Lisp_Object parents
;
1751 parents
= CHARSET_SUPERSET (charset
);
1753 for (; CONSP (parents
); parents
= XCDR (parents
))
1755 int id
= XINT (XCAR (XCAR (parents
)));
1756 int code_offset
= XINT (XCDR (XCAR (parents
)));
1757 unsigned this_code
= code
- code_offset
;
1759 charset
= CHARSET_FROM_ID (id
);
1760 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1766 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1770 if (method
== CHARSET_METHOD_MAP
)
1772 Lisp_Object decoder
;
1774 decoder
= CHARSET_DECODER (charset
);
1775 if (! VECTORP (decoder
))
1777 load_charset (charset
, 1);
1778 decoder
= CHARSET_DECODER (charset
);
1780 if (VECTORP (decoder
))
1781 c
= XINT (AREF (decoder
, char_index
));
1783 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1785 else /* method == CHARSET_METHOD_OFFSET */
1787 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1788 if (CHARSET_UNIFIED_P (charset
)
1789 && c
> MAX_UNICODE_CHAR
)
1790 MAYBE_UNIFY_CHAR (c
);
1797 /* Variable used temporarily by the macro ENCODE_CHAR. */
1798 Lisp_Object charset_work
;
1800 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1801 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1802 use CHARSET's strict_max_char instead of max_char. */
1805 encode_char (charset
, c
)
1806 struct charset
*charset
;
1810 enum charset_method method
= CHARSET_METHOD (charset
);
1812 if (CHARSET_UNIFIED_P (charset
))
1814 Lisp_Object deunifier
;
1815 int code_index
= -1;
1817 deunifier
= CHARSET_DEUNIFIER (charset
);
1818 if (! CHAR_TABLE_P (deunifier
))
1820 load_charset (charset
, 2);
1821 deunifier
= CHARSET_DEUNIFIER (charset
);
1823 if (CHAR_TABLE_P (deunifier
))
1825 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1827 if (INTEGERP (deunified
))
1828 code_index
= XINT (deunified
);
1832 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1834 if (code_index
>= 0)
1835 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1838 if (method
== CHARSET_METHOD_SUBSET
)
1840 Lisp_Object subset_info
;
1841 struct charset
*this_charset
;
1843 subset_info
= CHARSET_SUBSET (charset
);
1844 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1845 code
= ENCODE_CHAR (this_charset
, c
);
1846 if (code
== CHARSET_INVALID_CODE (this_charset
)
1847 || code
< XFASTINT (AREF (subset_info
, 1))
1848 || code
> XFASTINT (AREF (subset_info
, 2)))
1849 return CHARSET_INVALID_CODE (charset
);
1850 code
+= XINT (AREF (subset_info
, 3));
1854 if (method
== CHARSET_METHOD_SUPERSET
)
1856 Lisp_Object parents
;
1858 parents
= CHARSET_SUPERSET (charset
);
1859 for (; CONSP (parents
); parents
= XCDR (parents
))
1861 int id
= XINT (XCAR (XCAR (parents
)));
1862 int code_offset
= XINT (XCDR (XCAR (parents
)));
1863 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1865 code
= ENCODE_CHAR (this_charset
, c
);
1866 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1867 return code
+ code_offset
;
1869 return CHARSET_INVALID_CODE (charset
);
1872 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1873 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1874 return CHARSET_INVALID_CODE (charset
);
1876 if (method
== CHARSET_METHOD_MAP
)
1878 Lisp_Object encoder
;
1881 encoder
= CHARSET_ENCODER (charset
);
1882 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1884 load_charset (charset
, 2);
1885 encoder
= CHARSET_ENCODER (charset
);
1887 if (CHAR_TABLE_P (encoder
))
1889 val
= CHAR_TABLE_REF (encoder
, c
);
1891 return CHARSET_INVALID_CODE (charset
);
1893 if (! CHARSET_COMPACT_CODES_P (charset
))
1894 code
= INDEX_TO_CODE_POINT (charset
, code
);
1898 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1899 code
= INDEX_TO_CODE_POINT (charset
, code
);
1902 else /* method == CHARSET_METHOD_OFFSET */
1904 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1906 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1913 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1914 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1915 Return nil if CODE-POINT is not valid in CHARSET.
1917 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1919 Optional argument RESTRICTION specifies a way to map the pair of CCS
1920 and CODE-POINT to a character. Currently not supported and just ignored. */)
1921 (charset
, code_point
, restriction
)
1922 Lisp_Object charset
, code_point
, restriction
;
1926 struct charset
*charsetp
;
1928 CHECK_CHARSET_GET_ID (charset
, id
);
1929 if (CONSP (code_point
))
1931 CHECK_NATNUM_CAR (code_point
);
1932 CHECK_NATNUM_CDR (code_point
);
1933 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1937 CHECK_NATNUM (code_point
);
1938 code
= XINT (code_point
);
1940 charsetp
= CHARSET_FROM_ID (id
);
1941 c
= DECODE_CHAR (charsetp
, code
);
1942 return (c
>= 0 ? make_number (c
) : Qnil
);
1946 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1947 doc
: /* Encode the character CH into a code-point of CHARSET.
1948 Return nil if CHARSET doesn't include CH.
1950 Optional argument RESTRICTION specifies a way to map CH to a
1951 code-point in CCS. Currently not supported and just ignored. */)
1952 (ch
, charset
, restriction
)
1953 Lisp_Object ch
, charset
, restriction
;
1957 struct charset
*charsetp
;
1959 CHECK_CHARSET_GET_ID (charset
, id
);
1961 charsetp
= CHARSET_FROM_ID (id
);
1962 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1963 if (code
== CHARSET_INVALID_CODE (charsetp
))
1965 if (code
> 0x7FFFFFF)
1966 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1967 return make_number (code
);
1971 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1973 /* Return a character of CHARSET whose position codes are CODEn.
1975 CODE1 through CODE4 are optional, but if you don't supply sufficient
1976 position codes, it is assumed that the minimum code in each dimension
1978 (charset
, code1
, code2
, code3
, code4
)
1979 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1982 struct charset
*charsetp
;
1986 CHECK_CHARSET_GET_ID (charset
, id
);
1987 charsetp
= CHARSET_FROM_ID (id
);
1989 dimension
= CHARSET_DIMENSION (charsetp
);
1991 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1992 ? 0 : CHARSET_MIN_CODE (charsetp
));
1995 CHECK_NATNUM (code1
);
1996 if (XFASTINT (code1
) >= 0x100)
1997 args_out_of_range (make_number (0xFF), code1
);
1998 code
= XFASTINT (code1
);
2004 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
2007 CHECK_NATNUM (code2
);
2008 if (XFASTINT (code2
) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code2
);
2010 code
|= XFASTINT (code2
);
2017 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
2020 CHECK_NATNUM (code3
);
2021 if (XFASTINT (code3
) >= 0x100)
2022 args_out_of_range (make_number (0xFF), code3
);
2023 code
|= XFASTINT (code3
);
2030 code
|= charsetp
->code_space
[0];
2033 CHECK_NATNUM (code4
);
2034 if (XFASTINT (code4
) >= 0x100)
2035 args_out_of_range (make_number (0xFF), code4
);
2036 code
|= XFASTINT (code4
);
2043 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2045 c
= DECODE_CHAR (charsetp
, code
);
2047 error ("Invalid code(s)");
2048 return make_number (c
);
2052 /* Return the first charset in CHARSET_LIST that contains C.
2053 CHARSET_LIST is a list of charset IDs. If it is nil, use
2054 Vcharset_ordered_list. */
2057 char_charset (c
, charset_list
, code_return
)
2059 Lisp_Object charset_list
;
2060 unsigned *code_return
;
2064 if (NILP (charset_list
))
2065 charset_list
= Vcharset_ordered_list
;
2069 while (CONSP (charset_list
))
2071 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2072 unsigned code
= ENCODE_CHAR (charset
, c
);
2074 if (code
!= CHARSET_INVALID_CODE (charset
))
2077 *code_return
= code
;
2080 charset_list
= XCDR (charset_list
);
2081 if (c
<= MAX_UNICODE_CHAR
2082 && EQ (charset_list
, Vcharset_non_preferred_head
))
2083 return CHARSET_FROM_ID (charset_unicode
);
2085 return (maybe_null
? NULL
2086 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2087 : CHARSET_FROM_ID (charset_eight_bit
));
2091 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2093 /*Return list of charset and one to four position-codes of CH.
2094 The charset is decided by the current priority order of charsets.
2095 A position-code is a byte value of each dimension of the code-point of
2096 CH in the charset. */)
2100 struct charset
*charset
;
2105 CHECK_CHARACTER (ch
);
2107 charset
= CHAR_CHARSET (c
);
2110 code
= ENCODE_CHAR (charset
, c
);
2111 if (code
== CHARSET_INVALID_CODE (charset
))
2113 dimension
= CHARSET_DIMENSION (charset
);
2114 for (val
= Qnil
; dimension
> 0; dimension
--)
2116 val
= Fcons (make_number (code
& 0xFF), val
);
2119 return Fcons (CHARSET_NAME (charset
), val
);
2123 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2124 doc
: /* Return the charset of highest priority that contains CH.
2125 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2126 from which to find the charset. It may also be a coding system. In
2127 that case, find the charset from what supported by that coding system. */)
2129 Lisp_Object ch
, restriction
;
2131 struct charset
*charset
;
2133 CHECK_CHARACTER (ch
);
2134 if (NILP (restriction
))
2135 charset
= CHAR_CHARSET (XINT (ch
));
2138 Lisp_Object charset_list
;
2140 if (CONSP (restriction
))
2142 for (charset_list
= Qnil
; CONSP (restriction
);
2143 restriction
= XCDR (restriction
))
2147 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2148 charset_list
= Fcons (make_number (id
), charset_list
);
2150 charset_list
= Fnreverse (charset_list
);
2153 charset_list
= coding_system_charset_list (restriction
);
2154 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2158 return (CHARSET_NAME (charset
));
2162 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2164 Return charset of a character in the current buffer at position POS.
2165 If POS is nil, it defauls to the current point.
2166 If POS is out of range, the value is nil. */)
2171 struct charset
*charset
;
2173 ch
= Fchar_after (pos
);
2174 if (! INTEGERP (ch
))
2176 charset
= CHAR_CHARSET (XINT (ch
));
2177 return (CHARSET_NAME (charset
));
2181 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2183 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2185 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2186 by their DIMENSION, CHARS, and FINAL-CHAR,
2187 whereas Emacs distinguishes them by charset symbol.
2188 See the documentation of the function `charset-info' for the meanings of
2189 DIMENSION, CHARS, and FINAL-CHAR. */)
2190 (dimension
, chars
, final_char
)
2191 Lisp_Object dimension
, chars
, final_char
;
2196 check_iso_charset_parameter (dimension
, chars
, final_char
);
2197 chars_flag
= XFASTINT (chars
) == 96;
2198 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2199 XFASTINT (final_char
));
2200 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2204 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2208 Clear temporary charset mapping tables.
2209 It should be called only from temacs invoked for dumping. */)
2212 if (temp_charset_work
)
2214 free (temp_charset_work
);
2215 temp_charset_work
= NULL
;
2218 if (CHAR_TABLE_P (Vchar_unify_table
))
2219 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2224 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2225 Scharset_priority_list
, 0, 1, 0,
2226 doc
: /* Return the list of charsets ordered by priority.
2227 HIGHESTP non-nil means just return the highest priority one. */)
2229 Lisp_Object highestp
;
2231 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2233 if (!NILP (highestp
))
2234 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2236 while (!NILP (list
))
2238 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2241 return Fnreverse (val
);
2244 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2246 doc
: /* Assign higher priority to the charsets given as arguments.
2247 usage: (set-charset-priority &rest charsets) */)
2252 Lisp_Object new_head
, old_list
, arglist
[2];
2253 Lisp_Object list_2022
, list_emacs_mule
;
2256 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2258 for (i
= 0; i
< nargs
; i
++)
2260 CHECK_CHARSET_GET_ID (args
[i
], id
);
2261 if (! NILP (Fmemq (make_number (id
), old_list
)))
2263 old_list
= Fdelq (make_number (id
), old_list
);
2264 new_head
= Fcons (make_number (id
), new_head
);
2267 arglist
[0] = Fnreverse (new_head
);
2268 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2269 Vcharset_ordered_list
= Fnconc (2, arglist
);
2270 charset_ordered_list_tick
++;
2272 charset_unibyte
= -1;
2273 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2274 CONSP (old_list
); old_list
= XCDR (old_list
))
2276 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2277 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2278 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2279 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2280 if (charset_unibyte
< 0)
2282 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2284 if (CHARSET_DIMENSION (charset
) == 1
2285 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2286 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2287 charset_unibyte
= CHARSET_ID (charset
);
2290 Viso_2022_charset_list
= Fnreverse (list_2022
);
2291 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2292 if (charset_unibyte
< 0)
2293 charset_unibyte
= charset_iso_8859_1
;
2298 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2300 doc
: /* Internal use only.
2301 Return charset identification number of CHARSET. */)
2303 Lisp_Object charset
;
2307 CHECK_CHARSET_GET_ID (charset
, id
);
2308 return make_number (id
);
2315 Lisp_Object tempdir
;
2316 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2317 if (access ((char *) SDATA (tempdir
), 0) < 0)
2319 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2320 Emacs will not function correctly without the character map files.\n\
2321 Please check your installation!\n",
2323 /* TODO should this be a fatal error? (Bug#909) */
2326 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2331 init_charset_once ()
2335 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2336 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2337 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2338 iso_charset_table
[i
][j
][k
] = -1;
2340 for (i
= 0; i
< 256; i
++)
2341 emacs_mule_charset
[i
] = NULL
;
2343 charset_jisx0201_roman
= -1;
2344 charset_jisx0208_1978
= -1;
2345 charset_jisx0208
= -1;
2346 charset_ksc5601
= -1;
2354 DEFSYM (Qcharsetp
, "charsetp");
2356 DEFSYM (Qascii
, "ascii");
2357 DEFSYM (Qunicode
, "unicode");
2358 DEFSYM (Qemacs
, "emacs");
2359 DEFSYM (Qeight_bit
, "eight-bit");
2360 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2365 staticpro (&Vcharset_ordered_list
);
2366 Vcharset_ordered_list
= Qnil
;
2368 staticpro (&Viso_2022_charset_list
);
2369 Viso_2022_charset_list
= Qnil
;
2371 staticpro (&Vemacs_mule_charset_list
);
2372 Vemacs_mule_charset_list
= Qnil
;
2374 /* Don't staticpro them here. It's done in syms_of_fns. */
2375 QCtest
= intern (":test");
2376 Qeq
= intern ("eq");
2378 staticpro (&Vcharset_hash_table
);
2380 Lisp_Object args
[2];
2383 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2386 charset_table_size
= 128;
2387 charset_table
= ((struct charset
*)
2388 xmalloc (sizeof (struct charset
) * charset_table_size
));
2389 charset_table_used
= 0;
2391 defsubr (&Scharsetp
);
2392 defsubr (&Smap_charset_chars
);
2393 defsubr (&Sdefine_charset_internal
);
2394 defsubr (&Sdefine_charset_alias
);
2395 defsubr (&Scharset_plist
);
2396 defsubr (&Sset_charset_plist
);
2397 defsubr (&Sunify_charset
);
2398 defsubr (&Sget_unused_iso_final_char
);
2399 defsubr (&Sdeclare_equiv_charset
);
2400 defsubr (&Sfind_charset_region
);
2401 defsubr (&Sfind_charset_string
);
2402 defsubr (&Sdecode_char
);
2403 defsubr (&Sencode_char
);
2404 defsubr (&Ssplit_char
);
2405 defsubr (&Smake_char
);
2406 defsubr (&Schar_charset
);
2407 defsubr (&Scharset_after
);
2408 defsubr (&Siso_charset
);
2409 defsubr (&Sclear_charset_maps
);
2410 defsubr (&Scharset_priority_list
);
2411 defsubr (&Sset_charset_priority
);
2412 defsubr (&Scharset_id_internal
);
2414 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2415 doc
: /* *List of directories to search for charset map files. */);
2416 Vcharset_map_path
= Qnil
;
2418 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2419 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2420 inhibit_load_charset_map
= 0;
2422 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2423 doc
: /* List of all charsets ever defined. */);
2424 Vcharset_list
= Qnil
;
2426 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2427 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2428 If the current language environment is for multiple languages (e.g. "Latin-1"),
2429 the value may be a list of mnemonics. */);
2430 Vcurrent_iso639_language
= Qnil
;
2433 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2434 0, 127, 'B', -1, 0, 1, 0, 0);
2436 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2437 0, 255, -1, -1, -1, 1, 0, 0);
2439 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2440 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2442 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2443 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2445 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2446 128, 255, -1, 0, -1, 0, 1,
2447 MAX_5_BYTE_CHAR
+ 1);
2448 charset_unibyte
= charset_iso_8859_1
;
2453 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2454 (do not change this comment) */