1 /* Basic character set support.
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
35 #include <sys/types.h>
38 #include "character.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table
;
63 /* Table of struct charset. */
64 struct charset
*charset_table
;
66 static ptrdiff_t charset_table_size
;
67 static int charset_table_used
;
69 Lisp_Object Qcharsetp
;
71 /* Special charset symbols. */
73 static Lisp_Object Qeight_bit
;
74 static Lisp_Object Qiso_8859_1
;
75 static Lisp_Object Qunicode
;
76 static Lisp_Object Qemacs
;
78 /* The corresponding charsets. */
80 int charset_eight_bit
;
81 static int charset_iso_8859_1
;
83 static int charset_emacs
;
85 /* The other special charsets. */
86 int charset_jisx0201_roman
;
87 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
92 static Lisp_Object Qgl
, Qgr
;
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 int emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 #define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (int) ((code) - (charset)->min_code) \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
139 /* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
143 #define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
157 /* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
162 /* The current charset for which the following tables are setup. */
163 struct charset
*current
;
165 /* 1 iff the following table is used for encoder. */
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
170 int min_char
, max_char
;
172 /* A Unicode character corresponding to the code index 0 (i.e. the
173 minimum code-point) of the current charset, or -1 if the code
174 index 0 is not a Unicode character. This is checked when
175 table.encoder[CHAR] is zero. */
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder
[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder
[0x20000];
190 } *temp_charset_work
;
192 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
202 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
209 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
212 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
216 /* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
218 bool charset_map_loaded
;
220 struct charset_map_entries
225 } *entry
; /* [0x10000] */
226 struct charset_map_entries
*next
;
229 /* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
245 If CONTROL_FLAG is 2, setup the following tables.
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
255 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
257 Lisp_Object vec
IF_LINT (= Qnil
), table
IF_LINT (= Qnil
);
258 unsigned max_code
= CHARSET_MAX_CODE (charset
);
259 bool ascii_compatible_p
= charset
->ascii_compatible_p
;
260 int min_char
, max_char
, nonascii_min_char
;
262 unsigned char *fast_map
= charset
->fast_map
;
269 if (! inhibit_load_charset_map
)
271 if (control_flag
== 1)
273 if (charset
->method
== CHARSET_METHOD_MAP
)
275 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
277 vec
= Fmake_vector (make_number (n
), make_number (-1));
278 set_charset_attr (charset
, charset_decoder
, vec
);
282 char_table_set_range (Vchar_unify_table
,
283 charset
->min_char
, charset
->max_char
,
289 table
= Fmake_char_table (Qnil
, Qnil
);
290 set_charset_attr (charset
,
291 (charset
->method
== CHARSET_METHOD_MAP
292 ? charset_encoder
: charset_deunifier
),
298 if (! temp_charset_work
)
299 temp_charset_work
= xmalloc (sizeof *temp_charset_work
);
300 if (control_flag
== 1)
302 memset (temp_charset_work
->table
.decoder
, -1,
303 sizeof (int) * 0x10000);
307 memset (temp_charset_work
->table
.encoder
, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work
->zero_index_char
= -1;
311 temp_charset_work
->current
= charset
;
312 temp_charset_work
->for_encoder
= (control_flag
== 2);
315 charset_map_loaded
= 1;
318 min_char
= max_char
= entries
->entry
[0].c
;
319 nonascii_min_char
= MAX_CHAR
;
320 for (i
= 0; i
< n_entries
; i
++)
323 int from_index
, to_index
, lim_index
;
325 int idx
= i
% 0x10000;
327 if (i
> 0 && idx
== 0)
328 entries
= entries
->next
;
329 from
= entries
->entry
[idx
].from
;
330 to
= entries
->entry
[idx
].to
;
331 from_c
= entries
->entry
[idx
].c
;
332 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
335 to_index
= from_index
;
340 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
341 to_c
= from_c
+ (to_index
- from_index
);
343 if (from_index
< 0 || to_index
< 0)
345 lim_index
= to_index
+ 1;
349 else if (from_c
< min_char
)
352 if (control_flag
== 1)
354 if (charset
->method
== CHARSET_METHOD_MAP
)
355 for (; from_index
< lim_index
; from_index
++, from_c
++)
356 ASET (vec
, from_index
, make_number (from_c
));
358 for (; from_index
< lim_index
; from_index
++, from_c
++)
359 CHAR_TABLE_SET (Vchar_unify_table
,
360 CHARSET_CODE_OFFSET (charset
) + from_index
,
361 make_number (from_c
));
363 else if (control_flag
== 2)
365 if (charset
->method
== CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset
))
367 for (; from_index
< lim_index
; from_index
++, from_c
++)
369 unsigned code
= from_index
;
370 code
= INDEX_TO_CODE_POINT (charset
, code
);
372 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
373 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
376 for (; from_index
< lim_index
; from_index
++, from_c
++)
378 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
379 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
382 else if (control_flag
== 3)
383 for (; from_index
< lim_index
; from_index
++, from_c
++)
384 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
385 else if (control_flag
== 4)
386 for (; from_index
< lim_index
; from_index
++, from_c
++)
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
388 else /* control_flag == 0 */
390 if (ascii_compatible_p
)
392 if (! ASCII_CHAR_P (from_c
))
394 if (from_c
< nonascii_min_char
)
395 nonascii_min_char
= from_c
;
397 else if (! ASCII_CHAR_P (to_c
))
399 nonascii_min_char
= 0x80;
403 for (; from_c
<= to_c
; from_c
++)
404 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
408 if (control_flag
== 0)
410 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
411 ? nonascii_min_char
: min_char
);
412 CHARSET_MAX_CHAR (charset
) = max_char
;
414 else if (control_flag
== 4)
416 temp_charset_work
->min_char
= min_char
;
417 temp_charset_work
->max_char
= max_char
;
422 /* Read a hexadecimal number (preceded by "0x") from the file FP while
423 paying attention to comment character '#'. */
426 read_hex (FILE *fp
, bool *eof
, bool *overflow
)
431 while ((c
= getc (fp
)) != EOF
)
435 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
439 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
449 while (c_isxdigit (c
= getc (fp
)))
451 if (UINT_MAX
>> 4 < n
)
454 | (c
- ('0' <= c
&& c
<= '9' ? '0'
455 : 'A' <= c
&& c
<= 'F' ? 'A' - 10
463 /* Return a mapping vector for CHARSET loaded from MAPFILE.
464 Each line of MAPFILE has this form
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
474 where CODE1 is a code-point or a cons of code-points specifying a
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
481 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
,
484 unsigned min_code
= CHARSET_MIN_CODE (charset
);
485 unsigned max_code
= CHARSET_MAX_CODE (charset
);
488 Lisp_Object suffixes
;
489 struct charset_map_entries
*head
, *entries
;
493 suffixes
= list2 (build_string (".map"), build_string (".TXT"));
496 record_unwind_protect_ptr (fclose_ptr_unwind
, &fp
);
499 specbind (Qfile_name_handler_alist
, Qnil
);
500 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
, false);
501 fp
= fd
< 0 ? 0 : fdopen (fd
, "r");
504 int open_errno
= errno
;
506 report_file_errno ("Loading charset map", mapfile
, open_errno
);
511 /* Use record, as `charset_map_entries' is large (larger than
513 head
= xmalloc (sizeof *head
);
514 head
->entry
= xmalloc_atomic (0x10000 * sizeof (*head
->entry
));
520 unsigned from
, to
, c
;
522 bool eof
= 0, overflow
= 0;
524 from
= read_hex (fp
, &eof
, &overflow
);
527 if (getc (fp
) == '-')
528 to
= read_hex (fp
, &eof
, &overflow
);
533 c
= read_hex (fp
, &eof
, &overflow
);
539 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
542 if (n_entries
== 0x10000)
544 entries
->next
= xmalloc (sizeof *entries
->next
);
545 entries
->next
->entry
= xmalloc_atomic (0x10000 * (sizeof *entries
->next
));
546 entries
= entries
->next
;
550 entries
->entry
[idx
].from
= from
;
551 entries
->entry
[idx
].to
= to
;
552 entries
->entry
[idx
].c
= c
;
558 load_charset_map (charset
, head
, n_entries
, control_flag
);
563 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
565 unsigned min_code
= CHARSET_MIN_CODE (charset
);
566 unsigned max_code
= CHARSET_MAX_CODE (charset
);
567 struct charset_map_entries
*head
, *entries
;
569 int len
= ASIZE (vec
);
575 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
579 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
580 large (larger than MAX_ALLOCA). */
581 head
= SAFE_ALLOCA (sizeof *head
);
582 head
->entry
= xmalloc_atomic (0x10000 * (sizeof *head
->entry
));
584 memset (entries
, 0, sizeof (struct charset_map_entries
));
587 for (i
= 0; i
< len
; i
+= 2)
589 Lisp_Object val
, val2
;
599 from
= XFASTINT (val
);
600 to
= XFASTINT (val2
);
603 from
= to
= XFASTINT (val
);
604 val
= AREF (vec
, i
+ 1);
608 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
611 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
613 entries
->next
= SAFE_ALLOCA (sizeof *entries
->next
);
614 entries
= entries
->next
;
615 memset (entries
, 0, sizeof (struct charset_map_entries
));
617 idx
= n_entries
% 0x10000;
618 entries
->entry
[idx
].from
= from
;
619 entries
->entry
[idx
].to
= to
;
620 entries
->entry
[idx
].c
= c
;
624 load_charset_map (charset
, head
, n_entries
, control_flag
);
629 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
630 map it is (see the comment of load_charset_map for the detail). */
633 load_charset (struct charset
*charset
, int control_flag
)
637 if (inhibit_load_charset_map
639 && charset
== temp_charset_work
->current
640 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
643 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
644 map
= CHARSET_MAP (charset
);
647 if (! CHARSET_UNIFIED_P (charset
))
649 map
= CHARSET_UNIFY_MAP (charset
);
652 load_charset_map_from_file (charset
, map
, control_flag
);
654 load_charset_map_from_vector (charset
, map
, control_flag
);
658 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
659 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
662 return (CHARSETP (object
) ? Qt
: Qnil
);
667 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
668 Lisp_Object function
, Lisp_Object arg
,
669 unsigned int from
, unsigned int to
)
671 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
672 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
677 range
= Fcons (Qnil
, Qnil
);
680 c
= temp_charset_work
->min_char
;
681 stop
= (temp_charset_work
->max_char
< 0x20000
682 ? temp_charset_work
->max_char
: 0xFFFF);
686 int idx
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
688 if (idx
>= from_idx
&& idx
<= to_idx
)
690 if (NILP (XCAR (range
)))
691 XSETCAR (range
, make_number (c
));
693 else if (! NILP (XCAR (range
)))
695 XSETCDR (range
, make_number (c
- 1));
697 (*c_function
) (arg
, range
);
699 call2 (function
, range
, arg
);
700 XSETCAR (range
, Qnil
);
704 if (c
== temp_charset_work
->max_char
)
706 if (! NILP (XCAR (range
)))
708 XSETCDR (range
, make_number (c
));
710 (*c_function
) (arg
, range
);
712 call2 (function
, range
, arg
);
717 stop
= temp_charset_work
->max_char
;
725 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
726 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
729 bool partial
= (from
> CHARSET_MIN_CODE (charset
)
730 || to
< CHARSET_MAX_CODE (charset
));
732 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
734 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
735 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
736 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
737 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
739 if (CHARSET_UNIFIED_P (charset
))
741 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
742 load_charset (charset
, 2);
743 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
744 map_char_table_for_charset (c_function
, function
,
745 CHARSET_DEUNIFIER (charset
), arg
,
746 partial
? charset
: NULL
, from
, to
);
748 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
751 range
= Fcons (make_number (from_c
), make_number (to_c
));
753 (*c_function
) (arg
, range
);
755 call2 (function
, range
, arg
);
757 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
759 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
760 load_charset (charset
, 2);
761 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
762 map_char_table_for_charset (c_function
, function
,
763 CHARSET_ENCODER (charset
), arg
,
764 partial
? charset
: NULL
, from
, to
);
766 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
768 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
770 Lisp_Object subset_info
;
773 subset_info
= CHARSET_SUBSET (charset
);
774 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
775 offset
= XINT (AREF (subset_info
, 3));
777 if (from
< XFASTINT (AREF (subset_info
, 1)))
778 from
= XFASTINT (AREF (subset_info
, 1));
780 if (to
> XFASTINT (AREF (subset_info
, 2)))
781 to
= XFASTINT (AREF (subset_info
, 2));
782 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
784 else /* i.e. CHARSET_METHOD_SUPERSET */
788 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
789 parents
= XCDR (parents
))
792 unsigned this_from
, this_to
;
794 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
795 offset
= XINT (XCDR (XCAR (parents
)));
796 this_from
= from
> offset
? from
- offset
: 0;
797 this_to
= to
> offset
? to
- offset
: 0;
798 if (this_from
< CHARSET_MIN_CODE (charset
))
799 this_from
= CHARSET_MIN_CODE (charset
);
800 if (this_to
> CHARSET_MAX_CODE (charset
))
801 this_to
= CHARSET_MAX_CODE (charset
);
802 map_charset_chars (c_function
, function
, arg
, charset
,
808 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
809 doc
: /* Call FUNCTION for all characters in CHARSET.
810 FUNCTION is called with an argument RANGE and the optional 3rd
813 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
814 characters contained in CHARSET.
816 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
817 range of code points (in CHARSET) of target characters. */)
818 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
823 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
824 if (NILP (from_code
))
825 from
= CHARSET_MIN_CODE (cs
);
828 from
= XINT (from_code
);
829 if (from
< CHARSET_MIN_CODE (cs
))
830 from
= CHARSET_MIN_CODE (cs
);
833 to
= CHARSET_MAX_CODE (cs
);
837 if (to
> CHARSET_MAX_CODE (cs
))
838 to
= CHARSET_MAX_CODE (cs
);
840 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
845 /* Define a charset according to the arguments. The Nth argument is
846 the Nth attribute of the charset (the last attribute `charset-id'
847 is not included). See the docstring of `define-charset' for the
850 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
851 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
852 doc
: /* For internal use only.
853 usage: (define-charset-internal ...) */)
854 (ptrdiff_t nargs
, Lisp_Object
*args
)
856 /* Charset attr vector. */
859 EMACS_UINT hash_code
;
860 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
862 struct charset charset
;
865 bool new_definition_p
;
868 if (nargs
!= charset_arg_max
)
869 return Fsignal (Qwrong_number_of_arguments
,
870 Fcons (intern ("define-charset-internal"),
871 make_number (nargs
)));
873 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
875 CHECK_SYMBOL (args
[charset_arg_name
]);
876 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
878 val
= args
[charset_arg_code_space
];
879 for (i
= 0, dimension
= 0, nchars
= 1; ; i
++)
881 Lisp_Object min_byte_obj
, max_byte_obj
;
882 int min_byte
, max_byte
;
884 min_byte_obj
= Faref (val
, make_number (i
* 2));
885 max_byte_obj
= Faref (val
, make_number (i
* 2 + 1));
886 CHECK_RANGED_INTEGER (min_byte_obj
, 0, 255);
887 min_byte
= XINT (min_byte_obj
);
888 CHECK_RANGED_INTEGER (max_byte_obj
, min_byte
, 255);
889 max_byte
= XINT (max_byte_obj
);
890 charset
.code_space
[i
* 4] = min_byte
;
891 charset
.code_space
[i
* 4 + 1] = max_byte
;
892 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
897 nchars
*= charset
.code_space
[i
* 4 + 2];
898 charset
.code_space
[i
* 4 + 3] = nchars
;
901 val
= args
[charset_arg_dimension
];
903 charset
.dimension
= dimension
;
906 CHECK_RANGED_INTEGER (val
, 1, 4);
907 charset
.dimension
= XINT (val
);
910 charset
.code_linear_p
911 = (charset
.dimension
== 1
912 || (charset
.code_space
[2] == 256
913 && (charset
.dimension
== 2
914 || (charset
.code_space
[6] == 256
915 && (charset
.dimension
== 3
916 || charset
.code_space
[10] == 256)))));
918 if (! charset
.code_linear_p
)
920 charset
.code_space_mask
= xzalloc_atomic (256);
921 for (i
= 0; i
< 4; i
++)
922 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
924 charset
.code_space_mask
[j
] |= (1 << i
);
927 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
929 charset
.min_code
= (charset
.code_space
[0]
930 | (charset
.code_space
[4] << 8)
931 | (charset
.code_space
[8] << 16)
932 | ((unsigned) charset
.code_space
[12] << 24));
933 charset
.max_code
= (charset
.code_space
[1]
934 | (charset
.code_space
[5] << 8)
935 | (charset
.code_space
[9] << 16)
936 | ((unsigned) charset
.code_space
[13] << 24));
937 charset
.char_index_offset
= 0;
939 val
= args
[charset_arg_min_code
];
942 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
944 if (code
< charset
.min_code
945 || code
> charset
.max_code
)
946 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
947 make_fixnum_or_float (charset
.max_code
), val
);
948 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
949 charset
.min_code
= code
;
952 val
= args
[charset_arg_max_code
];
955 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
957 if (code
< charset
.min_code
958 || code
> charset
.max_code
)
959 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
960 make_fixnum_or_float (charset
.max_code
), val
);
961 charset
.max_code
= code
;
964 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
966 val
= args
[charset_arg_invalid_code
];
969 if (charset
.min_code
> 0)
970 charset
.invalid_code
= 0;
973 if (charset
.max_code
< UINT_MAX
)
974 charset
.invalid_code
= charset
.max_code
+ 1;
976 error ("Attribute :invalid-code must be specified");
980 charset
.invalid_code
= cons_to_unsigned (val
, UINT_MAX
);
982 val
= args
[charset_arg_iso_final
];
984 charset
.iso_final
= -1;
988 if (XINT (val
) < '0' || XINT (val
) > 127)
989 error ("Invalid iso-final-char: %"pI
"d", XINT (val
));
990 charset
.iso_final
= XINT (val
);
993 val
= args
[charset_arg_iso_revision
];
995 charset
.iso_revision
= -1;
998 CHECK_RANGED_INTEGER (val
, -1, 63);
999 charset
.iso_revision
= XINT (val
);
1002 val
= args
[charset_arg_emacs_mule_id
];
1004 charset
.emacs_mule_id
= -1;
1008 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1009 error ("Invalid emacs-mule-id: %"pI
"d", XINT (val
));
1010 charset
.emacs_mule_id
= XINT (val
);
1013 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1015 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1017 charset
.unified_p
= 0;
1019 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1021 if (! NILP (args
[charset_arg_code_offset
]))
1023 val
= args
[charset_arg_code_offset
];
1024 CHECK_CHARACTER (val
);
1026 charset
.method
= CHARSET_METHOD_OFFSET
;
1027 charset
.code_offset
= XINT (val
);
1029 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1030 if (MAX_CHAR
- charset
.code_offset
< i
)
1031 error ("Unsupported max char: %d", charset
.max_char
);
1032 charset
.max_char
= i
+ charset
.code_offset
;
1033 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1034 charset
.min_char
= i
+ charset
.code_offset
;
1036 i
= (charset
.min_char
>> 7) << 7;
1037 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1038 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1039 i
= (i
>> 12) << 12;
1040 for (; i
<= charset
.max_char
; i
+= 0x1000)
1041 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1042 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1043 charset
.ascii_compatible_p
= 1;
1045 else if (! NILP (args
[charset_arg_map
]))
1047 val
= args
[charset_arg_map
];
1048 ASET (attrs
, charset_map
, val
);
1049 charset
.method
= CHARSET_METHOD_MAP
;
1051 else if (! NILP (args
[charset_arg_subset
]))
1054 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1055 struct charset
*parent_charset
;
1057 val
= args
[charset_arg_subset
];
1058 parent
= Fcar (val
);
1059 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1060 parent_min_code
= Fnth (make_number (1), val
);
1061 CHECK_NATNUM (parent_min_code
);
1062 parent_max_code
= Fnth (make_number (2), val
);
1063 CHECK_NATNUM (parent_max_code
);
1064 parent_code_offset
= Fnth (make_number (3), val
);
1065 CHECK_NUMBER (parent_code_offset
);
1066 val
= make_uninit_vector (4);
1067 ASET (val
, 0, make_number (parent_charset
->id
));
1068 ASET (val
, 1, parent_min_code
);
1069 ASET (val
, 2, parent_max_code
);
1070 ASET (val
, 3, parent_code_offset
);
1071 ASET (attrs
, charset_subset
, val
);
1073 charset
.method
= CHARSET_METHOD_SUBSET
;
1074 /* Here, we just copy the parent's fast_map. It's not accurate,
1075 but at least it works for quickly detecting which character
1076 DOESN'T belong to this charset. */
1077 for (i
= 0; i
< 190; i
++)
1078 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1080 /* We also copy these for parents. */
1081 charset
.min_char
= parent_charset
->min_char
;
1082 charset
.max_char
= parent_charset
->max_char
;
1084 else if (! NILP (args
[charset_arg_superset
]))
1086 val
= args
[charset_arg_superset
];
1087 charset
.method
= CHARSET_METHOD_SUPERSET
;
1088 val
= Fcopy_sequence (val
);
1089 ASET (attrs
, charset_superset
, val
);
1091 charset
.min_char
= MAX_CHAR
;
1092 charset
.max_char
= 0;
1093 for (; ! NILP (val
); val
= Fcdr (val
))
1095 Lisp_Object elt
, car_part
, cdr_part
;
1096 int this_id
, offset
;
1097 struct charset
*this_charset
;
1102 car_part
= XCAR (elt
);
1103 cdr_part
= XCDR (elt
);
1104 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1105 CHECK_TYPE_RANGED_INTEGER (int, cdr_part
);
1106 offset
= XINT (cdr_part
);
1110 CHECK_CHARSET_GET_ID (elt
, this_id
);
1113 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1115 this_charset
= CHARSET_FROM_ID (this_id
);
1116 if (charset
.min_char
> this_charset
->min_char
)
1117 charset
.min_char
= this_charset
->min_char
;
1118 if (charset
.max_char
< this_charset
->max_char
)
1119 charset
.max_char
= this_charset
->max_char
;
1120 for (i
= 0; i
< 190; i
++)
1121 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1125 error ("None of :code-offset, :map, :parents are specified");
1127 val
= args
[charset_arg_unify_map
];
1128 if (! NILP (val
) && !STRINGP (val
))
1130 ASET (attrs
, charset_unify_map
, val
);
1132 CHECK_LIST (args
[charset_arg_plist
]);
1133 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1135 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1137 if (charset
.hash_index
>= 0)
1139 new_definition_p
= 0;
1140 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1141 set_hash_value_slot (hash_table
, charset
.hash_index
, attrs
);
1145 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1147 if (charset_table_used
== charset_table_size
)
1149 /* Ensure that charset IDs fit into 'int' as well as into the
1150 restriction imposed by fixnums. Although the 'int' restriction
1151 could be removed, too much other code would need altering; for
1152 example, the IDs are stuffed into struct
1153 coding_system.charbuf[i] entries, which are 'int'. */
1154 int old_size
= charset_table_size
;
1155 ptrdiff_t new_size
= old_size
;
1156 struct charset
*new_table
=
1157 xpalloc (0, &new_size
, 1,
1158 min (INT_MAX
, MOST_POSITIVE_FIXNUM
),
1159 sizeof *charset_table
);
1160 memcpy (new_table
, charset_table
, old_size
* sizeof *new_table
);
1161 charset_table
= new_table
;
1162 charset_table_size
= new_size
;
1163 /* FIXME: This leaks memory, as the old charset_table becomes
1164 unreachable. If the old charset table is charset_table_init
1165 then this leak is intentional; otherwise, it's unclear.
1166 If the latter memory leak is intentional, a
1167 comment should be added to explain this. If not, the old
1168 charset_table should be freed, by passing it as the 1st argument
1169 to xpalloc and removing the memcpy. */
1171 id
= charset_table_used
++;
1172 new_definition_p
= 1;
1175 ASET (attrs
, charset_id
, make_number (id
));
1177 charset_table
[id
] = charset
;
1179 if (charset
.method
== CHARSET_METHOD_MAP
)
1181 load_charset (&charset
, 0);
1182 charset_table
[id
] = charset
;
1185 if (charset
.iso_final
>= 0)
1187 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1188 charset
.iso_final
) = id
;
1189 if (new_definition_p
)
1190 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1191 list1 (make_number (id
)));
1192 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1193 charset_jisx0201_roman
= id
;
1194 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1195 charset_jisx0208_1978
= id
;
1196 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1197 charset_jisx0208
= id
;
1198 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1199 charset_ksc5601
= id
;
1202 if (charset
.emacs_mule_id
>= 0)
1204 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1205 if (charset
.emacs_mule_id
< 0xA0)
1206 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1208 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1209 if (new_definition_p
)
1210 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1211 list1 (make_number (id
)));
1214 if (new_definition_p
)
1216 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1217 if (charset
.supplementary_p
)
1218 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1219 list1 (make_number (id
)));
1224 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1226 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1228 if (cs
->supplementary_p
)
1231 if (EQ (tail
, Vcharset_ordered_list
))
1232 Vcharset_ordered_list
= Fcons (make_number (id
),
1233 Vcharset_ordered_list
);
1234 else if (NILP (tail
))
1235 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1236 list1 (make_number (id
)));
1239 val
= Fcons (XCAR (tail
), XCDR (tail
));
1240 XSETCDR (tail
, val
);
1241 XSETCAR (tail
, make_number (id
));
1244 charset_ordered_list_tick
++;
1251 /* Same as Fdefine_charset_internal but arguments are more convenient
1252 to call from C (typically in syms_of_charset). This can define a
1253 charset of `offset' method only. Return the ID of the new
1257 define_charset_internal (Lisp_Object name
,
1259 const char *code_space_chars
,
1260 unsigned min_code
, unsigned max_code
,
1261 int iso_final
, int iso_revision
, int emacs_mule_id
,
1262 bool ascii_compatible
, bool supplementary
,
1265 const unsigned char *code_space
= (const unsigned char *) code_space_chars
;
1266 Lisp_Object args
[charset_arg_max
];
1270 args
[charset_arg_name
] = name
;
1271 args
[charset_arg_dimension
] = make_number (dimension
);
1272 val
= make_uninit_vector (8);
1273 for (i
= 0; i
< 8; i
++)
1274 ASET (val
, i
, make_number (code_space
[i
]));
1275 args
[charset_arg_code_space
] = val
;
1276 args
[charset_arg_min_code
] = make_number (min_code
);
1277 args
[charset_arg_max_code
] = make_number (max_code
);
1278 args
[charset_arg_iso_final
]
1279 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1280 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1281 args
[charset_arg_emacs_mule_id
]
1282 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1283 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1284 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1285 args
[charset_arg_invalid_code
] = Qnil
;
1286 args
[charset_arg_code_offset
] = make_number (code_offset
);
1287 args
[charset_arg_map
] = Qnil
;
1288 args
[charset_arg_subset
] = Qnil
;
1289 args
[charset_arg_superset
] = Qnil
;
1290 args
[charset_arg_unify_map
] = Qnil
;
1292 args
[charset_arg_plist
] =
1293 listn (CONSTYPE_HEAP
, 14,
1294 intern_c_string (":name"),
1295 args
[charset_arg_name
],
1296 intern_c_string (":dimension"),
1297 args
[charset_arg_dimension
],
1298 intern_c_string (":code-space"),
1299 args
[charset_arg_code_space
],
1300 intern_c_string (":iso-final-char"),
1301 args
[charset_arg_iso_final
],
1302 intern_c_string (":emacs-mule-id"),
1303 args
[charset_arg_emacs_mule_id
],
1304 intern_c_string (":ascii-compatible-p"),
1305 args
[charset_arg_ascii_compatible_p
],
1306 intern_c_string (":code-offset"),
1307 args
[charset_arg_code_offset
]);
1308 Fdefine_charset_internal (charset_arg_max
, args
);
1310 return XINT (CHARSET_SYMBOL_ID (name
));
1314 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1315 Sdefine_charset_alias
, 2, 2, 0,
1316 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1317 (Lisp_Object alias
, Lisp_Object charset
)
1321 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1322 Fputhash (alias
, attr
, Vcharset_hash_table
);
1323 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1328 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1329 doc
: /* Return the property list of CHARSET. */)
1330 (Lisp_Object charset
)
1334 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1335 return CHARSET_ATTR_PLIST (attrs
);
1339 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1340 doc
: /* Set CHARSET's property list to PLIST. */)
1341 (Lisp_Object charset
, Lisp_Object plist
)
1345 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1346 ASET (attrs
, charset_plist
, plist
);
1351 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1352 doc
: /* Unify characters of CHARSET with Unicode.
1353 This means reading the relevant file and installing the table defined
1354 by CHARSET's `:unify-map' property.
1356 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1357 the same meaning as the `:unify-map' attribute in the function
1358 `define-charset' (which see).
1360 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1361 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1366 CHECK_CHARSET_GET_ID (charset
, id
);
1367 cs
= CHARSET_FROM_ID (id
);
1369 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1370 : ! CHARSET_UNIFIED_P (cs
))
1373 CHARSET_UNIFIED_P (cs
) = 0;
1376 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1377 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1378 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1379 if (NILP (unify_map
))
1380 unify_map
= CHARSET_UNIFY_MAP (cs
);
1383 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1384 signal_error ("Bad unify-map", unify_map
);
1385 set_charset_attr (cs
, charset_unify_map
, unify_map
);
1387 if (NILP (Vchar_unify_table
))
1388 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1389 char_table_set_range (Vchar_unify_table
,
1390 cs
->min_char
, cs
->max_char
, charset
);
1391 CHARSET_UNIFIED_P (cs
) = 1;
1393 else if (CHAR_TABLE_P (Vchar_unify_table
))
1395 unsigned min_code
= CHARSET_MIN_CODE (cs
);
1396 unsigned max_code
= CHARSET_MAX_CODE (cs
);
1397 int min_char
= DECODE_CHAR (cs
, min_code
);
1398 int max_char
= DECODE_CHAR (cs
, max_code
);
1400 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1406 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1407 Sget_unused_iso_final_char
, 2, 2, 0,
1409 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1410 DIMENSION is the number of bytes to represent a character: 1 or 2.
1411 CHARS is the number of characters in a dimension: 94 or 96.
1413 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1414 If there's no unused final char for the specified kind of charset,
1416 (Lisp_Object dimension
, Lisp_Object chars
)
1420 CHECK_NUMBER (dimension
);
1421 CHECK_NUMBER (chars
);
1422 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1423 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1424 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1425 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1426 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1427 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1429 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1433 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
1435 CHECK_NATNUM (dimension
);
1436 CHECK_NATNUM (chars
);
1437 CHECK_CHARACTER (final_char
);
1439 if (XINT (dimension
) > 3)
1440 error ("Invalid DIMENSION %"pI
"d, it should be 1, 2, or 3",
1442 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1443 error ("Invalid CHARS %"pI
"d, it should be 94 or 96", XINT (chars
));
1444 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1445 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1446 (int)XINT (final_char
));
1450 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1452 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1454 On decoding by an ISO-2022 base coding system, when a charset
1455 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1456 if CHARSET is designated instead. */)
1457 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1462 CHECK_CHARSET_GET_ID (charset
, id
);
1463 check_iso_charset_parameter (dimension
, chars
, final_char
);
1464 chars_flag
= XINT (chars
) == 96;
1465 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1470 /* Return information about charsets in the text at PTR of NBYTES
1471 bytes, which are NCHARS characters. The value is:
1473 0: Each character is represented by one byte. This is always
1474 true for a unibyte string. For a multibyte string, true if
1475 it contains only ASCII characters.
1477 1: No charsets other than ascii, control-1, and latin-1 are
1484 string_xstring_p (Lisp_Object string
)
1486 const unsigned char *p
= SDATA (string
);
1487 const unsigned char *endp
= p
+ SBYTES (string
);
1489 if (SCHARS (string
) == SBYTES (string
))
1494 int c
= STRING_CHAR_ADVANCE (p
);
1503 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1505 CHARSETS is a vector. If Nth element is non-nil, it means the
1506 charset whose id is N is already found.
1508 It may lookup a translation table TABLE if supplied. */
1511 find_charsets_in_text (const unsigned char *ptr
, ptrdiff_t nchars
,
1512 ptrdiff_t nbytes
, Lisp_Object charsets
,
1513 Lisp_Object table
, bool multibyte
)
1515 const unsigned char *pend
= ptr
+ nbytes
;
1517 if (nchars
== nbytes
)
1520 ASET (charsets
, charset_ascii
, Qt
);
1527 c
= translate_char (table
, c
);
1528 if (ASCII_CHAR_P (c
))
1529 ASET (charsets
, charset_ascii
, Qt
);
1531 ASET (charsets
, charset_eight_bit
, Qt
);
1538 int c
= STRING_CHAR_ADVANCE (ptr
);
1539 struct charset
*charset
;
1542 c
= translate_char (table
, c
);
1543 charset
= CHAR_CHARSET (c
);
1544 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1549 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1551 doc
: /* Return a list of charsets in the region between BEG and END.
1552 BEG and END are buffer positions.
1553 Optional arg TABLE if non-nil is a translation table to look up.
1555 If the current buffer is unibyte, the returned list may contain
1556 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1557 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1559 Lisp_Object charsets
;
1560 ptrdiff_t from
, from_byte
, to
, stop
, stop_byte
;
1563 bool multibyte
= ! NILP (BVAR (current_buffer
, enable_multibyte_characters
));
1565 validate_region (&beg
, &end
);
1566 from
= XFASTINT (beg
);
1567 stop
= to
= XFASTINT (end
);
1569 if (from
< GPT
&& GPT
< to
)
1572 stop_byte
= GPT_BYTE
;
1575 stop_byte
= CHAR_TO_BYTE (stop
);
1577 from_byte
= CHAR_TO_BYTE (from
);
1579 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1582 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1583 stop_byte
- from_byte
, charsets
, table
,
1587 from
= stop
, from_byte
= stop_byte
;
1588 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1595 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1596 if (!NILP (AREF (charsets
, i
)))
1597 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1601 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1603 doc
: /* Return a list of charsets in STR.
1604 Optional arg TABLE if non-nil is a translation table to look up.
1606 If STR is unibyte, the returned list may contain
1607 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1608 (Lisp_Object str
, Lisp_Object table
)
1610 Lisp_Object charsets
;
1616 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1617 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1619 STRING_MULTIBYTE (str
));
1621 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1622 if (!NILP (AREF (charsets
, i
)))
1623 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1629 /* Return a unified character code for C (>= 0x110000). VAL is a
1630 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1633 maybe_unify_char (int c
, Lisp_Object val
)
1635 struct charset
*charset
;
1638 return XFASTINT (val
);
1642 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1644 /* The call to load_charset below can allocate memory, which screws
1645 callers of this function through STRING_CHAR_* macros that hold C
1646 pointers to buffer text, if REL_ALLOC is used. */
1647 r_alloc_inhibit_buffer_relocation (1);
1649 load_charset (charset
, 1);
1650 if (! inhibit_load_charset_map
)
1652 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1658 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1659 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1665 r_alloc_inhibit_buffer_relocation (0);
1671 /* Return a character corresponding to the code-point CODE of
1675 decode_char (struct charset
*charset
, unsigned int code
)
1678 enum charset_method method
= CHARSET_METHOD (charset
);
1680 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1683 if (method
== CHARSET_METHOD_SUBSET
)
1685 Lisp_Object subset_info
;
1687 subset_info
= CHARSET_SUBSET (charset
);
1688 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1689 code
-= XINT (AREF (subset_info
, 3));
1690 if (code
< XFASTINT (AREF (subset_info
, 1))
1691 || code
> XFASTINT (AREF (subset_info
, 2)))
1694 c
= DECODE_CHAR (charset
, code
);
1696 else if (method
== CHARSET_METHOD_SUPERSET
)
1698 Lisp_Object parents
;
1700 parents
= CHARSET_SUPERSET (charset
);
1702 for (; CONSP (parents
); parents
= XCDR (parents
))
1704 int id
= XINT (XCAR (XCAR (parents
)));
1705 int code_offset
= XINT (XCDR (XCAR (parents
)));
1706 unsigned this_code
= code
- code_offset
;
1708 charset
= CHARSET_FROM_ID (id
);
1709 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1715 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1719 if (method
== CHARSET_METHOD_MAP
)
1721 Lisp_Object decoder
;
1723 decoder
= CHARSET_DECODER (charset
);
1724 if (! VECTORP (decoder
))
1726 load_charset (charset
, 1);
1727 decoder
= CHARSET_DECODER (charset
);
1729 if (VECTORP (decoder
))
1730 c
= XINT (AREF (decoder
, char_index
));
1732 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1734 else /* method == CHARSET_METHOD_OFFSET */
1736 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1737 if (CHARSET_UNIFIED_P (charset
)
1738 && MAX_UNICODE_CHAR
< c
&& c
<= MAX_5_BYTE_CHAR
)
1740 /* Unify C with a Unicode character if possible. */
1741 Lisp_Object val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1742 c
= maybe_unify_char (c
, val
);
1750 /* Variable used temporarily by the macro ENCODE_CHAR. */
1751 Lisp_Object charset_work
;
1753 /* Return a code-point of C in CHARSET. If C doesn't belong to
1754 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1755 use CHARSET's strict_max_char instead of max_char. */
1758 encode_char (struct charset
*charset
, int c
)
1761 enum charset_method method
= CHARSET_METHOD (charset
);
1763 if (CHARSET_UNIFIED_P (charset
))
1765 Lisp_Object deunifier
;
1766 int code_index
= -1;
1768 deunifier
= CHARSET_DEUNIFIER (charset
);
1769 if (! CHAR_TABLE_P (deunifier
))
1771 load_charset (charset
, 2);
1772 deunifier
= CHARSET_DEUNIFIER (charset
);
1774 if (CHAR_TABLE_P (deunifier
))
1776 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1778 if (INTEGERP (deunified
))
1779 code_index
= XINT (deunified
);
1783 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1785 if (code_index
>= 0)
1786 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1789 if (method
== CHARSET_METHOD_SUBSET
)
1791 Lisp_Object subset_info
;
1792 struct charset
*this_charset
;
1794 subset_info
= CHARSET_SUBSET (charset
);
1795 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1796 code
= ENCODE_CHAR (this_charset
, c
);
1797 if (code
== CHARSET_INVALID_CODE (this_charset
)
1798 || code
< XFASTINT (AREF (subset_info
, 1))
1799 || code
> XFASTINT (AREF (subset_info
, 2)))
1800 return CHARSET_INVALID_CODE (charset
);
1801 code
+= XINT (AREF (subset_info
, 3));
1805 if (method
== CHARSET_METHOD_SUPERSET
)
1807 Lisp_Object parents
;
1809 parents
= CHARSET_SUPERSET (charset
);
1810 for (; CONSP (parents
); parents
= XCDR (parents
))
1812 int id
= XINT (XCAR (XCAR (parents
)));
1813 int code_offset
= XINT (XCDR (XCAR (parents
)));
1814 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1816 code
= ENCODE_CHAR (this_charset
, c
);
1817 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1818 return code
+ code_offset
;
1820 return CHARSET_INVALID_CODE (charset
);
1823 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1824 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1825 return CHARSET_INVALID_CODE (charset
);
1827 if (method
== CHARSET_METHOD_MAP
)
1829 Lisp_Object encoder
;
1832 encoder
= CHARSET_ENCODER (charset
);
1833 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1835 load_charset (charset
, 2);
1836 encoder
= CHARSET_ENCODER (charset
);
1838 if (CHAR_TABLE_P (encoder
))
1840 val
= CHAR_TABLE_REF (encoder
, c
);
1842 return CHARSET_INVALID_CODE (charset
);
1844 if (! CHARSET_COMPACT_CODES_P (charset
))
1845 code
= INDEX_TO_CODE_POINT (charset
, code
);
1849 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1850 code
= INDEX_TO_CODE_POINT (charset
, code
);
1853 else /* method == CHARSET_METHOD_OFFSET */
1855 unsigned code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1857 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1864 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1865 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1866 Return nil if CODE-POINT is not valid in CHARSET.
1868 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1869 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1873 struct charset
*charsetp
;
1875 CHECK_CHARSET_GET_ID (charset
, id
);
1876 code
= cons_to_unsigned (code_point
, UINT_MAX
);
1877 charsetp
= CHARSET_FROM_ID (id
);
1878 c
= DECODE_CHAR (charsetp
, code
);
1879 return (c
>= 0 ? make_number (c
) : Qnil
);
1883 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1884 doc
: /* Encode the character CH into a code-point of CHARSET.
1885 Return nil if CHARSET doesn't include CH. */)
1886 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1890 struct charset
*charsetp
;
1892 CHECK_CHARSET_GET_ID (charset
, id
);
1893 CHECK_CHARACTER (ch
);
1895 charsetp
= CHARSET_FROM_ID (id
);
1896 code
= ENCODE_CHAR (charsetp
, c
);
1897 if (code
== CHARSET_INVALID_CODE (charsetp
))
1899 return INTEGER_TO_CONS (code
);
1903 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1905 /* Return a character of CHARSET whose position codes are CODEn.
1907 CODE1 through CODE4 are optional, but if you don't supply sufficient
1908 position codes, it is assumed that the minimum code in each dimension
1910 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1913 struct charset
*charsetp
;
1917 CHECK_CHARSET_GET_ID (charset
, id
);
1918 charsetp
= CHARSET_FROM_ID (id
);
1920 dimension
= CHARSET_DIMENSION (charsetp
);
1922 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1923 ? 0 : CHARSET_MIN_CODE (charsetp
));
1926 CHECK_NATNUM (code1
);
1927 if (XFASTINT (code1
) >= 0x100)
1928 args_out_of_range (make_number (0xFF), code1
);
1929 code
= XFASTINT (code1
);
1935 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1938 CHECK_NATNUM (code2
);
1939 if (XFASTINT (code2
) >= 0x100)
1940 args_out_of_range (make_number (0xFF), code2
);
1941 code
|= XFASTINT (code2
);
1948 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1951 CHECK_NATNUM (code3
);
1952 if (XFASTINT (code3
) >= 0x100)
1953 args_out_of_range (make_number (0xFF), code3
);
1954 code
|= XFASTINT (code3
);
1961 code
|= charsetp
->code_space
[0];
1964 CHECK_NATNUM (code4
);
1965 if (XFASTINT (code4
) >= 0x100)
1966 args_out_of_range (make_number (0xFF), code4
);
1967 code
|= XFASTINT (code4
);
1974 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1976 c
= DECODE_CHAR (charsetp
, code
);
1978 error ("Invalid code(s)");
1979 return make_number (c
);
1983 /* Return the first charset in CHARSET_LIST that contains C.
1984 CHARSET_LIST is a list of charset IDs. If it is nil, use
1985 Vcharset_ordered_list. */
1988 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1990 bool maybe_null
= 0;
1992 if (NILP (charset_list
))
1993 charset_list
= Vcharset_ordered_list
;
1997 while (CONSP (charset_list
))
1999 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2000 unsigned code
= ENCODE_CHAR (charset
, c
);
2002 if (code
!= CHARSET_INVALID_CODE (charset
))
2005 *code_return
= code
;
2008 charset_list
= XCDR (charset_list
);
2010 && c
<= MAX_UNICODE_CHAR
2011 && EQ (charset_list
, Vcharset_non_preferred_head
))
2012 return CHARSET_FROM_ID (charset_unicode
);
2014 return (maybe_null
? NULL
2015 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2016 : CHARSET_FROM_ID (charset_eight_bit
));
2020 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2022 /*Return list of charset and one to four position-codes of CH.
2023 The charset is decided by the current priority order of charsets.
2024 A position-code is a byte value of each dimension of the code-point of
2025 CH in the charset. */)
2028 struct charset
*charset
;
2033 CHECK_CHARACTER (ch
);
2035 charset
= CHAR_CHARSET (c
);
2038 code
= ENCODE_CHAR (charset
, c
);
2039 if (code
== CHARSET_INVALID_CODE (charset
))
2041 dimension
= CHARSET_DIMENSION (charset
);
2042 for (val
= Qnil
; dimension
> 0; dimension
--)
2044 val
= Fcons (make_number (code
& 0xFF), val
);
2047 return Fcons (CHARSET_NAME (charset
), val
);
2051 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2052 doc
: /* Return the charset of highest priority that contains CH.
2053 ASCII characters are an exception: for them, this function always
2055 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2056 from which to find the charset. It may also be a coding system. In
2057 that case, find the charset from what supported by that coding system. */)
2058 (Lisp_Object ch
, Lisp_Object restriction
)
2060 struct charset
*charset
;
2062 CHECK_CHARACTER (ch
);
2063 if (NILP (restriction
))
2064 charset
= CHAR_CHARSET (XINT (ch
));
2067 if (CONSP (restriction
))
2069 int c
= XFASTINT (ch
);
2071 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2073 struct charset
*rcharset
;
2075 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), rcharset
);
2076 if (ENCODE_CHAR (rcharset
, c
) != CHARSET_INVALID_CODE (rcharset
))
2077 return XCAR (restriction
);
2081 restriction
= coding_system_charset_list (restriction
);
2082 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2086 return (CHARSET_NAME (charset
));
2090 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2092 Return charset of a character in the current buffer at position POS.
2093 If POS is nil, it defaults to the current point.
2094 If POS is out of range, the value is nil. */)
2098 struct charset
*charset
;
2100 ch
= Fchar_after (pos
);
2101 if (! INTEGERP (ch
))
2103 charset
= CHAR_CHARSET (XINT (ch
));
2104 return (CHARSET_NAME (charset
));
2108 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2110 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2112 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2113 by their DIMENSION, CHARS, and FINAL-CHAR,
2114 whereas Emacs distinguishes them by charset symbol.
2115 See the documentation of the function `charset-info' for the meanings of
2116 DIMENSION, CHARS, and FINAL-CHAR. */)
2117 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2122 check_iso_charset_parameter (dimension
, chars
, final_char
);
2123 chars_flag
= XFASTINT (chars
) == 96;
2124 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2125 XFASTINT (final_char
));
2126 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2130 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2134 Clear temporary charset mapping tables.
2135 It should be called only from temacs invoked for dumping. */)
2138 if (temp_charset_work
)
2140 xfree (temp_charset_work
);
2141 temp_charset_work
= NULL
;
2144 if (CHAR_TABLE_P (Vchar_unify_table
))
2145 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2150 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2151 Scharset_priority_list
, 0, 1, 0,
2152 doc
: /* Return the list of charsets ordered by priority.
2153 HIGHESTP non-nil means just return the highest priority one. */)
2154 (Lisp_Object highestp
)
2156 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2158 if (!NILP (highestp
))
2159 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2161 while (!NILP (list
))
2163 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2166 return Fnreverse (val
);
2169 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2171 doc
: /* Assign higher priority to the charsets given as arguments.
2172 usage: (set-charset-priority &rest charsets) */)
2173 (ptrdiff_t nargs
, Lisp_Object
*args
)
2175 Lisp_Object new_head
, old_list
, arglist
[2];
2176 Lisp_Object list_2022
, list_emacs_mule
;
2180 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2182 for (i
= 0; i
< nargs
; i
++)
2184 CHECK_CHARSET_GET_ID (args
[i
], id
);
2185 if (! NILP (Fmemq (make_number (id
), old_list
)))
2187 old_list
= Fdelq (make_number (id
), old_list
);
2188 new_head
= Fcons (make_number (id
), new_head
);
2191 arglist
[0] = Fnreverse (new_head
);
2192 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2193 Vcharset_ordered_list
= Fnconc (2, arglist
);
2194 charset_ordered_list_tick
++;
2196 charset_unibyte
= -1;
2197 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2198 CONSP (old_list
); old_list
= XCDR (old_list
))
2200 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2201 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2202 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2203 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2204 if (charset_unibyte
< 0)
2206 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2208 if (CHARSET_DIMENSION (charset
) == 1
2209 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2210 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2211 charset_unibyte
= CHARSET_ID (charset
);
2214 Viso_2022_charset_list
= Fnreverse (list_2022
);
2215 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2216 if (charset_unibyte
< 0)
2217 charset_unibyte
= charset_iso_8859_1
;
2222 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2224 doc
: /* Internal use only.
2225 Return charset identification number of CHARSET. */)
2226 (Lisp_Object charset
)
2230 CHECK_CHARSET_GET_ID (charset
, id
);
2231 return make_number (id
);
2234 struct charset_sort_data
2236 Lisp_Object charset
;
2242 charset_compare (const void *d1
, const void *d2
)
2244 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2245 if (data1
->priority
!= data2
->priority
)
2246 return data1
->priority
< data2
->priority
? -1 : 1;
2250 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2251 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2252 Return the sorted list. CHARSETS is modified by side effects.
2253 See also `charset-priority-list' and `set-charset-priority'. */)
2254 (Lisp_Object charsets
)
2256 Lisp_Object len
= Flength (charsets
);
2257 ptrdiff_t n
= XFASTINT (len
), i
, j
;
2259 Lisp_Object tail
, elt
, attrs
;
2260 struct charset_sort_data
*sort_data
;
2261 int id
, min_id
= INT_MAX
, max_id
= INT_MIN
;
2266 SAFE_NALLOCA (sort_data
, 1, n
);
2267 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2270 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2271 sort_data
[i
].charset
= elt
;
2272 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2278 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2279 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2282 id
= XFASTINT (elt
);
2283 if (id
>= min_id
&& id
<= max_id
)
2284 for (j
= 0; j
< n
; j
++)
2285 if (sort_data
[j
].id
== id
)
2287 sort_data
[j
].priority
= i
;
2291 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2292 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2293 XSETCAR (tail
, sort_data
[i
].charset
);
2302 Lisp_Object tempdir
;
2303 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2304 if (! file_accessible_directory_p (SSDATA (tempdir
)))
2306 /* This used to be non-fatal (dir_warning), but it should not
2307 happen, and if it does sooner or later it will cause some
2308 obscure problem (eg bug#6401), so better abort. */
2309 fprintf (stderr
, "Error: charsets directory not found:\n\
2311 Emacs will not function correctly without the character map files.\n%s\
2312 Please check your installation!\n",
2314 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2315 variable is set, maybe it has the wrong value?\n" : "");
2319 Vcharset_map_path
= list1 (tempdir
);
2324 init_charset_once (void)
2328 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2329 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2330 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2331 iso_charset_table
[i
][j
][k
] = -1;
2333 for (i
= 0; i
< 256; i
++)
2334 emacs_mule_charset
[i
] = -1;
2336 charset_jisx0201_roman
= -1;
2337 charset_jisx0208_1978
= -1;
2338 charset_jisx0208
= -1;
2339 charset_ksc5601
= -1;
2344 /* Allocate an initial charset table that is large enough to handle
2345 Emacs while it is bootstrapping. As of September 2011, the size
2346 needs to be at least 166; make it a bit bigger to allow for future
2349 Don't make the value so small that the table is reallocated during
2350 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2351 during an initial bootstrap wreak havoc after dumping; see the
2352 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2353 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2354 static struct charset charset_table_init
[180];
2357 syms_of_charset (void)
2359 #include "charset.x"
2361 DEFSYM (Qcharsetp
, "charsetp");
2363 DEFSYM (Qascii
, "ascii");
2364 DEFSYM (Qunicode
, "unicode");
2365 DEFSYM (Qemacs
, "emacs");
2366 DEFSYM (Qeight_bit
, "eight-bit");
2367 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2372 staticpro (&Vcharset_ordered_list
);
2373 Vcharset_ordered_list
= Qnil
;
2375 staticpro (&Viso_2022_charset_list
);
2376 Viso_2022_charset_list
= Qnil
;
2378 staticpro (&Vemacs_mule_charset_list
);
2379 Vemacs_mule_charset_list
= Qnil
;
2381 /* Don't staticpro them here. It's done in syms_of_fns. */
2382 QCtest
= intern_c_string (":test");
2383 Qeq
= intern_c_string ("eq");
2385 staticpro (&Vcharset_hash_table
);
2387 Lisp_Object args
[2];
2390 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2393 charset_table
= charset_table_init
;
2394 charset_table_size
= ARRAYELTS (charset_table_init
);
2395 charset_table_used
= 0;
2397 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2398 doc
: /* List of directories to search for charset map files. */);
2399 Vcharset_map_path
= Qnil
;
2401 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2402 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2403 inhibit_load_charset_map
= 0;
2405 DEFVAR_LISP ("charset-list", Vcharset_list
,
2406 doc
: /* List of all charsets ever defined. */);
2407 Vcharset_list
= Qnil
;
2409 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2410 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2411 If the current language environment is for multiple languages (e.g. "Latin-1"),
2412 the value may be a list of mnemonics. */);
2413 Vcurrent_iso639_language
= Qnil
;
2416 = define_charset_internal (Qascii
, 1, "\x00\x7F\0\0\0\0\0",
2417 0, 127, 'B', -1, 0, 1, 0, 0);
2419 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\0\0\0\0\0",
2420 0, 255, -1, -1, -1, 1, 0, 0);
2422 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2423 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2425 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2426 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2428 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\0\0\0\0\0",
2429 128, 255, -1, 0, -1, 0, 1,
2430 MAX_5_BYTE_CHAR
+ 1);
2431 charset_unibyte
= charset_iso_8859_1
;