1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
79 /* The corresponding charsets. */
81 int charset_eight_bit
;
82 int charset_iso_8859_1
;
86 /* The other special charsets. */
87 int charset_jisx0201_roman
;
88 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 struct charset
*emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 Lisp_Object Vcharset_map_path
;
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map
;
126 Lisp_Object Vcurrent_iso639_language
;
128 /* Defined in chartab.c */
130 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
131 Lisp_Object function
, Lisp_Object table
,
132 Lisp_Object arg
, struct charset
*charset
,
133 unsigned from
, unsigned to
));
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
175 /* The current charset for which the following tables are setup. */
176 struct charset
*current
;
178 /* 1 iff the following table is used for encoder. */
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char
, max_char
;
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder
[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder
[0x20000];
203 } *temp_charset_work
;
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded
;
233 struct charset_map_entries
239 struct charset_map_entries
*next
;
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258 If CONTROL_FLAG is 2, setup the following tables.
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
268 load_charset_map (charset
, entries
, n_entries
, control_flag
)
269 struct charset
*charset
;
270 struct charset_map_entries
*entries
;
274 Lisp_Object vec
, table
;
275 unsigned max_code
= CHARSET_MAX_CODE (charset
);
276 int ascii_compatible_p
= charset
->ascii_compatible_p
;
277 int min_char
, max_char
, nonascii_min_char
;
279 unsigned char *fast_map
= charset
->fast_map
;
286 if (! inhibit_load_charset_map
)
288 if (control_flag
== 1)
290 if (charset
->method
== CHARSET_METHOD_MAP
)
292 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
294 vec
= CHARSET_DECODER (charset
)
295 = Fmake_vector (make_number (n
), make_number (-1));
299 char_table_set_range (Vchar_unify_table
,
300 charset
->min_char
, charset
->max_char
,
306 table
= Fmake_char_table (Qnil
, Qnil
);
307 if (charset
->method
== CHARSET_METHOD_MAP
)
308 CHARSET_ENCODER (charset
) = table
;
310 CHARSET_DEUNIFIER (charset
) = table
;
315 if (! temp_charset_work
)
316 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
317 if (control_flag
== 1)
319 memset (temp_charset_work
->table
.decoder
, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work
->for_encoder
= 0;
325 memset (temp_charset_work
->table
.encoder
, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work
->zero_index_char
= -1;
329 temp_charset_work
->current
= charset
;
330 temp_charset_work
->for_encoder
= (control_flag
== 2);
333 charset_map_loaded
= 1;
336 min_char
= max_char
= entries
->entry
[0].c
;
337 nonascii_min_char
= MAX_CHAR
;
338 for (i
= 0; i
< n_entries
; i
++)
341 int from_index
, to_index
;
343 int idx
= i
% 0x10000;
345 if (i
> 0 && idx
== 0)
346 entries
= entries
->next
;
347 from
= entries
->entry
[idx
].from
;
348 to
= entries
->entry
[idx
].to
;
349 from_c
= entries
->entry
[idx
].c
;
350 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
353 to_index
= from_index
;
358 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
359 to_c
= from_c
+ (to_index
- from_index
);
361 if (from_index
< 0 || to_index
< 0)
366 else if (from_c
< min_char
)
369 if (control_flag
== 1)
371 if (charset
->method
== CHARSET_METHOD_MAP
)
372 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 ASET (vec
, from_index
, make_number (from_c
));
375 for (; from_index
<= to_index
; from_index
++, from_c
++)
376 CHAR_TABLE_SET (Vchar_unify_table
,
377 CHARSET_CODE_OFFSET (charset
) + from_index
,
378 make_number (from_c
));
380 else if (control_flag
== 2)
382 if (charset
->method
== CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset
))
384 for (; from_index
<= to_index
; from_index
++, from_c
++)
386 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
388 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
389 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
392 for (; from_index
<= to_index
; from_index
++, from_c
++)
394 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
395 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
398 else if (control_flag
== 3)
399 for (; from_index
<= to_index
; from_index
++, from_c
++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
401 else if (control_flag
== 4)
402 for (; from_index
<= to_index
; from_index
++, from_c
++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
404 else /* control_flag == 0 */
406 if (ascii_compatible_p
)
408 if (! ASCII_BYTE_P (from_c
))
410 if (from_c
< nonascii_min_char
)
411 nonascii_min_char
= from_c
;
413 else if (! ASCII_BYTE_P (to_c
))
415 nonascii_min_char
= 0x80;
419 for (; from_c
<= to_c
; from_c
++)
420 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
424 if (control_flag
== 0)
426 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
427 ? nonascii_min_char
: min_char
);
428 CHARSET_MAX_CHAR (charset
) = max_char
;
430 else if (control_flag
== 4)
432 temp_charset_work
->min_char
= min_char
;
433 temp_charset_work
->max_char
= max_char
;
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
441 static INLINE
unsigned
449 while ((c
= getc (fp
)) != EOF
)
453 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
457 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
469 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
471 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
473 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
474 n
= (n
* 10) + c
- '0';
480 extern Lisp_Object Qfile_name_handler_alist
;
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
499 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
502 load_charset_map_from_file (charset
, mapfile
, control_flag
)
503 struct charset
*charset
;
507 unsigned min_code
= CHARSET_MIN_CODE (charset
);
508 unsigned max_code
= CHARSET_MAX_CODE (charset
);
512 Lisp_Object suffixes
;
513 struct charset_map_entries
*head
, *entries
;
515 int count
= SPECPDL_INDEX ();
517 suffixes
= Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil
));
520 specbind (Qfile_name_handler_alist
, Qnil
);
521 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
522 unbind_to (count
, Qnil
);
524 || ! (fp
= fdopen (fd
, "r")))
525 error ("Failure in loading charset map: %S", SDATA (mapfile
));
527 head
= entries
= ((struct charset_map_entries
*)
528 alloca (sizeof (struct charset_map_entries
)));
537 from
= read_hex (fp
, &eof
);
540 if (getc (fp
) == '-')
541 to
= read_hex (fp
, &eof
);
544 c
= (int) read_hex (fp
, &eof
);
546 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
549 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
551 entries
->next
= ((struct charset_map_entries
*)
552 alloca (sizeof (struct charset_map_entries
)));
553 entries
= entries
->next
;
555 idx
= n_entries
% 0x10000;
556 entries
->entry
[idx
].from
= from
;
557 entries
->entry
[idx
].to
= to
;
558 entries
->entry
[idx
].c
= c
;
564 load_charset_map (charset
, head
, n_entries
, control_flag
);
568 load_charset_map_from_vector (charset
, vec
, control_flag
)
569 struct charset
*charset
;
573 unsigned min_code
= CHARSET_MIN_CODE (charset
);
574 unsigned max_code
= CHARSET_MAX_CODE (charset
);
575 struct charset_map_entries
*head
, *entries
;
577 int len
= ASIZE (vec
);
582 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
586 head
= entries
= ((struct charset_map_entries
*)
587 alloca (sizeof (struct charset_map_entries
)));
589 for (i
= 0; i
< len
; i
+= 2)
591 Lisp_Object val
, val2
;
603 from
= XFASTINT (val
);
604 to
= XFASTINT (val2
);
609 from
= to
= XFASTINT (val
);
611 val
= AREF (vec
, i
+ 1);
615 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
618 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
620 entries
->next
= ((struct charset_map_entries
*)
621 alloca (sizeof (struct charset_map_entries
)));
622 entries
= entries
->next
;
624 idx
= n_entries
% 0x10000;
625 entries
->entry
[idx
].from
= from
;
626 entries
->entry
[idx
].to
= to
;
627 entries
->entry
[idx
].c
= c
;
631 load_charset_map (charset
, head
, n_entries
, control_flag
);
635 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
639 load_charset (charset
, control_flag
)
640 struct charset
*charset
;
645 if (inhibit_load_charset_map
647 && charset
== temp_charset_work
->current
648 && (control_flag
== 2 == temp_charset_work
->for_encoder
))
651 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
652 map
= CHARSET_MAP (charset
);
653 else if (CHARSET_UNIFIED_P (charset
))
654 map
= CHARSET_UNIFY_MAP (charset
);
656 load_charset_map_from_file (charset
, map
, control_flag
);
658 load_charset_map_from_vector (charset
, map
, control_flag
);
662 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
663 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
667 return (CHARSETP (object
) ? Qt
: Qnil
);
671 void map_charset_for_dump
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
672 Lisp_Object function
, Lisp_Object arg
,
673 unsigned from
, unsigned to
));
676 map_charset_for_dump (c_function
, function
, arg
, from
, to
)
677 void (*c_function
) (Lisp_Object
, Lisp_Object
);
678 Lisp_Object function
, arg
;
681 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
682 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
687 range
= Fcons (Qnil
, Qnil
);
690 c
= temp_charset_work
->min_char
;
691 stop
= (temp_charset_work
->max_char
< 0x20000
692 ? temp_charset_work
->max_char
: 0xFFFF);
696 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
698 if (index
>= from_idx
&& index
<= to_idx
)
700 if (NILP (XCAR (range
)))
701 XSETCAR (range
, make_number (c
));
703 else if (! NILP (XCAR (range
)))
705 XSETCDR (range
, make_number (c
- 1));
707 (*c_function
) (arg
, range
);
709 call2 (function
, range
, arg
);
710 XSETCAR (range
, Qnil
);
714 if (c
== temp_charset_work
->max_char
)
716 if (! NILP (XCAR (range
)))
718 XSETCDR (range
, make_number (c
));
720 (*c_function
) (arg
, range
);
722 call2 (function
, range
, arg
);
727 stop
= temp_charset_work
->max_char
;
734 map_charset_chars (c_function
, function
, arg
,
736 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
737 Lisp_Object function
, arg
;
738 struct charset
*charset
;
744 partial
= (from
> CHARSET_MIN_CODE (charset
)
745 || to
< CHARSET_MAX_CODE (charset
));
747 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
749 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
750 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
751 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
752 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
754 if (CHARSET_UNIFIED_P (charset
))
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
757 load_charset (charset
, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
759 map_char_table_for_charset (c_function
, function
,
760 CHARSET_DEUNIFIER (charset
), arg
,
761 partial
? charset
: NULL
, from
, to
);
763 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
766 range
= Fcons (make_number (from_c
), make_number (to_c
));
768 (*c_function
) (arg
, range
);
770 call2 (function
, range
, arg
);
772 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
775 load_charset (charset
, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
777 map_char_table_for_charset (c_function
, function
,
778 CHARSET_ENCODER (charset
), arg
,
779 partial
? charset
: NULL
, from
, to
);
781 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
783 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
785 Lisp_Object subset_info
;
788 subset_info
= CHARSET_SUBSET (charset
);
789 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
790 offset
= XINT (AREF (subset_info
, 3));
792 if (from
< XFASTINT (AREF (subset_info
, 1)))
793 from
= XFASTINT (AREF (subset_info
, 1));
795 if (to
> XFASTINT (AREF (subset_info
, 2)))
796 to
= XFASTINT (AREF (subset_info
, 2));
797 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
799 else /* i.e. CHARSET_METHOD_SUPERSET */
803 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
804 parents
= XCDR (parents
))
807 unsigned this_from
, this_to
;
809 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
810 offset
= XINT (XCDR (XCAR (parents
)));
811 this_from
= from
- offset
;
812 this_to
= to
- offset
;
813 if (this_from
< CHARSET_MIN_CODE (charset
))
814 this_from
= CHARSET_MIN_CODE (charset
);
815 if (this_to
> CHARSET_MAX_CODE (charset
))
816 this_to
= CHARSET_MAX_CODE (charset
);
817 map_charset_chars (c_function
, function
, arg
, charset
,
823 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
824 doc
: /* Call FUNCTION for all characters in CHARSET.
825 FUNCTION is called with an argument RANGE and the optional 3rd
828 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829 characters contained in CHARSET.
831 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
832 range of code points of target characters. */)
833 (function
, charset
, arg
, from_code
, to_code
)
834 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
839 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
840 if (NILP (from_code
))
841 from
= CHARSET_MIN_CODE (cs
);
844 CHECK_NATNUM (from_code
);
845 from
= XINT (from_code
);
846 if (from
< CHARSET_MIN_CODE (cs
))
847 from
= CHARSET_MIN_CODE (cs
);
850 to
= CHARSET_MAX_CODE (cs
);
853 CHECK_NATNUM (to_code
);
855 if (to
> CHARSET_MAX_CODE (cs
))
856 to
= CHARSET_MAX_CODE (cs
);
858 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
863 /* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
868 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
869 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
870 doc
: /* For internal use only.
871 usage: (define-charset-internal ...) */)
876 /* Charset attr vector. */
880 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
882 struct charset charset
;
885 int new_definition_p
;
888 if (nargs
!= charset_arg_max
)
889 return Fsignal (Qwrong_number_of_arguments
,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs
)));
893 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
895 CHECK_SYMBOL (args
[charset_arg_name
]);
896 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
898 val
= args
[charset_arg_code_space
];
899 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
901 int min_byte
, max_byte
;
903 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
904 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
905 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
906 error ("Invalid :code-space value");
907 charset
.code_space
[i
* 4] = min_byte
;
908 charset
.code_space
[i
* 4 + 1] = max_byte
;
909 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
910 nchars
*= charset
.code_space
[i
* 4 + 2];
911 charset
.code_space
[i
* 4 + 3] = nchars
;
916 val
= args
[charset_arg_dimension
];
918 charset
.dimension
= dimension
;
922 charset
.dimension
= XINT (val
);
923 if (charset
.dimension
< 1 || charset
.dimension
> 4)
924 args_out_of_range_3 (val
, make_number (1), make_number (4));
927 charset
.code_linear_p
928 = (charset
.dimension
== 1
929 || (charset
.code_space
[2] == 256
930 && (charset
.dimension
== 2
931 || (charset
.code_space
[6] == 256
932 && (charset
.dimension
== 3
933 || charset
.code_space
[10] == 256)))));
935 if (! charset
.code_linear_p
)
937 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
938 bzero (charset
.code_space_mask
, 256);
939 for (i
= 0; i
< 4; i
++)
940 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
942 charset
.code_space_mask
[j
] |= (1 << i
);
945 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
947 charset
.min_code
= (charset
.code_space
[0]
948 | (charset
.code_space
[4] << 8)
949 | (charset
.code_space
[8] << 16)
950 | (charset
.code_space
[12] << 24));
951 charset
.max_code
= (charset
.code_space
[1]
952 | (charset
.code_space
[5] << 8)
953 | (charset
.code_space
[9] << 16)
954 | (charset
.code_space
[13] << 24));
955 charset
.char_index_offset
= 0;
957 val
= args
[charset_arg_min_code
];
967 CHECK_NUMBER_CAR (val
);
968 CHECK_NUMBER_CDR (val
);
969 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
971 if (code
< charset
.min_code
972 || code
> charset
.max_code
)
973 args_out_of_range_3 (make_number (charset
.min_code
),
974 make_number (charset
.max_code
), val
);
975 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
976 charset
.min_code
= code
;
979 val
= args
[charset_arg_max_code
];
989 CHECK_NUMBER_CAR (val
);
990 CHECK_NUMBER_CDR (val
);
991 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
993 if (code
< charset
.min_code
994 || code
> charset
.max_code
)
995 args_out_of_range_3 (make_number (charset
.min_code
),
996 make_number (charset
.max_code
), val
);
997 charset
.max_code
= code
;
1000 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
1002 val
= args
[charset_arg_invalid_code
];
1005 if (charset
.min_code
> 0)
1006 charset
.invalid_code
= 0;
1009 XSETINT (val
, charset
.max_code
+ 1);
1010 if (XINT (val
) == charset
.max_code
+ 1)
1011 charset
.invalid_code
= charset
.max_code
+ 1;
1013 error ("Attribute :invalid-code must be specified");
1019 charset
.invalid_code
= XFASTINT (val
);
1022 val
= args
[charset_arg_iso_final
];
1024 charset
.iso_final
= -1;
1028 if (XINT (val
) < '0' || XINT (val
) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val
));
1030 charset
.iso_final
= XINT (val
);
1033 val
= args
[charset_arg_iso_revision
];
1035 charset
.iso_revision
= -1;
1039 if (XINT (val
) > 63)
1040 args_out_of_range (make_number (63), val
);
1041 charset
.iso_revision
= XINT (val
);
1044 val
= args
[charset_arg_emacs_mule_id
];
1046 charset
.emacs_mule_id
= -1;
1050 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val
));
1052 charset
.emacs_mule_id
= XINT (val
);
1055 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1057 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1059 charset
.unified_p
= 0;
1061 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
1063 if (! NILP (args
[charset_arg_code_offset
]))
1065 val
= args
[charset_arg_code_offset
];
1068 charset
.method
= CHARSET_METHOD_OFFSET
;
1069 charset
.code_offset
= XINT (val
);
1071 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1072 charset
.min_char
= i
+ charset
.code_offset
;
1073 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1074 charset
.max_char
= i
+ charset
.code_offset
;
1075 if (charset
.max_char
> MAX_CHAR
)
1076 error ("Unsupported max char: %d", charset
.max_char
);
1078 i
= (charset
.min_char
>> 7) << 7;
1079 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1080 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1081 i
= (i
>> 12) << 12;
1082 for (; i
<= charset
.max_char
; i
+= 0x1000)
1083 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1085 else if (! NILP (args
[charset_arg_map
]))
1087 val
= args
[charset_arg_map
];
1088 ASET (attrs
, charset_map
, val
);
1089 charset
.method
= CHARSET_METHOD_MAP
;
1091 else if (! NILP (args
[charset_arg_subset
]))
1094 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1095 struct charset
*parent_charset
;
1097 val
= args
[charset_arg_subset
];
1098 parent
= Fcar (val
);
1099 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1100 parent_min_code
= Fnth (make_number (1), val
);
1101 CHECK_NATNUM (parent_min_code
);
1102 parent_max_code
= Fnth (make_number (2), val
);
1103 CHECK_NATNUM (parent_max_code
);
1104 parent_code_offset
= Fnth (make_number (3), val
);
1105 CHECK_NUMBER (parent_code_offset
);
1106 val
= Fmake_vector (make_number (4), Qnil
);
1107 ASET (val
, 0, make_number (parent_charset
->id
));
1108 ASET (val
, 1, parent_min_code
);
1109 ASET (val
, 2, parent_max_code
);
1110 ASET (val
, 3, parent_code_offset
);
1111 ASET (attrs
, charset_subset
, val
);
1113 charset
.method
= CHARSET_METHOD_SUBSET
;
1114 /* Here, we just copy the parent's fast_map. It's not accurate,
1115 but at least it works for quickly detecting which character
1116 DOESN'T belong to this charset. */
1117 for (i
= 0; i
< 190; i
++)
1118 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1120 /* We also copy these for parents. */
1121 charset
.min_char
= parent_charset
->min_char
;
1122 charset
.max_char
= parent_charset
->max_char
;
1124 else if (! NILP (args
[charset_arg_superset
]))
1126 val
= args
[charset_arg_superset
];
1127 charset
.method
= CHARSET_METHOD_SUPERSET
;
1128 val
= Fcopy_sequence (val
);
1129 ASET (attrs
, charset_superset
, val
);
1131 charset
.min_char
= MAX_CHAR
;
1132 charset
.max_char
= 0;
1133 for (; ! NILP (val
); val
= Fcdr (val
))
1135 Lisp_Object elt
, car_part
, cdr_part
;
1136 int this_id
, offset
;
1137 struct charset
*this_charset
;
1142 car_part
= XCAR (elt
);
1143 cdr_part
= XCDR (elt
);
1144 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1145 CHECK_NUMBER (cdr_part
);
1146 offset
= XINT (cdr_part
);
1150 CHECK_CHARSET_GET_ID (elt
, this_id
);
1153 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1155 this_charset
= CHARSET_FROM_ID (this_id
);
1156 if (charset
.min_char
> this_charset
->min_char
)
1157 charset
.min_char
= this_charset
->min_char
;
1158 if (charset
.max_char
< this_charset
->max_char
)
1159 charset
.max_char
= this_charset
->max_char
;
1160 for (i
= 0; i
< 190; i
++)
1161 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1165 error ("None of :code-offset, :map, :parents are specified");
1167 val
= args
[charset_arg_unify_map
];
1168 if (! NILP (val
) && !STRINGP (val
))
1170 ASET (attrs
, charset_unify_map
, val
);
1172 CHECK_LIST (args
[charset_arg_plist
]);
1173 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1175 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1177 if (charset
.hash_index
>= 0)
1179 new_definition_p
= 0;
1180 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1181 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1185 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1187 if (charset_table_used
== charset_table_size
)
1189 struct charset
*new_table
1190 = (struct charset
*) xmalloc (sizeof (struct charset
)
1191 * (charset_table_size
+ 16));
1192 bcopy (charset_table
, new_table
,
1193 sizeof (struct charset
) * charset_table_size
);
1194 charset_table_size
+= 16;
1195 charset_table
= new_table
;
1197 id
= charset_table_used
++;
1198 new_definition_p
= 1;
1201 ASET (attrs
, charset_id
, make_number (id
));
1203 charset_table
[id
] = charset
;
1205 if (charset
.method
== CHARSET_METHOD_MAP
)
1207 load_charset (&charset
, 0);
1208 charset_table
[id
] = charset
;
1211 if (charset
.iso_final
>= 0)
1213 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1214 charset
.iso_final
) = id
;
1215 if (new_definition_p
)
1216 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1217 Fcons (make_number (id
), Qnil
));
1218 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1219 charset_jisx0201_roman
= id
;
1220 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1221 charset_jisx0208_1978
= id
;
1222 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1223 charset_jisx0208
= id
;
1226 if (charset
.emacs_mule_id
>= 0)
1228 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1229 if (charset
.emacs_mule_id
< 0xA0)
1230 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1232 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1233 if (new_definition_p
)
1234 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1235 Fcons (make_number (id
), Qnil
));
1238 if (new_definition_p
)
1240 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1241 if (charset
.supplementary_p
)
1242 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1243 Fcons (make_number (id
), Qnil
));
1248 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1250 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1252 if (cs
->supplementary_p
)
1255 if (EQ (tail
, Vcharset_ordered_list
))
1256 Vcharset_ordered_list
= Fcons (make_number (id
),
1257 Vcharset_ordered_list
);
1258 else if (NILP (tail
))
1259 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1260 Fcons (make_number (id
), Qnil
));
1263 val
= Fcons (XCAR (tail
), XCDR (tail
));
1264 XSETCDR (tail
, val
);
1265 XSETCAR (tail
, make_number (id
));
1268 charset_ordered_list_tick
++;
1275 /* Same as Fdefine_charset_internal but arguments are more convenient
1276 to call from C (typically in syms_of_charset). This can define a
1277 charset of `offset' method only. Return the ID of the new
1281 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1282 iso_final
, iso_revision
, emacs_mule_id
,
1283 ascii_compatible
, supplementary
,
1287 unsigned char *code_space
;
1288 unsigned min_code
, max_code
;
1289 int iso_final
, iso_revision
, emacs_mule_id
;
1290 int ascii_compatible
, supplementary
;
1293 Lisp_Object args
[charset_arg_max
];
1294 Lisp_Object plist
[14];
1298 args
[charset_arg_name
] = name
;
1299 args
[charset_arg_dimension
] = make_number (dimension
);
1300 val
= Fmake_vector (make_number (8), make_number (0));
1301 for (i
= 0; i
< 8; i
++)
1302 ASET (val
, i
, make_number (code_space
[i
]));
1303 args
[charset_arg_code_space
] = val
;
1304 args
[charset_arg_min_code
] = make_number (min_code
);
1305 args
[charset_arg_max_code
] = make_number (max_code
);
1306 args
[charset_arg_iso_final
]
1307 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1308 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1309 args
[charset_arg_emacs_mule_id
]
1310 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1311 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1312 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1313 args
[charset_arg_invalid_code
] = Qnil
;
1314 args
[charset_arg_code_offset
] = make_number (code_offset
);
1315 args
[charset_arg_map
] = Qnil
;
1316 args
[charset_arg_subset
] = Qnil
;
1317 args
[charset_arg_superset
] = Qnil
;
1318 args
[charset_arg_unify_map
] = Qnil
;
1320 plist
[0] = intern (":name");
1321 plist
[1] = args
[charset_arg_name
];
1322 plist
[2] = intern (":dimension");
1323 plist
[3] = args
[charset_arg_dimension
];
1324 plist
[4] = intern (":code-space");
1325 plist
[5] = args
[charset_arg_code_space
];
1326 plist
[6] = intern (":iso-final-char");
1327 plist
[7] = args
[charset_arg_iso_final
];
1328 plist
[8] = intern (":emacs-mule-id");
1329 plist
[9] = args
[charset_arg_emacs_mule_id
];
1330 plist
[10] = intern (":ascii-compatible-p");
1331 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1332 plist
[12] = intern (":code-offset");
1333 plist
[13] = args
[charset_arg_code_offset
];
1335 args
[charset_arg_plist
] = Flist (14, plist
);
1336 Fdefine_charset_internal (charset_arg_max
, args
);
1338 return XINT (CHARSET_SYMBOL_ID (name
));
1342 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1343 Sdefine_charset_alias
, 2, 2, 0,
1344 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1346 Lisp_Object alias
, charset
;
1350 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1351 Fputhash (alias
, attr
, Vcharset_hash_table
);
1352 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1357 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1358 doc
: /* Return the property list of CHARSET. */)
1360 Lisp_Object charset
;
1364 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1365 return CHARSET_ATTR_PLIST (attrs
);
1369 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1370 doc
: /* Set CHARSET's property list to PLIST. */)
1372 Lisp_Object charset
, plist
;
1376 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1377 CHARSET_ATTR_PLIST (attrs
) = plist
;
1382 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1383 doc
: /* Unify characters of CHARSET with Unicode.
1384 This means reading the relevant file and installing the table defined
1385 by CHARSET's `:unify-map' property.
1387 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1388 the same meaning as the `:unify-map' attribute in the function
1389 `define-charset' (which see).
1391 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1392 (charset
, unify_map
, deunify
)
1393 Lisp_Object charset
, unify_map
, deunify
;
1398 CHECK_CHARSET_GET_ID (charset
, id
);
1399 cs
= CHARSET_FROM_ID (id
);
1401 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1402 : ! CHARSET_UNIFIED_P (cs
))
1405 CHARSET_UNIFIED_P (cs
) = 0;
1408 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1409 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1410 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1411 if (NILP (unify_map
))
1412 unify_map
= CHARSET_UNIFY_MAP (cs
);
1415 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1416 signal_error ("Bad unify-map", unify_map
);
1417 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1419 if (NILP (Vchar_unify_table
))
1420 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1421 char_table_set_range (Vchar_unify_table
,
1422 cs
->min_char
, cs
->max_char
, charset
);
1423 CHARSET_UNIFIED_P (cs
) = 1;
1425 else if (CHAR_TABLE_P (Vchar_unify_table
))
1427 int min_code
= CHARSET_MIN_CODE (cs
);
1428 int max_code
= CHARSET_MAX_CODE (cs
);
1429 int min_char
= DECODE_CHAR (cs
, min_code
);
1430 int max_char
= DECODE_CHAR (cs
, max_code
);
1432 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1438 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1439 Sget_unused_iso_final_char
, 2, 2, 0,
1441 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1442 DIMENSION is the number of bytes to represent a character: 1 or 2.
1443 CHARS is the number of characters in a dimension: 94 or 96.
1445 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1446 If there's no unused final char for the specified kind of charset,
1449 Lisp_Object dimension
, chars
;
1453 CHECK_NUMBER (dimension
);
1454 CHECK_NUMBER (chars
);
1455 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1456 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1457 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1458 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1459 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1460 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1462 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1466 check_iso_charset_parameter (dimension
, chars
, final_char
)
1467 Lisp_Object dimension
, chars
, final_char
;
1469 CHECK_NATNUM (dimension
);
1470 CHECK_NATNUM (chars
);
1471 CHECK_NATNUM (final_char
);
1473 if (XINT (dimension
) > 3)
1474 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1475 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1476 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1477 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1478 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1482 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1484 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1486 On decoding by an ISO-2022 base coding system, when a charset
1487 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1488 if CHARSET is designated instead. */)
1489 (dimension
, chars
, final_char
, charset
)
1490 Lisp_Object dimension
, chars
, final_char
, charset
;
1495 CHECK_CHARSET_GET_ID (charset
, id
);
1496 check_iso_charset_parameter (dimension
, chars
, final_char
);
1497 chars_flag
= XINT (chars
) == 96;
1498 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1503 /* Return information about charsets in the text at PTR of NBYTES
1504 bytes, which are NCHARS characters. The value is:
1506 0: Each character is represented by one byte. This is always
1507 true for a unibyte string. For a multibyte string, true if
1508 it contains only ASCII characters.
1510 1: No charsets other than ascii, control-1, and latin-1 are
1517 string_xstring_p (string
)
1520 const unsigned char *p
= SDATA (string
);
1521 const unsigned char *endp
= p
+ SBYTES (string
);
1523 if (SCHARS (string
) == SBYTES (string
))
1528 int c
= STRING_CHAR_ADVANCE (p
);
1537 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1539 CHARSETS is a vector. If Nth element is non-nil, it means the
1540 charset whose id is N is already found.
1542 It may lookup a translation table TABLE if supplied. */
1545 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1546 const unsigned char *ptr
;
1547 EMACS_INT nchars
, nbytes
;
1548 Lisp_Object charsets
, table
;
1551 const unsigned char *pend
= ptr
+ nbytes
;
1553 if (nchars
== nbytes
)
1556 ASET (charsets
, charset_ascii
, Qt
);
1563 c
= translate_char (table
, c
);
1564 if (ASCII_BYTE_P (c
))
1565 ASET (charsets
, charset_ascii
, Qt
);
1567 ASET (charsets
, charset_eight_bit
, Qt
);
1574 int c
= STRING_CHAR_ADVANCE (ptr
);
1575 struct charset
*charset
;
1578 c
= translate_char (table
, c
);
1579 charset
= CHAR_CHARSET (c
);
1580 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1585 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1587 doc
: /* Return a list of charsets in the region between BEG and END.
1588 BEG and END are buffer positions.
1589 Optional arg TABLE if non-nil is a translation table to look up.
1591 If the current buffer is unibyte, the returned list may contain
1592 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1594 Lisp_Object beg
, end
, table
;
1596 Lisp_Object charsets
;
1597 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1600 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1602 validate_region (&beg
, &end
);
1603 from
= XFASTINT (beg
);
1604 stop
= to
= XFASTINT (end
);
1606 if (from
< GPT
&& GPT
< to
)
1609 stop_byte
= GPT_BYTE
;
1612 stop_byte
= CHAR_TO_BYTE (stop
);
1614 from_byte
= CHAR_TO_BYTE (from
);
1616 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1619 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1620 stop_byte
- from_byte
, charsets
, table
,
1624 from
= stop
, from_byte
= stop_byte
;
1625 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1632 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1633 if (!NILP (AREF (charsets
, i
)))
1634 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1638 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1640 doc
: /* Return a list of charsets in STR.
1641 Optional arg TABLE if non-nil is a translation table to look up.
1643 If STR is unibyte, the returned list may contain
1644 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1646 Lisp_Object str
, table
;
1648 Lisp_Object charsets
;
1654 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1655 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1657 STRING_MULTIBYTE (str
));
1659 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1660 if (!NILP (AREF (charsets
, i
)))
1661 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1667 /* Return a unified character code for C (>= 0x110000). VAL is a
1668 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1671 maybe_unify_char (c
, val
)
1675 struct charset
*charset
;
1682 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1683 load_charset (charset
, 1);
1684 if (! inhibit_load_charset_map
)
1686 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1692 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1693 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1702 /* Return a character correponding to the code-point CODE of
1706 decode_char (charset
, code
)
1707 struct charset
*charset
;
1711 enum charset_method method
= CHARSET_METHOD (charset
);
1713 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1716 if (method
== CHARSET_METHOD_SUBSET
)
1718 Lisp_Object subset_info
;
1720 subset_info
= CHARSET_SUBSET (charset
);
1721 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1722 code
-= XINT (AREF (subset_info
, 3));
1723 if (code
< XFASTINT (AREF (subset_info
, 1))
1724 || code
> XFASTINT (AREF (subset_info
, 2)))
1727 c
= DECODE_CHAR (charset
, code
);
1729 else if (method
== CHARSET_METHOD_SUPERSET
)
1731 Lisp_Object parents
;
1733 parents
= CHARSET_SUPERSET (charset
);
1735 for (; CONSP (parents
); parents
= XCDR (parents
))
1737 int id
= XINT (XCAR (XCAR (parents
)));
1738 int code_offset
= XINT (XCDR (XCAR (parents
)));
1739 unsigned this_code
= code
- code_offset
;
1741 charset
= CHARSET_FROM_ID (id
);
1742 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1748 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1752 if (method
== CHARSET_METHOD_MAP
)
1754 Lisp_Object decoder
;
1756 decoder
= CHARSET_DECODER (charset
);
1757 if (! VECTORP (decoder
))
1759 load_charset (charset
, 1);
1760 decoder
= CHARSET_DECODER (charset
);
1762 if (VECTORP (decoder
))
1763 c
= XINT (AREF (decoder
, char_index
));
1765 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1767 else /* method == CHARSET_METHOD_OFFSET */
1769 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1770 if (CHARSET_UNIFIED_P (charset
)
1771 && c
> MAX_UNICODE_CHAR
)
1772 MAYBE_UNIFY_CHAR (c
);
1779 /* Variable used temporarily by the macro ENCODE_CHAR. */
1780 Lisp_Object charset_work
;
1782 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1783 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1784 use CHARSET's strict_max_char instead of max_char. */
1787 encode_char (charset
, c
)
1788 struct charset
*charset
;
1792 enum charset_method method
= CHARSET_METHOD (charset
);
1794 if (CHARSET_UNIFIED_P (charset
))
1796 Lisp_Object deunifier
, deunified
;
1797 int code_index
= -1;
1799 deunifier
= CHARSET_DEUNIFIER (charset
);
1800 if (! CHAR_TABLE_P (deunifier
))
1802 load_charset (charset
, 2);
1803 deunifier
= CHARSET_DEUNIFIER (charset
);
1805 if (CHAR_TABLE_P (deunifier
))
1807 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1809 if (INTEGERP (deunified
))
1810 code_index
= XINT (deunified
);
1814 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1816 if (code_index
>= 0)
1817 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1820 if (method
== CHARSET_METHOD_SUBSET
)
1822 Lisp_Object subset_info
;
1823 struct charset
*this_charset
;
1825 subset_info
= CHARSET_SUBSET (charset
);
1826 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1827 code
= ENCODE_CHAR (this_charset
, c
);
1828 if (code
== CHARSET_INVALID_CODE (this_charset
)
1829 || code
< XFASTINT (AREF (subset_info
, 1))
1830 || code
> XFASTINT (AREF (subset_info
, 2)))
1831 return CHARSET_INVALID_CODE (charset
);
1832 code
+= XINT (AREF (subset_info
, 3));
1836 if (method
== CHARSET_METHOD_SUPERSET
)
1838 Lisp_Object parents
;
1840 parents
= CHARSET_SUPERSET (charset
);
1841 for (; CONSP (parents
); parents
= XCDR (parents
))
1843 int id
= XINT (XCAR (XCAR (parents
)));
1844 int code_offset
= XINT (XCDR (XCAR (parents
)));
1845 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1847 code
= ENCODE_CHAR (this_charset
, c
);
1848 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1849 return code
+ code_offset
;
1851 return CHARSET_INVALID_CODE (charset
);
1854 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1855 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1856 return CHARSET_INVALID_CODE (charset
);
1858 if (method
== CHARSET_METHOD_MAP
)
1860 Lisp_Object encoder
;
1863 encoder
= CHARSET_ENCODER (charset
);
1864 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1866 load_charset (charset
, 2);
1867 encoder
= CHARSET_ENCODER (charset
);
1869 if (CHAR_TABLE_P (encoder
))
1871 val
= CHAR_TABLE_REF (encoder
, c
);
1873 return CHARSET_INVALID_CODE (charset
);
1875 if (! CHARSET_COMPACT_CODES_P (charset
))
1876 code
= INDEX_TO_CODE_POINT (charset
, code
);
1880 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1881 code
= INDEX_TO_CODE_POINT (charset
, code
);
1884 else /* method == CHARSET_METHOD_OFFSET */
1886 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1888 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1895 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1896 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1897 Return nil if CODE-POINT is not valid in CHARSET.
1899 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1901 Optional argument RESTRICTION specifies a way to map the pair of CCS
1902 and CODE-POINT to a character. Currently not supported and just ignored. */)
1903 (charset
, code_point
, restriction
)
1904 Lisp_Object charset
, code_point
, restriction
;
1908 struct charset
*charsetp
;
1910 CHECK_CHARSET_GET_ID (charset
, id
);
1911 if (CONSP (code_point
))
1913 CHECK_NATNUM_CAR (code_point
);
1914 CHECK_NATNUM_CDR (code_point
);
1915 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1919 CHECK_NATNUM (code_point
);
1920 code
= XINT (code_point
);
1922 charsetp
= CHARSET_FROM_ID (id
);
1923 c
= DECODE_CHAR (charsetp
, code
);
1924 return (c
>= 0 ? make_number (c
) : Qnil
);
1928 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1929 doc
: /* Encode the character CH into a code-point of CHARSET.
1930 Return nil if CHARSET doesn't include CH.
1932 Optional argument RESTRICTION specifies a way to map CH to a
1933 code-point in CCS. Currently not supported and just ignored. */)
1934 (ch
, charset
, restriction
)
1935 Lisp_Object ch
, charset
, restriction
;
1939 struct charset
*charsetp
;
1941 CHECK_CHARSET_GET_ID (charset
, id
);
1943 charsetp
= CHARSET_FROM_ID (id
);
1944 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1945 if (code
== CHARSET_INVALID_CODE (charsetp
))
1947 if (code
> 0x7FFFFFF)
1948 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1949 return make_number (code
);
1953 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1955 /* Return a character of CHARSET whose position codes are CODEn.
1957 CODE1 through CODE4 are optional, but if you don't supply sufficient
1958 position codes, it is assumed that the minimum code in each dimension
1960 (charset
, code1
, code2
, code3
, code4
)
1961 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1964 struct charset
*charsetp
;
1968 CHECK_CHARSET_GET_ID (charset
, id
);
1969 charsetp
= CHARSET_FROM_ID (id
);
1971 dimension
= CHARSET_DIMENSION (charsetp
);
1973 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1974 ? 0 : CHARSET_MIN_CODE (charsetp
));
1977 CHECK_NATNUM (code1
);
1978 if (XFASTINT (code1
) >= 0x100)
1979 args_out_of_range (make_number (0xFF), code1
);
1980 code
= XFASTINT (code1
);
1986 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1989 CHECK_NATNUM (code2
);
1990 if (XFASTINT (code2
) >= 0x100)
1991 args_out_of_range (make_number (0xFF), code2
);
1992 code
|= XFASTINT (code2
);
1999 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
2002 CHECK_NATNUM (code3
);
2003 if (XFASTINT (code3
) >= 0x100)
2004 args_out_of_range (make_number (0xFF), code3
);
2005 code
|= XFASTINT (code3
);
2012 code
|= charsetp
->code_space
[0];
2015 CHECK_NATNUM (code4
);
2016 if (XFASTINT (code4
) >= 0x100)
2017 args_out_of_range (make_number (0xFF), code4
);
2018 code
|= XFASTINT (code4
);
2025 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
2027 c
= DECODE_CHAR (charsetp
, code
);
2029 error ("Invalid code(s)");
2030 return make_number (c
);
2034 /* Return the first charset in CHARSET_LIST that contains C.
2035 CHARSET_LIST is a list of charset IDs. If it is nil, use
2036 Vcharset_ordered_list. */
2039 char_charset (c
, charset_list
, code_return
)
2041 Lisp_Object charset_list
;
2042 unsigned *code_return
;
2046 if (NILP (charset_list
))
2047 charset_list
= Vcharset_ordered_list
;
2051 while (CONSP (charset_list
))
2053 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
2054 unsigned code
= ENCODE_CHAR (charset
, c
);
2056 if (code
!= CHARSET_INVALID_CODE (charset
))
2059 *code_return
= code
;
2062 charset_list
= XCDR (charset_list
);
2063 if (c
<= MAX_UNICODE_CHAR
2064 && EQ (charset_list
, Vcharset_non_preferred_head
))
2065 return CHARSET_FROM_ID (charset_unicode
);
2067 return (maybe_null
? NULL
2068 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2069 : CHARSET_FROM_ID (charset_eight_bit
));
2073 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2075 /*Return list of charset and one to four position-codes of CH.
2076 The charset is decided by the current priority order of charsets.
2077 A position-code is a byte value of each dimension of the code-point of
2078 CH in the charset. */)
2082 struct charset
*charset
;
2087 CHECK_CHARACTER (ch
);
2089 charset
= CHAR_CHARSET (c
);
2092 code
= ENCODE_CHAR (charset
, c
);
2093 if (code
== CHARSET_INVALID_CODE (charset
))
2095 dimension
= CHARSET_DIMENSION (charset
);
2096 for (val
= Qnil
; dimension
> 0; dimension
--)
2098 val
= Fcons (make_number (code
& 0xFF), val
);
2101 return Fcons (CHARSET_NAME (charset
), val
);
2105 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2106 doc
: /* Return the charset of highest priority that contains CH.
2107 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2108 from which to find the charset. It may also be a coding system. In
2109 that case, find the charset from what supported by that coding system. */)
2111 Lisp_Object ch
, restriction
;
2113 struct charset
*charset
;
2115 CHECK_CHARACTER (ch
);
2116 if (NILP (restriction
))
2117 charset
= CHAR_CHARSET (XINT (ch
));
2120 Lisp_Object charset_list
;
2122 if (CONSP (restriction
))
2124 for (charset_list
= Qnil
; CONSP (restriction
);
2125 restriction
= XCDR (restriction
))
2129 CHECK_CHARSET_GET_ID (XCAR (restriction
), id
);
2130 charset_list
= Fcons (make_number (id
), charset_list
);
2132 charset_list
= Fnreverse (charset_list
);
2135 charset_list
= coding_system_charset_list (restriction
);
2136 charset
= char_charset (XINT (ch
), charset_list
, NULL
);
2140 return (CHARSET_NAME (charset
));
2144 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2146 Return charset of a character in the current buffer at position POS.
2147 If POS is nil, it defauls to the current point.
2148 If POS is out of range, the value is nil. */)
2153 struct charset
*charset
;
2155 ch
= Fchar_after (pos
);
2156 if (! INTEGERP (ch
))
2158 charset
= CHAR_CHARSET (XINT (ch
));
2159 return (CHARSET_NAME (charset
));
2163 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2165 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2167 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2168 by their DIMENSION, CHARS, and FINAL-CHAR,
2169 whereas Emacs distinguishes them by charset symbol.
2170 See the documentation of the function `charset-info' for the meanings of
2171 DIMENSION, CHARS, and FINAL-CHAR. */)
2172 (dimension
, chars
, final_char
)
2173 Lisp_Object dimension
, chars
, final_char
;
2178 check_iso_charset_parameter (dimension
, chars
, final_char
);
2179 chars_flag
= XFASTINT (chars
) == 96;
2180 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2181 XFASTINT (final_char
));
2182 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2186 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2190 Clear temporary charset mapping tables.
2191 It should be called only from temacs invoked for dumping. */)
2195 struct charset
*charset
;
2198 if (temp_charset_work
)
2200 free (temp_charset_work
);
2201 temp_charset_work
= NULL
;
2204 if (CHAR_TABLE_P (Vchar_unify_table
))
2205 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2210 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2211 Scharset_priority_list
, 0, 1, 0,
2212 doc
: /* Return the list of charsets ordered by priority.
2213 HIGHESTP non-nil means just return the highest priority one. */)
2215 Lisp_Object highestp
;
2217 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2219 if (!NILP (highestp
))
2220 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2222 while (!NILP (list
))
2224 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2227 return Fnreverse (val
);
2230 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2232 doc
: /* Assign higher priority to the charsets given as arguments.
2233 usage: (set-charset-priority &rest charsets) */)
2238 Lisp_Object new_head
, old_list
, arglist
[2];
2239 Lisp_Object list_2022
, list_emacs_mule
;
2242 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2244 for (i
= 0; i
< nargs
; i
++)
2246 CHECK_CHARSET_GET_ID (args
[i
], id
);
2247 if (! NILP (Fmemq (make_number (id
), old_list
)))
2249 old_list
= Fdelq (make_number (id
), old_list
);
2250 new_head
= Fcons (make_number (id
), new_head
);
2253 arglist
[0] = Fnreverse (new_head
);
2254 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2255 Vcharset_ordered_list
= Fnconc (2, arglist
);
2256 charset_ordered_list_tick
++;
2258 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2259 CONSP (old_list
); old_list
= XCDR (old_list
))
2261 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2262 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2263 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2264 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2266 Viso_2022_charset_list
= Fnreverse (list_2022
);
2267 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2272 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2274 doc
: /* Internal use only.
2275 Return charset identification number of CHARSET. */)
2277 Lisp_Object charset
;
2281 CHECK_CHARSET_GET_ID (charset
, id
);
2282 return make_number (id
);
2289 Lisp_Object tempdir
;
2290 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2291 if (access (SDATA (tempdir
), 0) < 0)
2293 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2294 Emacs will not function correctly without the character map files.\n\
2295 Please check your installation!\n",
2297 /* TODO should this be a fatal error? (Bug#909) */
2300 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2305 init_charset_once ()
2309 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2310 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2311 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2312 iso_charset_table
[i
][j
][k
] = -1;
2314 for (i
= 0; i
< 256; i
++)
2315 emacs_mule_charset
[i
] = NULL
;
2317 charset_jisx0201_roman
= -1;
2318 charset_jisx0208_1978
= -1;
2319 charset_jisx0208
= -1;
2321 for (i
= 0; i
< 128; i
++)
2322 unibyte_to_multibyte_table
[i
] = i
;
2323 for (; i
< 256; i
++)
2324 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2332 DEFSYM (Qcharsetp
, "charsetp");
2334 DEFSYM (Qascii
, "ascii");
2335 DEFSYM (Qunicode
, "unicode");
2336 DEFSYM (Qemacs
, "emacs");
2337 DEFSYM (Qeight_bit
, "eight-bit");
2338 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2343 staticpro (&Vcharset_ordered_list
);
2344 Vcharset_ordered_list
= Qnil
;
2346 staticpro (&Viso_2022_charset_list
);
2347 Viso_2022_charset_list
= Qnil
;
2349 staticpro (&Vemacs_mule_charset_list
);
2350 Vemacs_mule_charset_list
= Qnil
;
2352 /* Don't staticpro them here. It's done in syms_of_fns. */
2353 QCtest
= intern (":test");
2354 Qeq
= intern ("eq");
2356 staticpro (&Vcharset_hash_table
);
2358 Lisp_Object args
[2];
2361 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2364 charset_table_size
= 128;
2365 charset_table
= ((struct charset
*)
2366 xmalloc (sizeof (struct charset
) * charset_table_size
));
2367 charset_table_used
= 0;
2369 defsubr (&Scharsetp
);
2370 defsubr (&Smap_charset_chars
);
2371 defsubr (&Sdefine_charset_internal
);
2372 defsubr (&Sdefine_charset_alias
);
2373 defsubr (&Scharset_plist
);
2374 defsubr (&Sset_charset_plist
);
2375 defsubr (&Sunify_charset
);
2376 defsubr (&Sget_unused_iso_final_char
);
2377 defsubr (&Sdeclare_equiv_charset
);
2378 defsubr (&Sfind_charset_region
);
2379 defsubr (&Sfind_charset_string
);
2380 defsubr (&Sdecode_char
);
2381 defsubr (&Sencode_char
);
2382 defsubr (&Ssplit_char
);
2383 defsubr (&Smake_char
);
2384 defsubr (&Schar_charset
);
2385 defsubr (&Scharset_after
);
2386 defsubr (&Siso_charset
);
2387 defsubr (&Sclear_charset_maps
);
2388 defsubr (&Scharset_priority_list
);
2389 defsubr (&Sset_charset_priority
);
2390 defsubr (&Scharset_id_internal
);
2392 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2393 doc
: /* *List of directories to search for charset map files. */);
2394 Vcharset_map_path
= Qnil
;
2396 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map
,
2397 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2398 inhibit_load_charset_map
= 0;
2400 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2401 doc
: /* List of all charsets ever defined. */);
2402 Vcharset_list
= Qnil
;
2404 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language
,
2405 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2406 If the current language environment is for multiple languages (e.g. "Latin-1"),
2407 the value may be a list of mnemonics. */);
2408 Vcurrent_iso639_language
= Qnil
;
2411 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2412 0, 127, 'B', -1, 0, 1, 0, 0);
2414 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2415 0, 255, -1, -1, -1, 1, 0, 0);
2417 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2418 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2420 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2421 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2423 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2424 128, 255, -1, 0, -1, 0, 1,
2425 MAX_5_BYTE_CHAR
+ 1);
2430 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2431 (do not change this comment) */