simplify cpp usage in wait_reading_process_output
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
4
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
9
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
13
14 This file is part of GNU Emacs.
15
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
20
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
25
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28
29 #include <config.h>
30
31 #include <errno.h>
32 #include <stdio.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <sys/types.h>
36 #include <c-ctype.h>
37 #include "lisp.h"
38 #include "character.h"
39 #include "charset.h"
40 #include "coding.h"
41 #include "disptab.h"
42 #include "buffer.h"
43
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
51
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
56
57 */
58
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table;
62
63 /* Table of struct charset. */
64 struct charset *charset_table;
65
66 static ptrdiff_t charset_table_size;
67 static int charset_table_used;
68
69 Lisp_Object Qcharsetp;
70
71 /* Special charset symbols. */
72 Lisp_Object Qascii;
73 static Lisp_Object Qeight_bit;
74 static Lisp_Object Qiso_8859_1;
75 static Lisp_Object Qunicode;
76 static Lisp_Object Qemacs;
77
78 /* The corresponding charsets. */
79 int charset_ascii;
80 int charset_eight_bit;
81 static int charset_iso_8859_1;
82 int charset_unicode;
83 static int charset_emacs;
84
85 /* The other special charsets. */
86 int charset_jisx0201_roman;
87 int charset_jisx0208_1978;
88 int charset_jisx0208;
89 int charset_ksc5601;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 static Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
103
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
108
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
111
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
114
115 int emacs_mule_charset[256];
116
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121 #define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (int) ((code) - (charset)->min_code) \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
136 : -1)
137
138
139 /* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
142
143 #define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
150 << 8) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
153 << 16) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
155 << 24))))
156
157 /* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
159
160 static struct
161 {
162 /* The current charset for which the following tables are setup. */
163 struct charset *current;
164
165 /* 1 iff the following table is used for encoder. */
166 short for_encoder;
167
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
170 int min_char, max_char;
171
172 /* A Unicode character corresponding to the code index 0 (i.e. the
173 minimum code-point) of the current charset, or -1 if the code
174 index 0 is not a Unicode character. This is checked when
175 table.encoder[CHAR] is zero. */
176 int zero_index_char;
177
178 union {
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder[0x20000];
189 } table;
190 } *temp_charset_work;
191
192 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
193 do { \
194 if ((CODE) == 0) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
198 else \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
200 } while (0)
201
202 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
208
209 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
211
212 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
214 \f
215
216 /* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
218 bool charset_map_loaded;
219
220 struct charset_map_entries
221 {
222 struct {
223 unsigned from, to;
224 int c;
225 } entry[0x10000];
226 struct charset_map_entries *next;
227 };
228
229 /* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
232
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
235
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
238
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
244
245 If CONTROL_FLAG is 2, setup the following tables.
246
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
252 */
253
254 static void
255 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
256 {
257 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
258 unsigned max_code = CHARSET_MAX_CODE (charset);
259 bool ascii_compatible_p = charset->ascii_compatible_p;
260 int min_char, max_char, nonascii_min_char;
261 int i;
262 unsigned char *fast_map = charset->fast_map;
263
264 if (n_entries <= 0)
265 return;
266
267 if (control_flag)
268 {
269 if (! inhibit_load_charset_map)
270 {
271 if (control_flag == 1)
272 {
273 if (charset->method == CHARSET_METHOD_MAP)
274 {
275 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
276
277 vec = Fmake_vector (make_number (n), make_number (-1));
278 set_charset_attr (charset, charset_decoder, vec);
279 }
280 else
281 {
282 char_table_set_range (Vchar_unify_table,
283 charset->min_char, charset->max_char,
284 Qnil);
285 }
286 }
287 else
288 {
289 table = Fmake_char_table (Qnil, Qnil);
290 set_charset_attr (charset,
291 (charset->method == CHARSET_METHOD_MAP
292 ? charset_encoder : charset_deunifier),
293 table);
294 }
295 }
296 else
297 {
298 if (! temp_charset_work)
299 temp_charset_work = xmalloc (sizeof *temp_charset_work);
300 if (control_flag == 1)
301 {
302 memset (temp_charset_work->table.decoder, -1,
303 sizeof (int) * 0x10000);
304 }
305 else
306 {
307 memset (temp_charset_work->table.encoder, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work->zero_index_char = -1;
310 }
311 temp_charset_work->current = charset;
312 temp_charset_work->for_encoder = (control_flag == 2);
313 control_flag += 2;
314 }
315 charset_map_loaded = 1;
316 }
317
318 min_char = max_char = entries->entry[0].c;
319 nonascii_min_char = MAX_CHAR;
320 for (i = 0; i < n_entries; i++)
321 {
322 unsigned from, to;
323 int from_index, to_index, lim_index;
324 int from_c, to_c;
325 int idx = i % 0x10000;
326
327 if (i > 0 && idx == 0)
328 entries = entries->next;
329 from = entries->entry[idx].from;
330 to = entries->entry[idx].to;
331 from_c = entries->entry[idx].c;
332 from_index = CODE_POINT_TO_INDEX (charset, from);
333 if (from == to)
334 {
335 to_index = from_index;
336 to_c = from_c;
337 }
338 else
339 {
340 to_index = CODE_POINT_TO_INDEX (charset, to);
341 to_c = from_c + (to_index - from_index);
342 }
343 if (from_index < 0 || to_index < 0)
344 continue;
345 lim_index = to_index + 1;
346
347 if (to_c > max_char)
348 max_char = to_c;
349 else if (from_c < min_char)
350 min_char = from_c;
351
352 if (control_flag == 1)
353 {
354 if (charset->method == CHARSET_METHOD_MAP)
355 for (; from_index < lim_index; from_index++, from_c++)
356 ASET (vec, from_index, make_number (from_c));
357 else
358 for (; from_index < lim_index; from_index++, from_c++)
359 CHAR_TABLE_SET (Vchar_unify_table,
360 CHARSET_CODE_OFFSET (charset) + from_index,
361 make_number (from_c));
362 }
363 else if (control_flag == 2)
364 {
365 if (charset->method == CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset))
367 for (; from_index < lim_index; from_index++, from_c++)
368 {
369 unsigned code = from_index;
370 code = INDEX_TO_CODE_POINT (charset, code);
371
372 if (NILP (CHAR_TABLE_REF (table, from_c)))
373 CHAR_TABLE_SET (table, from_c, make_number (code));
374 }
375 else
376 for (; from_index < lim_index; from_index++, from_c++)
377 {
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (from_index));
380 }
381 }
382 else if (control_flag == 3)
383 for (; from_index < lim_index; from_index++, from_c++)
384 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
385 else if (control_flag == 4)
386 for (; from_index < lim_index; from_index++, from_c++)
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
388 else /* control_flag == 0 */
389 {
390 if (ascii_compatible_p)
391 {
392 if (! ASCII_CHAR_P (from_c))
393 {
394 if (from_c < nonascii_min_char)
395 nonascii_min_char = from_c;
396 }
397 else if (! ASCII_CHAR_P (to_c))
398 {
399 nonascii_min_char = 0x80;
400 }
401 }
402
403 for (; from_c <= to_c; from_c++)
404 CHARSET_FAST_MAP_SET (from_c, fast_map);
405 }
406 }
407
408 if (control_flag == 0)
409 {
410 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
411 ? nonascii_min_char : min_char);
412 CHARSET_MAX_CHAR (charset) = max_char;
413 }
414 else if (control_flag == 4)
415 {
416 temp_charset_work->min_char = min_char;
417 temp_charset_work->max_char = max_char;
418 }
419 }
420
421
422 /* Read a hexadecimal number (preceded by "0x") from the file FP while
423 paying attention to comment character '#'. */
424
425 static unsigned
426 read_hex (FILE *fp, bool *eof, bool *overflow)
427 {
428 int c;
429 unsigned n;
430
431 while ((c = getc (fp)) != EOF)
432 {
433 if (c == '#')
434 {
435 while ((c = getc (fp)) != EOF && c != '\n');
436 }
437 else if (c == '0')
438 {
439 if ((c = getc (fp)) == EOF || c == 'x')
440 break;
441 }
442 }
443 if (c == EOF)
444 {
445 *eof = 1;
446 return 0;
447 }
448 n = 0;
449 while (c_isxdigit (c = getc (fp)))
450 {
451 if (UINT_MAX >> 4 < n)
452 *overflow = 1;
453 n = ((n << 4)
454 | (c - ('0' <= c && c <= '9' ? '0'
455 : 'A' <= c && c <= 'F' ? 'A' - 10
456 : 'a' - 10)));
457 }
458 if (c != EOF)
459 ungetc (c, fp);
460 return n;
461 }
462
463 /* Return a mapping vector for CHARSET loaded from MAPFILE.
464 Each line of MAPFILE has this form
465 0xAAAA 0xCCCC
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
468 0xAAAA-0xBBBB 0xCCCC
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
471
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
474 where CODE1 is a code-point or a cons of code-points specifying a
475 range.
476
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
479
480 static void
481 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
482 int control_flag)
483 {
484 unsigned min_code = CHARSET_MIN_CODE (charset);
485 unsigned max_code = CHARSET_MAX_CODE (charset);
486 int fd;
487 FILE *fp = NULL;
488 Lisp_Object suffixes;
489 struct charset_map_entries *head, *entries;
490 int n_entries;
491 ptrdiff_t count;
492
493 suffixes = list2 (build_string (".map"), build_string (".TXT"));
494
495 count = SPECPDL_INDEX ();
496 record_unwind_protect_ptr (fclose_ptr_unwind, &fp);
497 specbind (Qfile_name_handler_alist, Qnil);
498 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
499 fp = fd < 0 ? 0 : fdopen (fd, "r");
500 if (!fp)
501 {
502 int open_errno = errno;
503 emacs_close (fd);
504 report_file_errno ("Loading charset map", mapfile, open_errno);
505 }
506 unbind_to (count + 1, Qnil);
507
508 /* Use record, as `charset_map_entries' is large (larger than
509 MAX_ALLOCA). */
510 head = xmalloc (sizeof *head);
511 entries = head;
512 memset (entries, 0, sizeof (struct charset_map_entries));
513
514 n_entries = 0;
515 while (1)
516 {
517 unsigned from, to, c;
518 int idx;
519 bool eof = 0, overflow = 0;
520
521 from = read_hex (fp, &eof, &overflow);
522 if (eof)
523 break;
524 if (getc (fp) == '-')
525 to = read_hex (fp, &eof, &overflow);
526 else
527 to = from;
528 if (eof)
529 break;
530 c = read_hex (fp, &eof, &overflow);
531 if (eof)
532 break;
533
534 if (overflow)
535 continue;
536 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
537 continue;
538
539 if (n_entries == 0x10000)
540 {
541 entries->next = xmalloc (sizeof *entries->next);
542 entries = entries->next;
543 memset (entries, 0, sizeof (struct charset_map_entries));
544 n_entries = 0;
545 }
546 idx = n_entries;
547 entries->entry[idx].from = from;
548 entries->entry[idx].to = to;
549 entries->entry[idx].c = c;
550 n_entries++;
551 }
552 fclose (fp);
553 fp = NULL;
554
555 load_charset_map (charset, head, n_entries, control_flag);
556 unbind_to (count, Qnil);
557 }
558
559 static void
560 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
561 {
562 unsigned min_code = CHARSET_MIN_CODE (charset);
563 unsigned max_code = CHARSET_MAX_CODE (charset);
564 struct charset_map_entries *head, *entries;
565 int n_entries;
566 int len = ASIZE (vec);
567 int i;
568 USE_SAFE_ALLOCA;
569
570 if (len % 2 == 1)
571 {
572 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
573 return;
574 }
575
576 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
577 large (larger than MAX_ALLOCA). */
578 head = SAFE_ALLOCA (sizeof *head);
579 entries = head;
580 memset (entries, 0, sizeof (struct charset_map_entries));
581
582 n_entries = 0;
583 for (i = 0; i < len; i += 2)
584 {
585 Lisp_Object val, val2;
586 unsigned from, to;
587 EMACS_INT c;
588 int idx;
589
590 val = AREF (vec, i);
591 if (CONSP (val))
592 {
593 val2 = XCDR (val);
594 val = XCAR (val);
595 from = XFASTINT (val);
596 to = XFASTINT (val2);
597 }
598 else
599 from = to = XFASTINT (val);
600 val = AREF (vec, i + 1);
601 CHECK_NATNUM (val);
602 c = XFASTINT (val);
603
604 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
605 continue;
606
607 if (n_entries > 0 && (n_entries % 0x10000) == 0)
608 {
609 entries->next = SAFE_ALLOCA (sizeof *entries->next);
610 entries = entries->next;
611 memset (entries, 0, sizeof (struct charset_map_entries));
612 }
613 idx = n_entries % 0x10000;
614 entries->entry[idx].from = from;
615 entries->entry[idx].to = to;
616 entries->entry[idx].c = c;
617 n_entries++;
618 }
619
620 load_charset_map (charset, head, n_entries, control_flag);
621 SAFE_FREE ();
622 }
623
624
625 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
626 map it is (see the comment of load_charset_map for the detail). */
627
628 static void
629 load_charset (struct charset *charset, int control_flag)
630 {
631 Lisp_Object map;
632
633 if (inhibit_load_charset_map
634 && temp_charset_work
635 && charset == temp_charset_work->current
636 && ((control_flag == 2) == temp_charset_work->for_encoder))
637 return;
638
639 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
640 map = CHARSET_MAP (charset);
641 else
642 {
643 if (! CHARSET_UNIFIED_P (charset))
644 emacs_abort ();
645 map = CHARSET_UNIFY_MAP (charset);
646 }
647 if (STRINGP (map))
648 load_charset_map_from_file (charset, map, control_flag);
649 else
650 load_charset_map_from_vector (charset, map, control_flag);
651 }
652
653
654 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
655 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
656 (Lisp_Object object)
657 {
658 return (CHARSETP (object) ? Qt : Qnil);
659 }
660
661
662 static void
663 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
664 Lisp_Object function, Lisp_Object arg,
665 unsigned int from, unsigned int to)
666 {
667 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
668 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
669 Lisp_Object range;
670 int c, stop;
671 struct gcpro gcpro1;
672
673 range = Fcons (Qnil, Qnil);
674 GCPRO1 (range);
675
676 c = temp_charset_work->min_char;
677 stop = (temp_charset_work->max_char < 0x20000
678 ? temp_charset_work->max_char : 0xFFFF);
679
680 while (1)
681 {
682 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
683
684 if (idx >= from_idx && idx <= to_idx)
685 {
686 if (NILP (XCAR (range)))
687 XSETCAR (range, make_number (c));
688 }
689 else if (! NILP (XCAR (range)))
690 {
691 XSETCDR (range, make_number (c - 1));
692 if (c_function)
693 (*c_function) (arg, range);
694 else
695 call2 (function, range, arg);
696 XSETCAR (range, Qnil);
697 }
698 if (c == stop)
699 {
700 if (c == temp_charset_work->max_char)
701 {
702 if (! NILP (XCAR (range)))
703 {
704 XSETCDR (range, make_number (c));
705 if (c_function)
706 (*c_function) (arg, range);
707 else
708 call2 (function, range, arg);
709 }
710 break;
711 }
712 c = 0x1FFFF;
713 stop = temp_charset_work->max_char;
714 }
715 c++;
716 }
717 UNGCPRO;
718 }
719
720 void
721 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
722 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
723 {
724 Lisp_Object range;
725 bool partial = (from > CHARSET_MIN_CODE (charset)
726 || to < CHARSET_MAX_CODE (charset));
727
728 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
729 {
730 int from_idx = CODE_POINT_TO_INDEX (charset, from);
731 int to_idx = CODE_POINT_TO_INDEX (charset, to);
732 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
733 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
734
735 if (CHARSET_UNIFIED_P (charset))
736 {
737 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
738 load_charset (charset, 2);
739 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
740 map_char_table_for_charset (c_function, function,
741 CHARSET_DEUNIFIER (charset), arg,
742 partial ? charset : NULL, from, to);
743 else
744 map_charset_for_dump (c_function, function, arg, from, to);
745 }
746
747 range = Fcons (make_number (from_c), make_number (to_c));
748 if (NILP (function))
749 (*c_function) (arg, range);
750 else
751 call2 (function, range, arg);
752 }
753 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
754 {
755 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
756 load_charset (charset, 2);
757 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
758 map_char_table_for_charset (c_function, function,
759 CHARSET_ENCODER (charset), arg,
760 partial ? charset : NULL, from, to);
761 else
762 map_charset_for_dump (c_function, function, arg, from, to);
763 }
764 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
765 {
766 Lisp_Object subset_info;
767 int offset;
768
769 subset_info = CHARSET_SUBSET (charset);
770 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
771 offset = XINT (AREF (subset_info, 3));
772 from -= offset;
773 if (from < XFASTINT (AREF (subset_info, 1)))
774 from = XFASTINT (AREF (subset_info, 1));
775 to -= offset;
776 if (to > XFASTINT (AREF (subset_info, 2)))
777 to = XFASTINT (AREF (subset_info, 2));
778 map_charset_chars (c_function, function, arg, charset, from, to);
779 }
780 else /* i.e. CHARSET_METHOD_SUPERSET */
781 {
782 Lisp_Object parents;
783
784 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
785 parents = XCDR (parents))
786 {
787 int offset;
788 unsigned this_from, this_to;
789
790 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
791 offset = XINT (XCDR (XCAR (parents)));
792 this_from = from > offset ? from - offset : 0;
793 this_to = to > offset ? to - offset : 0;
794 if (this_from < CHARSET_MIN_CODE (charset))
795 this_from = CHARSET_MIN_CODE (charset);
796 if (this_to > CHARSET_MAX_CODE (charset))
797 this_to = CHARSET_MAX_CODE (charset);
798 map_charset_chars (c_function, function, arg, charset,
799 this_from, this_to);
800 }
801 }
802 }
803
804 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
805 doc: /* Call FUNCTION for all characters in CHARSET.
806 FUNCTION is called with an argument RANGE and the optional 3rd
807 argument ARG.
808
809 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
810 characters contained in CHARSET.
811
812 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
813 range of code points (in CHARSET) of target characters. */)
814 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
815 {
816 struct charset *cs;
817 unsigned from, to;
818
819 CHECK_CHARSET_GET_CHARSET (charset, cs);
820 if (NILP (from_code))
821 from = CHARSET_MIN_CODE (cs);
822 else
823 {
824 from = XINT (from_code);
825 if (from < CHARSET_MIN_CODE (cs))
826 from = CHARSET_MIN_CODE (cs);
827 }
828 if (NILP (to_code))
829 to = CHARSET_MAX_CODE (cs);
830 else
831 {
832 to = XINT (to_code);
833 if (to > CHARSET_MAX_CODE (cs))
834 to = CHARSET_MAX_CODE (cs);
835 }
836 map_charset_chars (NULL, function, arg, cs, from, to);
837 return Qnil;
838 }
839
840
841 /* Define a charset according to the arguments. The Nth argument is
842 the Nth attribute of the charset (the last attribute `charset-id'
843 is not included). See the docstring of `define-charset' for the
844 detail. */
845
846 DEFUN ("define-charset-internal", Fdefine_charset_internal,
847 Sdefine_charset_internal, charset_arg_max, MANY, 0,
848 doc: /* For internal use only.
849 usage: (define-charset-internal ...) */)
850 (ptrdiff_t nargs, Lisp_Object *args)
851 {
852 /* Charset attr vector. */
853 Lisp_Object attrs;
854 Lisp_Object val;
855 EMACS_UINT hash_code;
856 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
857 int i, j;
858 struct charset charset;
859 int id;
860 int dimension;
861 bool new_definition_p;
862 int nchars;
863
864 if (nargs != charset_arg_max)
865 return Fsignal (Qwrong_number_of_arguments,
866 Fcons (intern ("define-charset-internal"),
867 make_number (nargs)));
868
869 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
870
871 CHECK_SYMBOL (args[charset_arg_name]);
872 ASET (attrs, charset_name, args[charset_arg_name]);
873
874 val = args[charset_arg_code_space];
875 for (i = 0, dimension = 0, nchars = 1; ; i++)
876 {
877 Lisp_Object min_byte_obj, max_byte_obj;
878 int min_byte, max_byte;
879
880 min_byte_obj = Faref (val, make_number (i * 2));
881 max_byte_obj = Faref (val, make_number (i * 2 + 1));
882 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
883 min_byte = XINT (min_byte_obj);
884 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
885 max_byte = XINT (max_byte_obj);
886 charset.code_space[i * 4] = min_byte;
887 charset.code_space[i * 4 + 1] = max_byte;
888 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
889 if (max_byte > 0)
890 dimension = i + 1;
891 if (i == 3)
892 break;
893 nchars *= charset.code_space[i * 4 + 2];
894 charset.code_space[i * 4 + 3] = nchars;
895 }
896
897 val = args[charset_arg_dimension];
898 if (NILP (val))
899 charset.dimension = dimension;
900 else
901 {
902 CHECK_RANGED_INTEGER (val, 1, 4);
903 charset.dimension = XINT (val);
904 }
905
906 charset.code_linear_p
907 = (charset.dimension == 1
908 || (charset.code_space[2] == 256
909 && (charset.dimension == 2
910 || (charset.code_space[6] == 256
911 && (charset.dimension == 3
912 || charset.code_space[10] == 256)))));
913
914 if (! charset.code_linear_p)
915 {
916 charset.code_space_mask = xzalloc_atomic (256);
917 for (i = 0; i < 4; i++)
918 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
919 j++)
920 charset.code_space_mask[j] |= (1 << i);
921 }
922
923 charset.iso_chars_96 = charset.code_space[2] == 96;
924
925 charset.min_code = (charset.code_space[0]
926 | (charset.code_space[4] << 8)
927 | (charset.code_space[8] << 16)
928 | ((unsigned) charset.code_space[12] << 24));
929 charset.max_code = (charset.code_space[1]
930 | (charset.code_space[5] << 8)
931 | (charset.code_space[9] << 16)
932 | ((unsigned) charset.code_space[13] << 24));
933 charset.char_index_offset = 0;
934
935 val = args[charset_arg_min_code];
936 if (! NILP (val))
937 {
938 unsigned code = cons_to_unsigned (val, UINT_MAX);
939
940 if (code < charset.min_code
941 || code > charset.max_code)
942 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
943 make_fixnum_or_float (charset.max_code), val);
944 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
945 charset.min_code = code;
946 }
947
948 val = args[charset_arg_max_code];
949 if (! NILP (val))
950 {
951 unsigned code = cons_to_unsigned (val, UINT_MAX);
952
953 if (code < charset.min_code
954 || code > charset.max_code)
955 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
956 make_fixnum_or_float (charset.max_code), val);
957 charset.max_code = code;
958 }
959
960 charset.compact_codes_p = charset.max_code < 0x10000;
961
962 val = args[charset_arg_invalid_code];
963 if (NILP (val))
964 {
965 if (charset.min_code > 0)
966 charset.invalid_code = 0;
967 else
968 {
969 if (charset.max_code < UINT_MAX)
970 charset.invalid_code = charset.max_code + 1;
971 else
972 error ("Attribute :invalid-code must be specified");
973 }
974 }
975 else
976 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
977
978 val = args[charset_arg_iso_final];
979 if (NILP (val))
980 charset.iso_final = -1;
981 else
982 {
983 CHECK_NUMBER (val);
984 if (XINT (val) < '0' || XINT (val) > 127)
985 error ("Invalid iso-final-char: %"pI"d", XINT (val));
986 charset.iso_final = XINT (val);
987 }
988
989 val = args[charset_arg_iso_revision];
990 if (NILP (val))
991 charset.iso_revision = -1;
992 else
993 {
994 CHECK_RANGED_INTEGER (val, -1, 63);
995 charset.iso_revision = XINT (val);
996 }
997
998 val = args[charset_arg_emacs_mule_id];
999 if (NILP (val))
1000 charset.emacs_mule_id = -1;
1001 else
1002 {
1003 CHECK_NATNUM (val);
1004 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1005 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
1006 charset.emacs_mule_id = XINT (val);
1007 }
1008
1009 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1010
1011 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1012
1013 charset.unified_p = 0;
1014
1015 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1016
1017 if (! NILP (args[charset_arg_code_offset]))
1018 {
1019 val = args[charset_arg_code_offset];
1020 CHECK_CHARACTER (val);
1021
1022 charset.method = CHARSET_METHOD_OFFSET;
1023 charset.code_offset = XINT (val);
1024
1025 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1026 if (MAX_CHAR - charset.code_offset < i)
1027 error ("Unsupported max char: %d", charset.max_char);
1028 charset.max_char = i + charset.code_offset;
1029 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1030 charset.min_char = i + charset.code_offset;
1031
1032 i = (charset.min_char >> 7) << 7;
1033 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1034 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1035 i = (i >> 12) << 12;
1036 for (; i <= charset.max_char; i += 0x1000)
1037 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1038 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1039 charset.ascii_compatible_p = 1;
1040 }
1041 else if (! NILP (args[charset_arg_map]))
1042 {
1043 val = args[charset_arg_map];
1044 ASET (attrs, charset_map, val);
1045 charset.method = CHARSET_METHOD_MAP;
1046 }
1047 else if (! NILP (args[charset_arg_subset]))
1048 {
1049 Lisp_Object parent;
1050 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1051 struct charset *parent_charset;
1052
1053 val = args[charset_arg_subset];
1054 parent = Fcar (val);
1055 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1056 parent_min_code = Fnth (make_number (1), val);
1057 CHECK_NATNUM (parent_min_code);
1058 parent_max_code = Fnth (make_number (2), val);
1059 CHECK_NATNUM (parent_max_code);
1060 parent_code_offset = Fnth (make_number (3), val);
1061 CHECK_NUMBER (parent_code_offset);
1062 val = make_uninit_vector (4);
1063 ASET (val, 0, make_number (parent_charset->id));
1064 ASET (val, 1, parent_min_code);
1065 ASET (val, 2, parent_max_code);
1066 ASET (val, 3, parent_code_offset);
1067 ASET (attrs, charset_subset, val);
1068
1069 charset.method = CHARSET_METHOD_SUBSET;
1070 /* Here, we just copy the parent's fast_map. It's not accurate,
1071 but at least it works for quickly detecting which character
1072 DOESN'T belong to this charset. */
1073 for (i = 0; i < 190; i++)
1074 charset.fast_map[i] = parent_charset->fast_map[i];
1075
1076 /* We also copy these for parents. */
1077 charset.min_char = parent_charset->min_char;
1078 charset.max_char = parent_charset->max_char;
1079 }
1080 else if (! NILP (args[charset_arg_superset]))
1081 {
1082 val = args[charset_arg_superset];
1083 charset.method = CHARSET_METHOD_SUPERSET;
1084 val = Fcopy_sequence (val);
1085 ASET (attrs, charset_superset, val);
1086
1087 charset.min_char = MAX_CHAR;
1088 charset.max_char = 0;
1089 for (; ! NILP (val); val = Fcdr (val))
1090 {
1091 Lisp_Object elt, car_part, cdr_part;
1092 int this_id, offset;
1093 struct charset *this_charset;
1094
1095 elt = Fcar (val);
1096 if (CONSP (elt))
1097 {
1098 car_part = XCAR (elt);
1099 cdr_part = XCDR (elt);
1100 CHECK_CHARSET_GET_ID (car_part, this_id);
1101 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
1102 offset = XINT (cdr_part);
1103 }
1104 else
1105 {
1106 CHECK_CHARSET_GET_ID (elt, this_id);
1107 offset = 0;
1108 }
1109 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1110
1111 this_charset = CHARSET_FROM_ID (this_id);
1112 if (charset.min_char > this_charset->min_char)
1113 charset.min_char = this_charset->min_char;
1114 if (charset.max_char < this_charset->max_char)
1115 charset.max_char = this_charset->max_char;
1116 for (i = 0; i < 190; i++)
1117 charset.fast_map[i] |= this_charset->fast_map[i];
1118 }
1119 }
1120 else
1121 error ("None of :code-offset, :map, :parents are specified");
1122
1123 val = args[charset_arg_unify_map];
1124 if (! NILP (val) && !STRINGP (val))
1125 CHECK_VECTOR (val);
1126 ASET (attrs, charset_unify_map, val);
1127
1128 CHECK_LIST (args[charset_arg_plist]);
1129 ASET (attrs, charset_plist, args[charset_arg_plist]);
1130
1131 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1132 &hash_code);
1133 if (charset.hash_index >= 0)
1134 {
1135 new_definition_p = 0;
1136 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1137 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1138 }
1139 else
1140 {
1141 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1142 hash_code);
1143 if (charset_table_used == charset_table_size)
1144 {
1145 /* Ensure that charset IDs fit into 'int' as well as into the
1146 restriction imposed by fixnums. Although the 'int' restriction
1147 could be removed, too much other code would need altering; for
1148 example, the IDs are stuffed into struct
1149 coding_system.charbuf[i] entries, which are 'int'. */
1150 int old_size = charset_table_size;
1151 ptrdiff_t new_size = old_size;
1152 struct charset *new_table =
1153 xpalloc (0, &new_size, 1,
1154 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1155 sizeof *charset_table);
1156 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1157 charset_table = new_table;
1158 charset_table_size = new_size;
1159 /* FIXME: This leaks memory, as the old charset_table becomes
1160 unreachable. If the old charset table is charset_table_init
1161 then this leak is intentional; otherwise, it's unclear.
1162 If the latter memory leak is intentional, a
1163 comment should be added to explain this. If not, the old
1164 charset_table should be freed, by passing it as the 1st argument
1165 to xpalloc and removing the memcpy. */
1166 }
1167 id = charset_table_used++;
1168 new_definition_p = 1;
1169 }
1170
1171 ASET (attrs, charset_id, make_number (id));
1172 charset.id = id;
1173 charset_table[id] = charset;
1174
1175 if (charset.method == CHARSET_METHOD_MAP)
1176 {
1177 load_charset (&charset, 0);
1178 charset_table[id] = charset;
1179 }
1180
1181 if (charset.iso_final >= 0)
1182 {
1183 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1184 charset.iso_final) = id;
1185 if (new_definition_p)
1186 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1187 list1 (make_number (id)));
1188 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1189 charset_jisx0201_roman = id;
1190 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1191 charset_jisx0208_1978 = id;
1192 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1193 charset_jisx0208 = id;
1194 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1195 charset_ksc5601 = id;
1196 }
1197
1198 if (charset.emacs_mule_id >= 0)
1199 {
1200 emacs_mule_charset[charset.emacs_mule_id] = id;
1201 if (charset.emacs_mule_id < 0xA0)
1202 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1203 else
1204 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1205 if (new_definition_p)
1206 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1207 list1 (make_number (id)));
1208 }
1209
1210 if (new_definition_p)
1211 {
1212 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1213 if (charset.supplementary_p)
1214 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1215 list1 (make_number (id)));
1216 else
1217 {
1218 Lisp_Object tail;
1219
1220 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1221 {
1222 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1223
1224 if (cs->supplementary_p)
1225 break;
1226 }
1227 if (EQ (tail, Vcharset_ordered_list))
1228 Vcharset_ordered_list = Fcons (make_number (id),
1229 Vcharset_ordered_list);
1230 else if (NILP (tail))
1231 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1232 list1 (make_number (id)));
1233 else
1234 {
1235 val = Fcons (XCAR (tail), XCDR (tail));
1236 XSETCDR (tail, val);
1237 XSETCAR (tail, make_number (id));
1238 }
1239 }
1240 charset_ordered_list_tick++;
1241 }
1242
1243 return Qnil;
1244 }
1245
1246
1247 /* Same as Fdefine_charset_internal but arguments are more convenient
1248 to call from C (typically in syms_of_charset). This can define a
1249 charset of `offset' method only. Return the ID of the new
1250 charset. */
1251
1252 static int
1253 define_charset_internal (Lisp_Object name,
1254 int dimension,
1255 const char *code_space_chars,
1256 unsigned min_code, unsigned max_code,
1257 int iso_final, int iso_revision, int emacs_mule_id,
1258 bool ascii_compatible, bool supplementary,
1259 int code_offset)
1260 {
1261 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1262 Lisp_Object args[charset_arg_max];
1263 Lisp_Object val;
1264 int i;
1265
1266 args[charset_arg_name] = name;
1267 args[charset_arg_dimension] = make_number (dimension);
1268 val = make_uninit_vector (8);
1269 for (i = 0; i < 8; i++)
1270 ASET (val, i, make_number (code_space[i]));
1271 args[charset_arg_code_space] = val;
1272 args[charset_arg_min_code] = make_number (min_code);
1273 args[charset_arg_max_code] = make_number (max_code);
1274 args[charset_arg_iso_final]
1275 = (iso_final < 0 ? Qnil : make_number (iso_final));
1276 args[charset_arg_iso_revision] = make_number (iso_revision);
1277 args[charset_arg_emacs_mule_id]
1278 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1279 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1280 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1281 args[charset_arg_invalid_code] = Qnil;
1282 args[charset_arg_code_offset] = make_number (code_offset);
1283 args[charset_arg_map] = Qnil;
1284 args[charset_arg_subset] = Qnil;
1285 args[charset_arg_superset] = Qnil;
1286 args[charset_arg_unify_map] = Qnil;
1287
1288 args[charset_arg_plist] =
1289 listn (CONSTYPE_HEAP, 14,
1290 intern_c_string (":name"),
1291 args[charset_arg_name],
1292 intern_c_string (":dimension"),
1293 args[charset_arg_dimension],
1294 intern_c_string (":code-space"),
1295 args[charset_arg_code_space],
1296 intern_c_string (":iso-final-char"),
1297 args[charset_arg_iso_final],
1298 intern_c_string (":emacs-mule-id"),
1299 args[charset_arg_emacs_mule_id],
1300 intern_c_string (":ascii-compatible-p"),
1301 args[charset_arg_ascii_compatible_p],
1302 intern_c_string (":code-offset"),
1303 args[charset_arg_code_offset]);
1304 Fdefine_charset_internal (charset_arg_max, args);
1305
1306 return XINT (CHARSET_SYMBOL_ID (name));
1307 }
1308
1309
1310 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1311 Sdefine_charset_alias, 2, 2, 0,
1312 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1313 (Lisp_Object alias, Lisp_Object charset)
1314 {
1315 Lisp_Object attr;
1316
1317 CHECK_CHARSET_GET_ATTR (charset, attr);
1318 Fputhash (alias, attr, Vcharset_hash_table);
1319 Vcharset_list = Fcons (alias, Vcharset_list);
1320 return Qnil;
1321 }
1322
1323
1324 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1325 doc: /* Return the property list of CHARSET. */)
1326 (Lisp_Object charset)
1327 {
1328 Lisp_Object attrs;
1329
1330 CHECK_CHARSET_GET_ATTR (charset, attrs);
1331 return CHARSET_ATTR_PLIST (attrs);
1332 }
1333
1334
1335 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1336 doc: /* Set CHARSET's property list to PLIST. */)
1337 (Lisp_Object charset, Lisp_Object plist)
1338 {
1339 Lisp_Object attrs;
1340
1341 CHECK_CHARSET_GET_ATTR (charset, attrs);
1342 ASET (attrs, charset_plist, plist);
1343 return plist;
1344 }
1345
1346
1347 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1348 doc: /* Unify characters of CHARSET with Unicode.
1349 This means reading the relevant file and installing the table defined
1350 by CHARSET's `:unify-map' property.
1351
1352 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1353 the same meaning as the `:unify-map' attribute in the function
1354 `define-charset' (which see).
1355
1356 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1357 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1358 {
1359 int id;
1360 struct charset *cs;
1361
1362 CHECK_CHARSET_GET_ID (charset, id);
1363 cs = CHARSET_FROM_ID (id);
1364 if (NILP (deunify)
1365 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1366 : ! CHARSET_UNIFIED_P (cs))
1367 return Qnil;
1368
1369 CHARSET_UNIFIED_P (cs) = 0;
1370 if (NILP (deunify))
1371 {
1372 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1373 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1374 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1375 if (NILP (unify_map))
1376 unify_map = CHARSET_UNIFY_MAP (cs);
1377 else
1378 {
1379 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1380 signal_error ("Bad unify-map", unify_map);
1381 set_charset_attr (cs, charset_unify_map, unify_map);
1382 }
1383 if (NILP (Vchar_unify_table))
1384 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1385 char_table_set_range (Vchar_unify_table,
1386 cs->min_char, cs->max_char, charset);
1387 CHARSET_UNIFIED_P (cs) = 1;
1388 }
1389 else if (CHAR_TABLE_P (Vchar_unify_table))
1390 {
1391 unsigned min_code = CHARSET_MIN_CODE (cs);
1392 unsigned max_code = CHARSET_MAX_CODE (cs);
1393 int min_char = DECODE_CHAR (cs, min_code);
1394 int max_char = DECODE_CHAR (cs, max_code);
1395
1396 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1397 }
1398
1399 return Qnil;
1400 }
1401
1402 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1403 Sget_unused_iso_final_char, 2, 2, 0,
1404 doc: /*
1405 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1406 DIMENSION is the number of bytes to represent a character: 1 or 2.
1407 CHARS is the number of characters in a dimension: 94 or 96.
1408
1409 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1410 If there's no unused final char for the specified kind of charset,
1411 return nil. */)
1412 (Lisp_Object dimension, Lisp_Object chars)
1413 {
1414 int final_char;
1415
1416 CHECK_NUMBER (dimension);
1417 CHECK_NUMBER (chars);
1418 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1419 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1420 if (XINT (chars) != 94 && XINT (chars) != 96)
1421 args_out_of_range_3 (chars, make_number (94), make_number (96));
1422 for (final_char = '0'; final_char <= '?'; final_char++)
1423 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1424 break;
1425 return (final_char <= '?' ? make_number (final_char) : Qnil);
1426 }
1427
1428 static void
1429 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1430 {
1431 CHECK_NATNUM (dimension);
1432 CHECK_NATNUM (chars);
1433 CHECK_CHARACTER (final_char);
1434
1435 if (XINT (dimension) > 3)
1436 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1437 XINT (dimension));
1438 if (XINT (chars) != 94 && XINT (chars) != 96)
1439 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
1440 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1441 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1442 (int)XINT (final_char));
1443 }
1444
1445
1446 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1447 4, 4, 0,
1448 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1449
1450 On decoding by an ISO-2022 base coding system, when a charset
1451 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1452 if CHARSET is designated instead. */)
1453 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1454 {
1455 int id;
1456 bool chars_flag;
1457
1458 CHECK_CHARSET_GET_ID (charset, id);
1459 check_iso_charset_parameter (dimension, chars, final_char);
1460 chars_flag = XINT (chars) == 96;
1461 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1462 return Qnil;
1463 }
1464
1465
1466 /* Return information about charsets in the text at PTR of NBYTES
1467 bytes, which are NCHARS characters. The value is:
1468
1469 0: Each character is represented by one byte. This is always
1470 true for a unibyte string. For a multibyte string, true if
1471 it contains only ASCII characters.
1472
1473 1: No charsets other than ascii, control-1, and latin-1 are
1474 found.
1475
1476 2: Otherwise.
1477 */
1478
1479 int
1480 string_xstring_p (Lisp_Object string)
1481 {
1482 const unsigned char *p = SDATA (string);
1483 const unsigned char *endp = p + SBYTES (string);
1484
1485 if (SCHARS (string) == SBYTES (string))
1486 return 0;
1487
1488 while (p < endp)
1489 {
1490 int c = STRING_CHAR_ADVANCE (p);
1491
1492 if (c >= 0x100)
1493 return 2;
1494 }
1495 return 1;
1496 }
1497
1498
1499 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1500
1501 CHARSETS is a vector. If Nth element is non-nil, it means the
1502 charset whose id is N is already found.
1503
1504 It may lookup a translation table TABLE if supplied. */
1505
1506 static void
1507 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1508 ptrdiff_t nbytes, Lisp_Object charsets,
1509 Lisp_Object table, bool multibyte)
1510 {
1511 const unsigned char *pend = ptr + nbytes;
1512
1513 if (nchars == nbytes)
1514 {
1515 if (multibyte)
1516 ASET (charsets, charset_ascii, Qt);
1517 else
1518 while (ptr < pend)
1519 {
1520 int c = *ptr++;
1521
1522 if (!NILP (table))
1523 c = translate_char (table, c);
1524 if (ASCII_CHAR_P (c))
1525 ASET (charsets, charset_ascii, Qt);
1526 else
1527 ASET (charsets, charset_eight_bit, Qt);
1528 }
1529 }
1530 else
1531 {
1532 while (ptr < pend)
1533 {
1534 int c = STRING_CHAR_ADVANCE (ptr);
1535 struct charset *charset;
1536
1537 if (!NILP (table))
1538 c = translate_char (table, c);
1539 charset = CHAR_CHARSET (c);
1540 ASET (charsets, CHARSET_ID (charset), Qt);
1541 }
1542 }
1543 }
1544
1545 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1546 2, 3, 0,
1547 doc: /* Return a list of charsets in the region between BEG and END.
1548 BEG and END are buffer positions.
1549 Optional arg TABLE if non-nil is a translation table to look up.
1550
1551 If the current buffer is unibyte, the returned list may contain
1552 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1553 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1554 {
1555 Lisp_Object charsets;
1556 ptrdiff_t from, from_byte, to, stop, stop_byte;
1557 int i;
1558 Lisp_Object val;
1559 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1560
1561 validate_region (&beg, &end);
1562 from = XFASTINT (beg);
1563 stop = to = XFASTINT (end);
1564
1565 if (from < GPT && GPT < to)
1566 {
1567 stop = GPT;
1568 stop_byte = GPT_BYTE;
1569 }
1570 else
1571 stop_byte = CHAR_TO_BYTE (stop);
1572
1573 from_byte = CHAR_TO_BYTE (from);
1574
1575 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1576 while (1)
1577 {
1578 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1579 stop_byte - from_byte, charsets, table,
1580 multibyte);
1581 if (stop < to)
1582 {
1583 from = stop, from_byte = stop_byte;
1584 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1585 }
1586 else
1587 break;
1588 }
1589
1590 val = Qnil;
1591 for (i = charset_table_used - 1; i >= 0; i--)
1592 if (!NILP (AREF (charsets, i)))
1593 val = Fcons (CHARSET_NAME (charset_table + i), val);
1594 return val;
1595 }
1596
1597 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1598 1, 2, 0,
1599 doc: /* Return a list of charsets in STR.
1600 Optional arg TABLE if non-nil is a translation table to look up.
1601
1602 If STR is unibyte, the returned list may contain
1603 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1604 (Lisp_Object str, Lisp_Object table)
1605 {
1606 Lisp_Object charsets;
1607 int i;
1608 Lisp_Object val;
1609
1610 CHECK_STRING (str);
1611
1612 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1613 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1614 charsets, table,
1615 STRING_MULTIBYTE (str));
1616 val = Qnil;
1617 for (i = charset_table_used - 1; i >= 0; i--)
1618 if (!NILP (AREF (charsets, i)))
1619 val = Fcons (CHARSET_NAME (charset_table + i), val);
1620 return val;
1621 }
1622
1623 \f
1624
1625 /* Return a unified character code for C (>= 0x110000). VAL is a
1626 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1627 charset symbol. */
1628 static int
1629 maybe_unify_char (int c, Lisp_Object val)
1630 {
1631 struct charset *charset;
1632
1633 if (INTEGERP (val))
1634 return XFASTINT (val);
1635 if (NILP (val))
1636 return c;
1637
1638 CHECK_CHARSET_GET_CHARSET (val, charset);
1639 #ifdef REL_ALLOC
1640 /* The call to load_charset below can allocate memory, which screws
1641 callers of this function through STRING_CHAR_* macros that hold C
1642 pointers to buffer text, if REL_ALLOC is used. */
1643 r_alloc_inhibit_buffer_relocation (1);
1644 #endif
1645 load_charset (charset, 1);
1646 if (! inhibit_load_charset_map)
1647 {
1648 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1649 if (! NILP (val))
1650 c = XFASTINT (val);
1651 }
1652 else
1653 {
1654 int code_index = c - CHARSET_CODE_OFFSET (charset);
1655 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1656
1657 if (unified > 0)
1658 c = unified;
1659 }
1660 #ifdef REL_ALLOC
1661 r_alloc_inhibit_buffer_relocation (0);
1662 #endif
1663 return c;
1664 }
1665
1666
1667 /* Return a character corresponding to the code-point CODE of
1668 CHARSET. */
1669
1670 int
1671 decode_char (struct charset *charset, unsigned int code)
1672 {
1673 int c, char_index;
1674 enum charset_method method = CHARSET_METHOD (charset);
1675
1676 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1677 return -1;
1678
1679 if (method == CHARSET_METHOD_SUBSET)
1680 {
1681 Lisp_Object subset_info;
1682
1683 subset_info = CHARSET_SUBSET (charset);
1684 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1685 code -= XINT (AREF (subset_info, 3));
1686 if (code < XFASTINT (AREF (subset_info, 1))
1687 || code > XFASTINT (AREF (subset_info, 2)))
1688 c = -1;
1689 else
1690 c = DECODE_CHAR (charset, code);
1691 }
1692 else if (method == CHARSET_METHOD_SUPERSET)
1693 {
1694 Lisp_Object parents;
1695
1696 parents = CHARSET_SUPERSET (charset);
1697 c = -1;
1698 for (; CONSP (parents); parents = XCDR (parents))
1699 {
1700 int id = XINT (XCAR (XCAR (parents)));
1701 int code_offset = XINT (XCDR (XCAR (parents)));
1702 unsigned this_code = code - code_offset;
1703
1704 charset = CHARSET_FROM_ID (id);
1705 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1706 break;
1707 }
1708 }
1709 else
1710 {
1711 char_index = CODE_POINT_TO_INDEX (charset, code);
1712 if (char_index < 0)
1713 return -1;
1714
1715 if (method == CHARSET_METHOD_MAP)
1716 {
1717 Lisp_Object decoder;
1718
1719 decoder = CHARSET_DECODER (charset);
1720 if (! VECTORP (decoder))
1721 {
1722 load_charset (charset, 1);
1723 decoder = CHARSET_DECODER (charset);
1724 }
1725 if (VECTORP (decoder))
1726 c = XINT (AREF (decoder, char_index));
1727 else
1728 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1729 }
1730 else /* method == CHARSET_METHOD_OFFSET */
1731 {
1732 c = char_index + CHARSET_CODE_OFFSET (charset);
1733 if (CHARSET_UNIFIED_P (charset)
1734 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1735 {
1736 /* Unify C with a Unicode character if possible. */
1737 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1738 c = maybe_unify_char (c, val);
1739 }
1740 }
1741 }
1742
1743 return c;
1744 }
1745
1746 /* Variable used temporarily by the macro ENCODE_CHAR. */
1747 Lisp_Object charset_work;
1748
1749 /* Return a code-point of C in CHARSET. If C doesn't belong to
1750 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1751 use CHARSET's strict_max_char instead of max_char. */
1752
1753 unsigned
1754 encode_char (struct charset *charset, int c)
1755 {
1756 unsigned code;
1757 enum charset_method method = CHARSET_METHOD (charset);
1758
1759 if (CHARSET_UNIFIED_P (charset))
1760 {
1761 Lisp_Object deunifier;
1762 int code_index = -1;
1763
1764 deunifier = CHARSET_DEUNIFIER (charset);
1765 if (! CHAR_TABLE_P (deunifier))
1766 {
1767 load_charset (charset, 2);
1768 deunifier = CHARSET_DEUNIFIER (charset);
1769 }
1770 if (CHAR_TABLE_P (deunifier))
1771 {
1772 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1773
1774 if (INTEGERP (deunified))
1775 code_index = XINT (deunified);
1776 }
1777 else
1778 {
1779 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1780 }
1781 if (code_index >= 0)
1782 c = CHARSET_CODE_OFFSET (charset) + code_index;
1783 }
1784
1785 if (method == CHARSET_METHOD_SUBSET)
1786 {
1787 Lisp_Object subset_info;
1788 struct charset *this_charset;
1789
1790 subset_info = CHARSET_SUBSET (charset);
1791 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1792 code = ENCODE_CHAR (this_charset, c);
1793 if (code == CHARSET_INVALID_CODE (this_charset)
1794 || code < XFASTINT (AREF (subset_info, 1))
1795 || code > XFASTINT (AREF (subset_info, 2)))
1796 return CHARSET_INVALID_CODE (charset);
1797 code += XINT (AREF (subset_info, 3));
1798 return code;
1799 }
1800
1801 if (method == CHARSET_METHOD_SUPERSET)
1802 {
1803 Lisp_Object parents;
1804
1805 parents = CHARSET_SUPERSET (charset);
1806 for (; CONSP (parents); parents = XCDR (parents))
1807 {
1808 int id = XINT (XCAR (XCAR (parents)));
1809 int code_offset = XINT (XCDR (XCAR (parents)));
1810 struct charset *this_charset = CHARSET_FROM_ID (id);
1811
1812 code = ENCODE_CHAR (this_charset, c);
1813 if (code != CHARSET_INVALID_CODE (this_charset))
1814 return code + code_offset;
1815 }
1816 return CHARSET_INVALID_CODE (charset);
1817 }
1818
1819 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1820 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1821 return CHARSET_INVALID_CODE (charset);
1822
1823 if (method == CHARSET_METHOD_MAP)
1824 {
1825 Lisp_Object encoder;
1826 Lisp_Object val;
1827
1828 encoder = CHARSET_ENCODER (charset);
1829 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1830 {
1831 load_charset (charset, 2);
1832 encoder = CHARSET_ENCODER (charset);
1833 }
1834 if (CHAR_TABLE_P (encoder))
1835 {
1836 val = CHAR_TABLE_REF (encoder, c);
1837 if (NILP (val))
1838 return CHARSET_INVALID_CODE (charset);
1839 code = XINT (val);
1840 if (! CHARSET_COMPACT_CODES_P (charset))
1841 code = INDEX_TO_CODE_POINT (charset, code);
1842 }
1843 else
1844 {
1845 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1846 code = INDEX_TO_CODE_POINT (charset, code);
1847 }
1848 }
1849 else /* method == CHARSET_METHOD_OFFSET */
1850 {
1851 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1852
1853 code = INDEX_TO_CODE_POINT (charset, code_index);
1854 }
1855
1856 return code;
1857 }
1858
1859
1860 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1861 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1862 Return nil if CODE-POINT is not valid in CHARSET.
1863
1864 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1865 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1866 {
1867 int c, id;
1868 unsigned code;
1869 struct charset *charsetp;
1870
1871 CHECK_CHARSET_GET_ID (charset, id);
1872 code = cons_to_unsigned (code_point, UINT_MAX);
1873 charsetp = CHARSET_FROM_ID (id);
1874 c = DECODE_CHAR (charsetp, code);
1875 return (c >= 0 ? make_number (c) : Qnil);
1876 }
1877
1878
1879 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1880 doc: /* Encode the character CH into a code-point of CHARSET.
1881 Return nil if CHARSET doesn't include CH. */)
1882 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1883 {
1884 int c, id;
1885 unsigned code;
1886 struct charset *charsetp;
1887
1888 CHECK_CHARSET_GET_ID (charset, id);
1889 CHECK_CHARACTER (ch);
1890 c = XFASTINT (ch);
1891 charsetp = CHARSET_FROM_ID (id);
1892 code = ENCODE_CHAR (charsetp, c);
1893 if (code == CHARSET_INVALID_CODE (charsetp))
1894 return Qnil;
1895 return INTEGER_TO_CONS (code);
1896 }
1897
1898
1899 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1900 doc:
1901 /* Return a character of CHARSET whose position codes are CODEn.
1902
1903 CODE1 through CODE4 are optional, but if you don't supply sufficient
1904 position codes, it is assumed that the minimum code in each dimension
1905 is specified. */)
1906 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1907 {
1908 int id, dimension;
1909 struct charset *charsetp;
1910 unsigned code;
1911 int c;
1912
1913 CHECK_CHARSET_GET_ID (charset, id);
1914 charsetp = CHARSET_FROM_ID (id);
1915
1916 dimension = CHARSET_DIMENSION (charsetp);
1917 if (NILP (code1))
1918 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1919 ? 0 : CHARSET_MIN_CODE (charsetp));
1920 else
1921 {
1922 CHECK_NATNUM (code1);
1923 if (XFASTINT (code1) >= 0x100)
1924 args_out_of_range (make_number (0xFF), code1);
1925 code = XFASTINT (code1);
1926
1927 if (dimension > 1)
1928 {
1929 code <<= 8;
1930 if (NILP (code2))
1931 code |= charsetp->code_space[(dimension - 2) * 4];
1932 else
1933 {
1934 CHECK_NATNUM (code2);
1935 if (XFASTINT (code2) >= 0x100)
1936 args_out_of_range (make_number (0xFF), code2);
1937 code |= XFASTINT (code2);
1938 }
1939
1940 if (dimension > 2)
1941 {
1942 code <<= 8;
1943 if (NILP (code3))
1944 code |= charsetp->code_space[(dimension - 3) * 4];
1945 else
1946 {
1947 CHECK_NATNUM (code3);
1948 if (XFASTINT (code3) >= 0x100)
1949 args_out_of_range (make_number (0xFF), code3);
1950 code |= XFASTINT (code3);
1951 }
1952
1953 if (dimension > 3)
1954 {
1955 code <<= 8;
1956 if (NILP (code4))
1957 code |= charsetp->code_space[0];
1958 else
1959 {
1960 CHECK_NATNUM (code4);
1961 if (XFASTINT (code4) >= 0x100)
1962 args_out_of_range (make_number (0xFF), code4);
1963 code |= XFASTINT (code4);
1964 }
1965 }
1966 }
1967 }
1968 }
1969
1970 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1971 code &= 0x7F7F7F7F;
1972 c = DECODE_CHAR (charsetp, code);
1973 if (c < 0)
1974 error ("Invalid code(s)");
1975 return make_number (c);
1976 }
1977
1978
1979 /* Return the first charset in CHARSET_LIST that contains C.
1980 CHARSET_LIST is a list of charset IDs. If it is nil, use
1981 Vcharset_ordered_list. */
1982
1983 struct charset *
1984 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1985 {
1986 bool maybe_null = 0;
1987
1988 if (NILP (charset_list))
1989 charset_list = Vcharset_ordered_list;
1990 else
1991 maybe_null = 1;
1992
1993 while (CONSP (charset_list))
1994 {
1995 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1996 unsigned code = ENCODE_CHAR (charset, c);
1997
1998 if (code != CHARSET_INVALID_CODE (charset))
1999 {
2000 if (code_return)
2001 *code_return = code;
2002 return charset;
2003 }
2004 charset_list = XCDR (charset_list);
2005 if (! maybe_null
2006 && c <= MAX_UNICODE_CHAR
2007 && EQ (charset_list, Vcharset_non_preferred_head))
2008 return CHARSET_FROM_ID (charset_unicode);
2009 }
2010 return (maybe_null ? NULL
2011 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2012 : CHARSET_FROM_ID (charset_eight_bit));
2013 }
2014
2015
2016 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2017 doc:
2018 /*Return list of charset and one to four position-codes of CH.
2019 The charset is decided by the current priority order of charsets.
2020 A position-code is a byte value of each dimension of the code-point of
2021 CH in the charset. */)
2022 (Lisp_Object ch)
2023 {
2024 struct charset *charset;
2025 int c, dimension;
2026 unsigned code;
2027 Lisp_Object val;
2028
2029 CHECK_CHARACTER (ch);
2030 c = XFASTINT (ch);
2031 charset = CHAR_CHARSET (c);
2032 if (! charset)
2033 emacs_abort ();
2034 code = ENCODE_CHAR (charset, c);
2035 if (code == CHARSET_INVALID_CODE (charset))
2036 emacs_abort ();
2037 dimension = CHARSET_DIMENSION (charset);
2038 for (val = Qnil; dimension > 0; dimension--)
2039 {
2040 val = Fcons (make_number (code & 0xFF), val);
2041 code >>= 8;
2042 }
2043 return Fcons (CHARSET_NAME (charset), val);
2044 }
2045
2046
2047 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2048 doc: /* Return the charset of highest priority that contains CH.
2049 ASCII characters are an exception: for them, this function always
2050 returns `ascii'.
2051 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2052 from which to find the charset. It may also be a coding system. In
2053 that case, find the charset from what supported by that coding system. */)
2054 (Lisp_Object ch, Lisp_Object restriction)
2055 {
2056 struct charset *charset;
2057
2058 CHECK_CHARACTER (ch);
2059 if (NILP (restriction))
2060 charset = CHAR_CHARSET (XINT (ch));
2061 else
2062 {
2063 if (CONSP (restriction))
2064 {
2065 int c = XFASTINT (ch);
2066
2067 for (; CONSP (restriction); restriction = XCDR (restriction))
2068 {
2069 struct charset *rcharset;
2070
2071 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2072 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2073 return XCAR (restriction);
2074 }
2075 return Qnil;
2076 }
2077 restriction = coding_system_charset_list (restriction);
2078 charset = char_charset (XINT (ch), restriction, NULL);
2079 if (! charset)
2080 return Qnil;
2081 }
2082 return (CHARSET_NAME (charset));
2083 }
2084
2085
2086 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2087 doc: /*
2088 Return charset of a character in the current buffer at position POS.
2089 If POS is nil, it defaults to the current point.
2090 If POS is out of range, the value is nil. */)
2091 (Lisp_Object pos)
2092 {
2093 Lisp_Object ch;
2094 struct charset *charset;
2095
2096 ch = Fchar_after (pos);
2097 if (! INTEGERP (ch))
2098 return ch;
2099 charset = CHAR_CHARSET (XINT (ch));
2100 return (CHARSET_NAME (charset));
2101 }
2102
2103
2104 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2105 doc: /*
2106 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2107
2108 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2109 by their DIMENSION, CHARS, and FINAL-CHAR,
2110 whereas Emacs distinguishes them by charset symbol.
2111 See the documentation of the function `charset-info' for the meanings of
2112 DIMENSION, CHARS, and FINAL-CHAR. */)
2113 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2114 {
2115 int id;
2116 bool chars_flag;
2117
2118 check_iso_charset_parameter (dimension, chars, final_char);
2119 chars_flag = XFASTINT (chars) == 96;
2120 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2121 XFASTINT (final_char));
2122 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2123 }
2124
2125
2126 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2127 0, 0, 0,
2128 doc: /*
2129 Internal use only.
2130 Clear temporary charset mapping tables.
2131 It should be called only from temacs invoked for dumping. */)
2132 (void)
2133 {
2134 if (temp_charset_work)
2135 {
2136 xfree (temp_charset_work);
2137 temp_charset_work = NULL;
2138 }
2139
2140 if (CHAR_TABLE_P (Vchar_unify_table))
2141 Foptimize_char_table (Vchar_unify_table, Qnil);
2142
2143 return Qnil;
2144 }
2145
2146 DEFUN ("charset-priority-list", Fcharset_priority_list,
2147 Scharset_priority_list, 0, 1, 0,
2148 doc: /* Return the list of charsets ordered by priority.
2149 HIGHESTP non-nil means just return the highest priority one. */)
2150 (Lisp_Object highestp)
2151 {
2152 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2153
2154 if (!NILP (highestp))
2155 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2156
2157 while (!NILP (list))
2158 {
2159 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2160 list = XCDR (list);
2161 }
2162 return Fnreverse (val);
2163 }
2164
2165 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2166 1, MANY, 0,
2167 doc: /* Assign higher priority to the charsets given as arguments.
2168 usage: (set-charset-priority &rest charsets) */)
2169 (ptrdiff_t nargs, Lisp_Object *args)
2170 {
2171 Lisp_Object new_head, old_list, arglist[2];
2172 Lisp_Object list_2022, list_emacs_mule;
2173 ptrdiff_t i;
2174 int id;
2175
2176 old_list = Fcopy_sequence (Vcharset_ordered_list);
2177 new_head = Qnil;
2178 for (i = 0; i < nargs; i++)
2179 {
2180 CHECK_CHARSET_GET_ID (args[i], id);
2181 if (! NILP (Fmemq (make_number (id), old_list)))
2182 {
2183 old_list = Fdelq (make_number (id), old_list);
2184 new_head = Fcons (make_number (id), new_head);
2185 }
2186 }
2187 arglist[0] = Fnreverse (new_head);
2188 arglist[1] = Vcharset_non_preferred_head = old_list;
2189 Vcharset_ordered_list = Fnconc (2, arglist);
2190 charset_ordered_list_tick++;
2191
2192 charset_unibyte = -1;
2193 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2194 CONSP (old_list); old_list = XCDR (old_list))
2195 {
2196 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2197 list_2022 = Fcons (XCAR (old_list), list_2022);
2198 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2199 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2200 if (charset_unibyte < 0)
2201 {
2202 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2203
2204 if (CHARSET_DIMENSION (charset) == 1
2205 && CHARSET_ASCII_COMPATIBLE_P (charset)
2206 && CHARSET_MAX_CHAR (charset) >= 0x80)
2207 charset_unibyte = CHARSET_ID (charset);
2208 }
2209 }
2210 Viso_2022_charset_list = Fnreverse (list_2022);
2211 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2212 if (charset_unibyte < 0)
2213 charset_unibyte = charset_iso_8859_1;
2214
2215 return Qnil;
2216 }
2217
2218 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2219 0, 1, 0,
2220 doc: /* Internal use only.
2221 Return charset identification number of CHARSET. */)
2222 (Lisp_Object charset)
2223 {
2224 int id;
2225
2226 CHECK_CHARSET_GET_ID (charset, id);
2227 return make_number (id);
2228 }
2229
2230 struct charset_sort_data
2231 {
2232 Lisp_Object charset;
2233 int id;
2234 ptrdiff_t priority;
2235 };
2236
2237 static int
2238 charset_compare (const void *d1, const void *d2)
2239 {
2240 const struct charset_sort_data *data1 = d1, *data2 = d2;
2241 if (data1->priority != data2->priority)
2242 return data1->priority < data2->priority ? -1 : 1;
2243 return 0;
2244 }
2245
2246 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2247 doc: /* Sort charset list CHARSETS by a priority of each charset.
2248 Return the sorted list. CHARSETS is modified by side effects.
2249 See also `charset-priority-list' and `set-charset-priority'. */)
2250 (Lisp_Object charsets)
2251 {
2252 Lisp_Object len = Flength (charsets);
2253 ptrdiff_t n = XFASTINT (len), i, j;
2254 int done;
2255 Lisp_Object tail, elt, attrs;
2256 struct charset_sort_data *sort_data;
2257 int id, min_id = INT_MAX, max_id = INT_MIN;
2258 USE_SAFE_ALLOCA;
2259
2260 if (n == 0)
2261 return Qnil;
2262 SAFE_NALLOCA (sort_data, 1, n);
2263 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2264 {
2265 elt = XCAR (tail);
2266 CHECK_CHARSET_GET_ATTR (elt, attrs);
2267 sort_data[i].charset = elt;
2268 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2269 if (id < min_id)
2270 min_id = id;
2271 if (id > max_id)
2272 max_id = id;
2273 }
2274 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2275 done < n && CONSP (tail); tail = XCDR (tail), i++)
2276 {
2277 elt = XCAR (tail);
2278 id = XFASTINT (elt);
2279 if (id >= min_id && id <= max_id)
2280 for (j = 0; j < n; j++)
2281 if (sort_data[j].id == id)
2282 {
2283 sort_data[j].priority = i;
2284 done++;
2285 }
2286 }
2287 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2288 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2289 XSETCAR (tail, sort_data[i].charset);
2290 SAFE_FREE ();
2291 return charsets;
2292 }
2293
2294 \f
2295 void
2296 init_charset (void)
2297 {
2298 Lisp_Object tempdir;
2299 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2300 if (! file_accessible_directory_p (SSDATA (tempdir)))
2301 {
2302 /* This used to be non-fatal (dir_warning), but it should not
2303 happen, and if it does sooner or later it will cause some
2304 obscure problem (eg bug#6401), so better abort. */
2305 fprintf (stderr, "Error: charsets directory not found:\n\
2306 %s\n\
2307 Emacs will not function correctly without the character map files.\n%s\
2308 Please check your installation!\n",
2309 SDATA (tempdir),
2310 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2311 variable is set, maybe it has the wrong value?\n" : "");
2312 exit (1);
2313 }
2314
2315 Vcharset_map_path = list1 (tempdir);
2316 }
2317
2318
2319 void
2320 init_charset_once (void)
2321 {
2322 int i, j, k;
2323
2324 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2325 for (j = 0; j < ISO_MAX_CHARS; j++)
2326 for (k = 0; k < ISO_MAX_FINAL; k++)
2327 iso_charset_table[i][j][k] = -1;
2328
2329 for (i = 0; i < 256; i++)
2330 emacs_mule_charset[i] = -1;
2331
2332 charset_jisx0201_roman = -1;
2333 charset_jisx0208_1978 = -1;
2334 charset_jisx0208 = -1;
2335 charset_ksc5601 = -1;
2336 }
2337
2338 #ifdef emacs
2339
2340 /* Allocate an initial charset table that is large enough to handle
2341 Emacs while it is bootstrapping. As of September 2011, the size
2342 needs to be at least 166; make it a bit bigger to allow for future
2343 expansion.
2344
2345 Don't make the value so small that the table is reallocated during
2346 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2347 during an initial bootstrap wreak havoc after dumping; see the
2348 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2349 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2350 static struct charset charset_table_init[180];
2351
2352 void
2353 syms_of_charset (void)
2354 {
2355 #include "charset.x"
2356
2357 DEFSYM (Qcharsetp, "charsetp");
2358
2359 DEFSYM (Qascii, "ascii");
2360 DEFSYM (Qunicode, "unicode");
2361 DEFSYM (Qemacs, "emacs");
2362 DEFSYM (Qeight_bit, "eight-bit");
2363 DEFSYM (Qiso_8859_1, "iso-8859-1");
2364
2365 DEFSYM (Qgl, "gl");
2366 DEFSYM (Qgr, "gr");
2367
2368 staticpro (&Vcharset_ordered_list);
2369 Vcharset_ordered_list = Qnil;
2370
2371 staticpro (&Viso_2022_charset_list);
2372 Viso_2022_charset_list = Qnil;
2373
2374 staticpro (&Vemacs_mule_charset_list);
2375 Vemacs_mule_charset_list = Qnil;
2376
2377 /* Don't staticpro them here. It's done in syms_of_fns. */
2378 QCtest = intern_c_string (":test");
2379 Qeq = intern_c_string ("eq");
2380
2381 staticpro (&Vcharset_hash_table);
2382 {
2383 Lisp_Object args[2];
2384 args[0] = QCtest;
2385 args[1] = Qeq;
2386 Vcharset_hash_table = Fmake_hash_table (2, args);
2387 }
2388
2389 charset_table = charset_table_init;
2390 charset_table_size = ARRAYELTS (charset_table_init);
2391 charset_table_used = 0;
2392
2393 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2394 doc: /* List of directories to search for charset map files. */);
2395 Vcharset_map_path = Qnil;
2396
2397 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2398 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2399 inhibit_load_charset_map = 0;
2400
2401 DEFVAR_LISP ("charset-list", Vcharset_list,
2402 doc: /* List of all charsets ever defined. */);
2403 Vcharset_list = Qnil;
2404
2405 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2406 doc: /* ISO639 language mnemonic symbol for the current language environment.
2407 If the current language environment is for multiple languages (e.g. "Latin-1"),
2408 the value may be a list of mnemonics. */);
2409 Vcurrent_iso639_language = Qnil;
2410
2411 charset_ascii
2412 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2413 0, 127, 'B', -1, 0, 1, 0, 0);
2414 charset_iso_8859_1
2415 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2416 0, 255, -1, -1, -1, 1, 0, 0);
2417 charset_unicode
2418 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2419 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2420 charset_emacs
2421 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2422 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2423 charset_eight_bit
2424 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2425 128, 255, -1, 0, -1, 0, 1,
2426 MAX_5_BYTE_CHAR + 1);
2427 charset_unibyte = charset_iso_8859_1;
2428 }
2429
2430 #endif /* emacs */