* emacs-lisp/re-builder.el (reb-auto-update): Remove redundant code.
[bpt/emacs.git] / src / charset.c
... / ...
CommitLineData
1/* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13This file is part of GNU Emacs.
14
15GNU Emacs is free software: you can redistribute it and/or modify
16it under the terms of the GNU General Public License as published by
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
19
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
24
25You should have received a copy of the GNU General Public License
26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28#include <config.h>
29
30#include <stdio.h>
31#include <unistd.h>
32#include <ctype.h>
33#include <sys/types.h>
34#include "lisp.h"
35#include "character.h"
36#include "charset.h"
37#include "coding.h"
38#include "disptab.h"
39#include "buffer.h"
40
41/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54*/
55
56/* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58Lisp_Object Vcharset_list;
59
60/* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62Lisp_Object Vcharset_hash_table;
63
64/* Table of struct charset. */
65struct charset *charset_table;
66
67static int charset_table_size;
68static int charset_table_used;
69
70Lisp_Object Qcharsetp;
71
72/* Special charset symbols. */
73Lisp_Object Qascii;
74Lisp_Object Qeight_bit;
75Lisp_Object Qiso_8859_1;
76Lisp_Object Qunicode;
77Lisp_Object Qemacs;
78
79/* The corresponding charsets. */
80int charset_ascii;
81int charset_eight_bit;
82int charset_iso_8859_1;
83int charset_unicode;
84int charset_emacs;
85
86/* The other special charsets. */
87int charset_jisx0201_roman;
88int charset_jisx0208_1978;
89int charset_jisx0208;
90
91/* Value of charset attribute `charset-iso-plane'. */
92Lisp_Object Qgl, Qgr;
93
94/* Charset of unibyte characters. */
95int charset_unibyte;
96
97/* List of charsets ordered by the priority. */
98Lisp_Object Vcharset_ordered_list;
99
100/* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102Lisp_Object Vcharset_non_preferred_head;
103
104/* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107unsigned short charset_ordered_list_tick;
108
109/* List of iso-2022 charsets. */
110Lisp_Object Viso_2022_charset_list;
111
112/* List of emacs-mule charsets. */
113Lisp_Object Vemacs_mule_charset_list;
114
115struct charset *emacs_mule_charset[256];
116
117/* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121Lisp_Object Vcharset_map_path;
122
123/* If nonzero, don't load charset maps. */
124int inhibit_load_charset_map;
125
126Lisp_Object Vcurrent_iso639_language;
127
128/* Defined in chartab.c */
129extern void
130map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
135#define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
151
152
153/* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
156#define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
169
170/* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
172
173static struct
174{
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203} *temp_charset_work;
204
205#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
227\f
228
229/* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231int charset_map_loaded;
232
233struct charset_map_entries
234{
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240};
241
242/* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
251
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
257
258 If CONTROL_FLAG is 2, setup the following tables.
259
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265*/
266
267static void
268load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
273{
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
280
281 if (n_entries <= 0)
282 return;
283
284 if (control_flag)
285 {
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
293
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
333 charset_map_loaded = 1;
334 }
335
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
339 {
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
344
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
352 {
353 to_index = from_index;
354 to_c = from_c;
355 }
356 else
357 {
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
360 }
361 if (from_index < 0 || to_index < 0)
362 continue;
363
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
368
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
418
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
421 }
422 }
423
424 if (control_flag == 0)
425 {
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
429 }
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
435}
436
437
438/* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
440
441static INLINE unsigned
442read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
445{
446 int c;
447 unsigned n;
448
449 while ((c = getc (fp)) != EOF)
450 {
451 if (c == '#')
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
460 }
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
478}
479
480
481/* Return a mapping vector for CHARSET loaded from MAPFILE.
482 Each line of MAPFILE has this form
483 0xAAAA 0xCCCC
484 where 0xAAAA is a code-point and 0xCCCC is the corresponding
485 character code, or this form
486 0xAAAA-0xBBBB 0xCCCC
487 where 0xAAAA and 0xBBBB are code-points specifying a range, and
488 0xCCCC is the first character code of the range.
489
490 The returned vector has this form:
491 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
492 where CODE1 is a code-point or a cons of code-points specifying a
493 range. */
494
495extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
496
497static void
498load_charset_map_from_file (charset, mapfile, control_flag)
499 struct charset *charset;
500 Lisp_Object mapfile;
501 int control_flag;
502{
503 unsigned min_code = CHARSET_MIN_CODE (charset);
504 unsigned max_code = CHARSET_MAX_CODE (charset);
505 int fd;
506 FILE *fp;
507 int eof;
508 Lisp_Object suffixes;
509 struct charset_map_entries *head, *entries;
510 int n_entries;
511
512 suffixes = Fcons (build_string (".map"),
513 Fcons (build_string (".TXT"), Qnil));
514
515 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 {
519 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
520 return;
521 }
522
523 head = entries = ((struct charset_map_entries *)
524 alloca (sizeof (struct charset_map_entries)));
525 n_entries = 0;
526 eof = 0;
527 while (1)
528 {
529 unsigned from, to;
530 int c;
531 int idx;
532
533 from = read_hex (fp, &eof);
534 if (eof)
535 break;
536 if (getc (fp) == '-')
537 to = read_hex (fp, &eof);
538 else
539 to = from;
540 c = (int) read_hex (fp, &eof);
541
542 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
543 continue;
544
545 if (n_entries > 0 && (n_entries % 0x10000) == 0)
546 {
547 entries->next = ((struct charset_map_entries *)
548 alloca (sizeof (struct charset_map_entries)));
549 entries = entries->next;
550 }
551 idx = n_entries % 0x10000;
552 entries->entry[idx].from = from;
553 entries->entry[idx].to = to;
554 entries->entry[idx].c = c;
555 n_entries++;
556 }
557 fclose (fp);
558 close (fd);
559
560 load_charset_map (charset, head, n_entries, control_flag);
561}
562
563static void
564load_charset_map_from_vector (charset, vec, control_flag)
565 struct charset *charset;
566 Lisp_Object vec;
567 int control_flag;
568{
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
575
576 if (len % 2 == 1)
577 {
578 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
579 return;
580 }
581
582 head = entries = ((struct charset_map_entries *)
583 alloca (sizeof (struct charset_map_entries)));
584 n_entries = 0;
585 for (i = 0; i < len; i += 2)
586 {
587 Lisp_Object val, val2;
588 unsigned from, to;
589 int c;
590 int idx;
591
592 val = AREF (vec, i);
593 if (CONSP (val))
594 {
595 val2 = XCDR (val);
596 val = XCAR (val);
597 CHECK_NATNUM (val);
598 CHECK_NATNUM (val2);
599 from = XFASTINT (val);
600 to = XFASTINT (val2);
601 }
602 else
603 {
604 CHECK_NATNUM (val);
605 from = to = XFASTINT (val);
606 }
607 val = AREF (vec, i + 1);
608 CHECK_NATNUM (val);
609 c = XFASTINT (val);
610
611 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
612 continue;
613
614 if (n_entries > 0 && (n_entries % 0x10000) == 0)
615 {
616 entries->next = ((struct charset_map_entries *)
617 alloca (sizeof (struct charset_map_entries)));
618 entries = entries->next;
619 }
620 idx = n_entries % 0x10000;
621 entries->entry[idx].from = from;
622 entries->entry[idx].to = to;
623 entries->entry[idx].c = c;
624 n_entries++;
625 }
626
627 load_charset_map (charset, head, n_entries, control_flag);
628}
629
630
631/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
632 map it is (see the comment of load_charset_map for the detail). */
633
634static void
635load_charset (charset, control_flag)
636 struct charset *charset;
637 int control_flag;
638{
639 Lisp_Object map;
640
641 if (inhibit_load_charset_map
642 && temp_charset_work
643 && charset == temp_charset_work->current
644 && (control_flag == 2 == temp_charset_work->for_encoder))
645 return;
646
647 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
648 map = CHARSET_MAP (charset);
649 else if (CHARSET_UNIFIED_P (charset))
650 map = CHARSET_UNIFY_MAP (charset);
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
655}
656
657
658DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
659 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
660 (object)
661 Lisp_Object object;
662{
663 return (CHARSETP (object) ? Qt : Qnil);
664}
665
666
667void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned from, unsigned to));
670
671void
672map_charset_for_dump (c_function, function, arg, from, to)
673 void (*c_function) (Lisp_Object, Lisp_Object);
674 Lisp_Object function, arg;
675 unsigned from, to;
676{
677 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
678 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
679 Lisp_Object range;
680 int c, stop;
681 struct gcpro gcpro1;
682
683 range = Fcons (Qnil, Qnil);
684 GCPRO1 (range);
685
686 c = temp_charset_work->min_char;
687 stop = (temp_charset_work->max_char < 0x20000
688 ? temp_charset_work->max_char : 0xFFFF);
689
690 while (1)
691 {
692 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
693
694 if (index >= from_idx && index <= to_idx)
695 {
696 if (NILP (XCAR (range)))
697 XSETCAR (range, make_number (c));
698 }
699 else if (! NILP (XCAR (range)))
700 {
701 XSETCDR (range, make_number (c - 1));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 XSETCAR (range, Qnil);
707 }
708 if (c == stop)
709 {
710 if (c == temp_charset_work->max_char)
711 {
712 if (! NILP (XCAR (range)))
713 {
714 XSETCDR (range, make_number (c));
715 if (c_function)
716 (*c_function) (arg, range);
717 else
718 call2 (function, range, arg);
719 }
720 break;
721 }
722 c = 0x1FFFF;
723 stop = temp_charset_work->max_char;
724 }
725 c++;
726 }
727}
728
729void
730map_charset_chars (c_function, function, arg,
731 charset, from, to)
732 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
733 Lisp_Object function, arg;
734 struct charset *charset;
735 unsigned from, to;
736{
737 Lisp_Object range;
738 int partial;
739
740 partial = (from > CHARSET_MIN_CODE (charset)
741 || to < CHARSET_MAX_CODE (charset));
742
743 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
744 {
745 int from_idx = CODE_POINT_TO_INDEX (charset, from);
746 int to_idx = CODE_POINT_TO_INDEX (charset, to);
747 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
748 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
749
750 if (CHARSET_UNIFIED_P (charset))
751 {
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_DEUNIFIER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
760 }
761
762 range = Fcons (make_number (from_c), make_number (to_c));
763 if (NILP (function))
764 (*c_function) (arg, range);
765 else
766 call2 (function, range, arg);
767 }
768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
769 {
770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_ENCODER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
778 }
779 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
780 {
781 Lisp_Object subset_info;
782 int offset;
783
784 subset_info = CHARSET_SUBSET (charset);
785 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
786 offset = XINT (AREF (subset_info, 3));
787 from -= offset;
788 if (from < XFASTINT (AREF (subset_info, 1)))
789 from = XFASTINT (AREF (subset_info, 1));
790 to -= offset;
791 if (to > XFASTINT (AREF (subset_info, 2)))
792 to = XFASTINT (AREF (subset_info, 2));
793 map_charset_chars (c_function, function, arg, charset, from, to);
794 }
795 else /* i.e. CHARSET_METHOD_SUPERSET */
796 {
797 Lisp_Object parents;
798
799 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
800 parents = XCDR (parents))
801 {
802 int offset;
803 unsigned this_from, this_to;
804
805 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
806 offset = XINT (XCDR (XCAR (parents)));
807 this_from = from - offset;
808 this_to = to - offset;
809 if (this_from < CHARSET_MIN_CODE (charset))
810 this_from = CHARSET_MIN_CODE (charset);
811 if (this_to > CHARSET_MAX_CODE (charset))
812 this_to = CHARSET_MAX_CODE (charset);
813 map_charset_chars (c_function, function, arg, charset,
814 this_from, this_to);
815 }
816 }
817}
818
819DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
820 doc: /* Call FUNCTION for all characters in CHARSET.
821FUNCTION is called with an argument RANGE and the optional 3rd
822argument ARG.
823
824RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
825characters contained in CHARSET.
826
827The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
828range of code points of target characters. */)
829 (function, charset, arg, from_code, to_code)
830 Lisp_Object function, charset, arg, from_code, to_code;
831{
832 struct charset *cs;
833 unsigned from, to;
834
835 CHECK_CHARSET_GET_CHARSET (charset, cs);
836 if (NILP (from_code))
837 from = CHARSET_MIN_CODE (cs);
838 else
839 {
840 CHECK_NATNUM (from_code);
841 from = XINT (from_code);
842 if (from < CHARSET_MIN_CODE (cs))
843 from = CHARSET_MIN_CODE (cs);
844 }
845 if (NILP (to_code))
846 to = CHARSET_MAX_CODE (cs);
847 else
848 {
849 CHECK_NATNUM (to_code);
850 to = XINT (to_code);
851 if (to > CHARSET_MAX_CODE (cs))
852 to = CHARSET_MAX_CODE (cs);
853 }
854 map_charset_chars (NULL, function, arg, cs, from, to);
855 return Qnil;
856}
857
858
859/* Define a charset according to the arguments. The Nth argument is
860 the Nth attribute of the charset (the last attribute `charset-id'
861 is not included). See the docstring of `define-charset' for the
862 detail. */
863
864DEFUN ("define-charset-internal", Fdefine_charset_internal,
865 Sdefine_charset_internal, charset_arg_max, MANY, 0,
866 doc: /* For internal use only.
867usage: (define-charset-internal ...) */)
868 (nargs, args)
869 int nargs;
870 Lisp_Object *args;
871{
872 /* Charset attr vector. */
873 Lisp_Object attrs;
874 Lisp_Object val;
875 unsigned hash_code;
876 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
877 int i, j;
878 struct charset charset;
879 int id;
880 int dimension;
881 int new_definition_p;
882 int nchars;
883
884 if (nargs != charset_arg_max)
885 return Fsignal (Qwrong_number_of_arguments,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs)));
888
889 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
890
891 CHECK_SYMBOL (args[charset_arg_name]);
892 ASET (attrs, charset_name, args[charset_arg_name]);
893
894 val = args[charset_arg_code_space];
895 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
896 {
897 int min_byte, max_byte;
898
899 min_byte = XINT (Faref (val, make_number (i * 2)));
900 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
901 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
902 error ("Invalid :code-space value");
903 charset.code_space[i * 4] = min_byte;
904 charset.code_space[i * 4 + 1] = max_byte;
905 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
906 nchars *= charset.code_space[i * 4 + 2];
907 charset.code_space[i * 4 + 3] = nchars;
908 if (max_byte > 0)
909 dimension = i + 1;
910 }
911
912 val = args[charset_arg_dimension];
913 if (NILP (val))
914 charset.dimension = dimension;
915 else
916 {
917 CHECK_NATNUM (val);
918 charset.dimension = XINT (val);
919 if (charset.dimension < 1 || charset.dimension > 4)
920 args_out_of_range_3 (val, make_number (1), make_number (4));
921 }
922
923 charset.code_linear_p
924 = (charset.dimension == 1
925 || (charset.code_space[2] == 256
926 && (charset.dimension == 2
927 || (charset.code_space[6] == 256
928 && (charset.dimension == 3
929 || charset.code_space[10] == 256)))));
930
931 if (! charset.code_linear_p)
932 {
933 charset.code_space_mask = (unsigned char *) xmalloc (256);
934 bzero (charset.code_space_mask, 256);
935 for (i = 0; i < 4; i++)
936 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
937 j++)
938 charset.code_space_mask[j] |= (1 << i);
939 }
940
941 charset.iso_chars_96 = charset.code_space[2] == 96;
942
943 charset.min_code = (charset.code_space[0]
944 | (charset.code_space[4] << 8)
945 | (charset.code_space[8] << 16)
946 | (charset.code_space[12] << 24));
947 charset.max_code = (charset.code_space[1]
948 | (charset.code_space[5] << 8)
949 | (charset.code_space[9] << 16)
950 | (charset.code_space[13] << 24));
951 charset.char_index_offset = 0;
952
953 val = args[charset_arg_min_code];
954 if (! NILP (val))
955 {
956 unsigned code;
957
958 if (INTEGERP (val))
959 code = XINT (val);
960 else
961 {
962 CHECK_CONS (val);
963 CHECK_NUMBER_CAR (val);
964 CHECK_NUMBER_CDR (val);
965 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
966 }
967 if (code < charset.min_code
968 || code > charset.max_code)
969 args_out_of_range_3 (make_number (charset.min_code),
970 make_number (charset.max_code), val);
971 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
972 charset.min_code = code;
973 }
974
975 val = args[charset_arg_max_code];
976 if (! NILP (val))
977 {
978 unsigned code;
979
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
983 {
984 CHECK_CONS (val);
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
988 }
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.max_code = code;
994 }
995
996 charset.compact_codes_p = charset.max_code < 0x10000;
997
998 val = args[charset_arg_invalid_code];
999 if (NILP (val))
1000 {
1001 if (charset.min_code > 0)
1002 charset.invalid_code = 0;
1003 else
1004 {
1005 XSETINT (val, charset.max_code + 1);
1006 if (XINT (val) == charset.max_code + 1)
1007 charset.invalid_code = charset.max_code + 1;
1008 else
1009 error ("Attribute :invalid-code must be specified");
1010 }
1011 }
1012 else
1013 {
1014 CHECK_NATNUM (val);
1015 charset.invalid_code = XFASTINT (val);
1016 }
1017
1018 val = args[charset_arg_iso_final];
1019 if (NILP (val))
1020 charset.iso_final = -1;
1021 else
1022 {
1023 CHECK_NUMBER (val);
1024 if (XINT (val) < '0' || XINT (val) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val));
1026 charset.iso_final = XINT (val);
1027 }
1028
1029 val = args[charset_arg_iso_revision];
1030 if (NILP (val))
1031 charset.iso_revision = -1;
1032 else
1033 {
1034 CHECK_NUMBER (val);
1035 if (XINT (val) > 63)
1036 args_out_of_range (make_number (63), val);
1037 charset.iso_revision = XINT (val);
1038 }
1039
1040 val = args[charset_arg_emacs_mule_id];
1041 if (NILP (val))
1042 charset.emacs_mule_id = -1;
1043 else
1044 {
1045 CHECK_NATNUM (val);
1046 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val));
1048 charset.emacs_mule_id = XINT (val);
1049 }
1050
1051 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1052
1053 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1054
1055 charset.unified_p = 0;
1056
1057 bzero (charset.fast_map, sizeof (charset.fast_map));
1058
1059 if (! NILP (args[charset_arg_code_offset]))
1060 {
1061 val = args[charset_arg_code_offset];
1062 CHECK_NUMBER (val);
1063
1064 charset.method = CHARSET_METHOD_OFFSET;
1065 charset.code_offset = XINT (val);
1066
1067 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1068 charset.min_char = i + charset.code_offset;
1069 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1070 charset.max_char = i + charset.code_offset;
1071 if (charset.max_char > MAX_CHAR)
1072 error ("Unsupported max char: %d", charset.max_char);
1073
1074 i = (charset.min_char >> 7) << 7;
1075 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1076 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1077 i = (i >> 12) << 12;
1078 for (; i <= charset.max_char; i += 0x1000)
1079 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1080 }
1081 else if (! NILP (args[charset_arg_map]))
1082 {
1083 val = args[charset_arg_map];
1084 ASET (attrs, charset_map, val);
1085 charset.method = CHARSET_METHOD_MAP;
1086 }
1087 else if (! NILP (args[charset_arg_subset]))
1088 {
1089 Lisp_Object parent;
1090 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1091 struct charset *parent_charset;
1092
1093 val = args[charset_arg_subset];
1094 parent = Fcar (val);
1095 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1096 parent_min_code = Fnth (make_number (1), val);
1097 CHECK_NATNUM (parent_min_code);
1098 parent_max_code = Fnth (make_number (2), val);
1099 CHECK_NATNUM (parent_max_code);
1100 parent_code_offset = Fnth (make_number (3), val);
1101 CHECK_NUMBER (parent_code_offset);
1102 val = Fmake_vector (make_number (4), Qnil);
1103 ASET (val, 0, make_number (parent_charset->id));
1104 ASET (val, 1, parent_min_code);
1105 ASET (val, 2, parent_max_code);
1106 ASET (val, 3, parent_code_offset);
1107 ASET (attrs, charset_subset, val);
1108
1109 charset.method = CHARSET_METHOD_SUBSET;
1110 /* Here, we just copy the parent's fast_map. It's not accurate,
1111 but at least it works for quickly detecting which character
1112 DOESN'T belong to this charset. */
1113 for (i = 0; i < 190; i++)
1114 charset.fast_map[i] = parent_charset->fast_map[i];
1115
1116 /* We also copy these for parents. */
1117 charset.min_char = parent_charset->min_char;
1118 charset.max_char = parent_charset->max_char;
1119 }
1120 else if (! NILP (args[charset_arg_superset]))
1121 {
1122 val = args[charset_arg_superset];
1123 charset.method = CHARSET_METHOD_SUPERSET;
1124 val = Fcopy_sequence (val);
1125 ASET (attrs, charset_superset, val);
1126
1127 charset.min_char = MAX_CHAR;
1128 charset.max_char = 0;
1129 for (; ! NILP (val); val = Fcdr (val))
1130 {
1131 Lisp_Object elt, car_part, cdr_part;
1132 int this_id, offset;
1133 struct charset *this_charset;
1134
1135 elt = Fcar (val);
1136 if (CONSP (elt))
1137 {
1138 car_part = XCAR (elt);
1139 cdr_part = XCDR (elt);
1140 CHECK_CHARSET_GET_ID (car_part, this_id);
1141 CHECK_NUMBER (cdr_part);
1142 offset = XINT (cdr_part);
1143 }
1144 else
1145 {
1146 CHECK_CHARSET_GET_ID (elt, this_id);
1147 offset = 0;
1148 }
1149 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1150
1151 this_charset = CHARSET_FROM_ID (this_id);
1152 if (charset.min_char > this_charset->min_char)
1153 charset.min_char = this_charset->min_char;
1154 if (charset.max_char < this_charset->max_char)
1155 charset.max_char = this_charset->max_char;
1156 for (i = 0; i < 190; i++)
1157 charset.fast_map[i] |= this_charset->fast_map[i];
1158 }
1159 }
1160 else
1161 error ("None of :code-offset, :map, :parents are specified");
1162
1163 val = args[charset_arg_unify_map];
1164 if (! NILP (val) && !STRINGP (val))
1165 CHECK_VECTOR (val);
1166 ASET (attrs, charset_unify_map, val);
1167
1168 CHECK_LIST (args[charset_arg_plist]);
1169 ASET (attrs, charset_plist, args[charset_arg_plist]);
1170
1171 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1172 &hash_code);
1173 if (charset.hash_index >= 0)
1174 {
1175 new_definition_p = 0;
1176 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1177 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1178 }
1179 else
1180 {
1181 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1182 hash_code);
1183 if (charset_table_used == charset_table_size)
1184 {
1185 struct charset *new_table
1186 = (struct charset *) xmalloc (sizeof (struct charset)
1187 * (charset_table_size + 16));
1188 bcopy (charset_table, new_table,
1189 sizeof (struct charset) * charset_table_size);
1190 charset_table_size += 16;
1191 charset_table = new_table;
1192 }
1193 id = charset_table_used++;
1194 new_definition_p = 1;
1195 }
1196
1197 ASET (attrs, charset_id, make_number (id));
1198 charset.id = id;
1199 charset_table[id] = charset;
1200
1201 if (charset.method == CHARSET_METHOD_MAP)
1202 load_charset (&charset, 0);
1203
1204 if (charset.iso_final >= 0)
1205 {
1206 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1207 charset.iso_final) = id;
1208 if (new_definition_p)
1209 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1210 Fcons (make_number (id), Qnil));
1211 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1212 charset_jisx0201_roman = id;
1213 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1214 charset_jisx0208_1978 = id;
1215 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1216 charset_jisx0208 = id;
1217 }
1218
1219 if (charset.emacs_mule_id >= 0)
1220 {
1221 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1222 if (charset.emacs_mule_id < 0xA0)
1223 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1224 else
1225 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1226 if (new_definition_p)
1227 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1228 Fcons (make_number (id), Qnil));
1229 }
1230
1231 if (new_definition_p)
1232 {
1233 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1234 if (charset.supplementary_p)
1235 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1236 Fcons (make_number (id), Qnil));
1237 else
1238 {
1239 Lisp_Object tail;
1240
1241 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1242 {
1243 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1244
1245 if (cs->supplementary_p)
1246 break;
1247 }
1248 if (EQ (tail, Vcharset_ordered_list))
1249 Vcharset_ordered_list = Fcons (make_number (id),
1250 Vcharset_ordered_list);
1251 else if (NILP (tail))
1252 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1253 Fcons (make_number (id), Qnil));
1254 else
1255 {
1256 val = Fcons (XCAR (tail), XCDR (tail));
1257 XSETCDR (tail, val);
1258 XSETCAR (tail, make_number (id));
1259 }
1260 }
1261 charset_ordered_list_tick++;
1262 }
1263
1264 return Qnil;
1265}
1266
1267
1268/* Same as Fdefine_charset_internal but arguments are more convenient
1269 to call from C (typically in syms_of_charset). This can define a
1270 charset of `offset' method only. Return the ID of the new
1271 charset. */
1272
1273static int
1274define_charset_internal (name, dimension, code_space, min_code, max_code,
1275 iso_final, iso_revision, emacs_mule_id,
1276 ascii_compatible, supplementary,
1277 code_offset)
1278 Lisp_Object name;
1279 int dimension;
1280 unsigned char *code_space;
1281 unsigned min_code, max_code;
1282 int iso_final, iso_revision, emacs_mule_id;
1283 int ascii_compatible, supplementary;
1284 int code_offset;
1285{
1286 Lisp_Object args[charset_arg_max];
1287 Lisp_Object plist[14];
1288 Lisp_Object val;
1289 int i;
1290
1291 args[charset_arg_name] = name;
1292 args[charset_arg_dimension] = make_number (dimension);
1293 val = Fmake_vector (make_number (8), make_number (0));
1294 for (i = 0; i < 8; i++)
1295 ASET (val, i, make_number (code_space[i]));
1296 args[charset_arg_code_space] = val;
1297 args[charset_arg_min_code] = make_number (min_code);
1298 args[charset_arg_max_code] = make_number (max_code);
1299 args[charset_arg_iso_final]
1300 = (iso_final < 0 ? Qnil : make_number (iso_final));
1301 args[charset_arg_iso_revision] = make_number (iso_revision);
1302 args[charset_arg_emacs_mule_id]
1303 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1304 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1305 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1306 args[charset_arg_invalid_code] = Qnil;
1307 args[charset_arg_code_offset] = make_number (code_offset);
1308 args[charset_arg_map] = Qnil;
1309 args[charset_arg_subset] = Qnil;
1310 args[charset_arg_superset] = Qnil;
1311 args[charset_arg_unify_map] = Qnil;
1312
1313 plist[0] = intern (":name");
1314 plist[1] = args[charset_arg_name];
1315 plist[2] = intern (":dimension");
1316 plist[3] = args[charset_arg_dimension];
1317 plist[4] = intern (":code-space");
1318 plist[5] = args[charset_arg_code_space];
1319 plist[6] = intern (":iso-final-char");
1320 plist[7] = args[charset_arg_iso_final];
1321 plist[8] = intern (":emacs-mule-id");
1322 plist[9] = args[charset_arg_emacs_mule_id];
1323 plist[10] = intern (":ascii-compatible-p");
1324 plist[11] = args[charset_arg_ascii_compatible_p];
1325 plist[12] = intern (":code-offset");
1326 plist[13] = args[charset_arg_code_offset];
1327
1328 args[charset_arg_plist] = Flist (14, plist);
1329 Fdefine_charset_internal (charset_arg_max, args);
1330
1331 return XINT (CHARSET_SYMBOL_ID (name));
1332}
1333
1334
1335DEFUN ("define-charset-alias", Fdefine_charset_alias,
1336 Sdefine_charset_alias, 2, 2, 0,
1337 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1338 (alias, charset)
1339 Lisp_Object alias, charset;
1340{
1341 Lisp_Object attr;
1342
1343 CHECK_CHARSET_GET_ATTR (charset, attr);
1344 Fputhash (alias, attr, Vcharset_hash_table);
1345 Vcharset_list = Fcons (alias, Vcharset_list);
1346 return Qnil;
1347}
1348
1349
1350DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1351 doc: /* Return the property list of CHARSET. */)
1352 (charset)
1353 Lisp_Object charset;
1354{
1355 Lisp_Object attrs;
1356
1357 CHECK_CHARSET_GET_ATTR (charset, attrs);
1358 return CHARSET_ATTR_PLIST (attrs);
1359}
1360
1361
1362DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1363 doc: /* Set CHARSET's property list to PLIST. */)
1364 (charset, plist)
1365 Lisp_Object charset, plist;
1366{
1367 Lisp_Object attrs;
1368
1369 CHECK_CHARSET_GET_ATTR (charset, attrs);
1370 CHARSET_ATTR_PLIST (attrs) = plist;
1371 return plist;
1372}
1373
1374
1375DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1376 doc: /* Unify characters of CHARSET with Unicode.
1377This means reading the relevant file and installing the table defined
1378by CHARSET's `:unify-map' property.
1379
1380Optional second arg UNIFY-MAP is a file name string or a vector. It has
1381the same meaning as the `:unify-map' attribute in the function
1382`define-charset' (which see).
1383
1384Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1385 (charset, unify_map, deunify)
1386 Lisp_Object charset, unify_map, deunify;
1387{
1388 int id;
1389 struct charset *cs;
1390
1391 CHECK_CHARSET_GET_ID (charset, id);
1392 cs = CHARSET_FROM_ID (id);
1393 if (NILP (deunify)
1394 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1395 : ! CHARSET_UNIFIED_P (cs))
1396 return Qnil;
1397
1398 CHARSET_UNIFIED_P (cs) = 0;
1399 if (NILP (deunify))
1400 {
1401 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1402 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1403 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1404 if (NILP (unify_map))
1405 unify_map = CHARSET_UNIFY_MAP (cs);
1406 else
1407 {
1408 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1409 signal_error ("Bad unify-map", unify_map);
1410 CHARSET_UNIFY_MAP (cs) = unify_map;
1411 }
1412 if (NILP (Vchar_unify_table))
1413 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1414 char_table_set_range (Vchar_unify_table,
1415 cs->min_char, cs->max_char, charset);
1416 CHARSET_UNIFIED_P (cs) = 1;
1417 }
1418 else if (CHAR_TABLE_P (Vchar_unify_table))
1419 {
1420 int min_code = CHARSET_MIN_CODE (cs);
1421 int max_code = CHARSET_MAX_CODE (cs);
1422 int min_char = DECODE_CHAR (cs, min_code);
1423 int max_char = DECODE_CHAR (cs, max_code);
1424
1425 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1426 }
1427
1428 return Qnil;
1429}
1430
1431DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1432 Sget_unused_iso_final_char, 2, 2, 0,
1433 doc: /*
1434Return an unused ISO final char for a charset of DIMENSION and CHARS.
1435DIMENSION is the number of bytes to represent a character: 1 or 2.
1436CHARS is the number of characters in a dimension: 94 or 96.
1437
1438This final char is for private use, thus the range is `0' (48) .. `?' (63).
1439If there's no unused final char for the specified kind of charset,
1440return nil. */)
1441 (dimension, chars)
1442 Lisp_Object dimension, chars;
1443{
1444 int final_char;
1445
1446 CHECK_NUMBER (dimension);
1447 CHECK_NUMBER (chars);
1448 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1449 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1450 if (XINT (chars) != 94 && XINT (chars) != 96)
1451 args_out_of_range_3 (chars, make_number (94), make_number (96));
1452 for (final_char = '0'; final_char <= '?'; final_char++)
1453 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1454 break;
1455 return (final_char <= '?' ? make_number (final_char) : Qnil);
1456}
1457
1458static void
1459check_iso_charset_parameter (dimension, chars, final_char)
1460 Lisp_Object dimension, chars, final_char;
1461{
1462 CHECK_NATNUM (dimension);
1463 CHECK_NATNUM (chars);
1464 CHECK_NATNUM (final_char);
1465
1466 if (XINT (dimension) > 3)
1467 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1468 if (XINT (chars) != 94 && XINT (chars) != 96)
1469 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1470 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1471 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1472}
1473
1474
1475DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1476 4, 4, 0,
1477 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1478
1479On decoding by an ISO-2022 base coding system, when a charset
1480specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1481if CHARSET is designated instead. */)
1482 (dimension, chars, final_char, charset)
1483 Lisp_Object dimension, chars, final_char, charset;
1484{
1485 int id;
1486 int chars_flag;
1487
1488 CHECK_CHARSET_GET_ID (charset, id);
1489 check_iso_charset_parameter (dimension, chars, final_char);
1490 chars_flag = XINT (chars) == 96;
1491 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1492 return Qnil;
1493}
1494
1495
1496/* Return information about charsets in the text at PTR of NBYTES
1497 bytes, which are NCHARS characters. The value is:
1498
1499 0: Each character is represented by one byte. This is always
1500 true for a unibyte string. For a multibyte string, true if
1501 it contains only ASCII characters.
1502
1503 1: No charsets other than ascii, control-1, and latin-1 are
1504 found.
1505
1506 2: Otherwise.
1507*/
1508
1509int
1510string_xstring_p (string)
1511 Lisp_Object string;
1512{
1513 const unsigned char *p = SDATA (string);
1514 const unsigned char *endp = p + SBYTES (string);
1515
1516 if (SCHARS (string) == SBYTES (string))
1517 return 0;
1518
1519 while (p < endp)
1520 {
1521 int c = STRING_CHAR_ADVANCE (p);
1522
1523 if (c >= 0x100)
1524 return 2;
1525 }
1526 return 1;
1527}
1528
1529
1530/* Find charsets in the string at PTR of NCHARS and NBYTES.
1531
1532 CHARSETS is a vector. If Nth element is non-nil, it means the
1533 charset whose id is N is already found.
1534
1535 It may lookup a translation table TABLE if supplied. */
1536
1537static void
1538find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1539 const unsigned char *ptr;
1540 EMACS_INT nchars, nbytes;
1541 Lisp_Object charsets, table;
1542 int multibyte;
1543{
1544 const unsigned char *pend = ptr + nbytes;
1545
1546 if (nchars == nbytes)
1547 {
1548 if (multibyte)
1549 ASET (charsets, charset_ascii, Qt);
1550 else
1551 while (ptr < pend)
1552 {
1553 int c = *ptr++;
1554
1555 if (!NILP (table))
1556 c = translate_char (table, c);
1557 if (ASCII_BYTE_P (c))
1558 ASET (charsets, charset_ascii, Qt);
1559 else
1560 ASET (charsets, charset_eight_bit, Qt);
1561 }
1562 }
1563 else
1564 {
1565 while (ptr < pend)
1566 {
1567 int c = STRING_CHAR_ADVANCE (ptr);
1568 struct charset *charset;
1569
1570 if (!NILP (table))
1571 c = translate_char (table, c);
1572 charset = CHAR_CHARSET (c);
1573 ASET (charsets, CHARSET_ID (charset), Qt);
1574 }
1575 }
1576}
1577
1578DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1579 2, 3, 0,
1580 doc: /* Return a list of charsets in the region between BEG and END.
1581BEG and END are buffer positions.
1582Optional arg TABLE if non-nil is a translation table to look up.
1583
1584If the current buffer is unibyte, the returned list may contain
1585only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1586 (beg, end, table)
1587 Lisp_Object beg, end, table;
1588{
1589 Lisp_Object charsets;
1590 EMACS_INT from, from_byte, to, stop, stop_byte;
1591 int i;
1592 Lisp_Object val;
1593 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1594
1595 validate_region (&beg, &end);
1596 from = XFASTINT (beg);
1597 stop = to = XFASTINT (end);
1598
1599 if (from < GPT && GPT < to)
1600 {
1601 stop = GPT;
1602 stop_byte = GPT_BYTE;
1603 }
1604 else
1605 stop_byte = CHAR_TO_BYTE (stop);
1606
1607 from_byte = CHAR_TO_BYTE (from);
1608
1609 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1610 while (1)
1611 {
1612 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1613 stop_byte - from_byte, charsets, table,
1614 multibyte);
1615 if (stop < to)
1616 {
1617 from = stop, from_byte = stop_byte;
1618 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1619 }
1620 else
1621 break;
1622 }
1623
1624 val = Qnil;
1625 for (i = charset_table_used - 1; i >= 0; i--)
1626 if (!NILP (AREF (charsets, i)))
1627 val = Fcons (CHARSET_NAME (charset_table + i), val);
1628 return val;
1629}
1630
1631DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1632 1, 2, 0,
1633 doc: /* Return a list of charsets in STR.
1634Optional arg TABLE if non-nil is a translation table to look up.
1635
1636If STR is unibyte, the returned list may contain
1637only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1638 (str, table)
1639 Lisp_Object str, table;
1640{
1641 Lisp_Object charsets;
1642 int i;
1643 Lisp_Object val;
1644
1645 CHECK_STRING (str);
1646
1647 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1648 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1649 charsets, table,
1650 STRING_MULTIBYTE (str));
1651 val = Qnil;
1652 for (i = charset_table_used - 1; i >= 0; i--)
1653 if (!NILP (AREF (charsets, i)))
1654 val = Fcons (CHARSET_NAME (charset_table + i), val);
1655 return val;
1656}
1657
1658\f
1659
1660/* Return a unified character code for C (>= 0x110000). VAL is a
1661 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1662 charset symbol. */
1663int
1664maybe_unify_char (c, val)
1665 int c;
1666 Lisp_Object val;
1667{
1668 struct charset *charset;
1669
1670 if (INTEGERP (val))
1671 return XINT (val);
1672 if (NILP (val))
1673 return c;
1674
1675 CHECK_CHARSET_GET_CHARSET (val, charset);
1676 load_charset (charset, 1);
1677 if (! inhibit_load_charset_map)
1678 {
1679 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1680 if (! NILP (val))
1681 c = XINT (val);
1682 }
1683 else
1684 {
1685 int code_index = c - CHARSET_CODE_OFFSET (charset);
1686 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1687
1688 if (unified > 0)
1689 c = unified;
1690 }
1691 return c;
1692}
1693
1694
1695/* Return a character correponding to the code-point CODE of
1696 CHARSET. */
1697
1698int
1699decode_char (charset, code)
1700 struct charset *charset;
1701 unsigned code;
1702{
1703 int c, char_index;
1704 enum charset_method method = CHARSET_METHOD (charset);
1705
1706 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1707 return -1;
1708
1709 if (method == CHARSET_METHOD_SUBSET)
1710 {
1711 Lisp_Object subset_info;
1712
1713 subset_info = CHARSET_SUBSET (charset);
1714 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1715 code -= XINT (AREF (subset_info, 3));
1716 if (code < XFASTINT (AREF (subset_info, 1))
1717 || code > XFASTINT (AREF (subset_info, 2)))
1718 c = -1;
1719 else
1720 c = DECODE_CHAR (charset, code);
1721 }
1722 else if (method == CHARSET_METHOD_SUPERSET)
1723 {
1724 Lisp_Object parents;
1725
1726 parents = CHARSET_SUPERSET (charset);
1727 c = -1;
1728 for (; CONSP (parents); parents = XCDR (parents))
1729 {
1730 int id = XINT (XCAR (XCAR (parents)));
1731 int code_offset = XINT (XCDR (XCAR (parents)));
1732 unsigned this_code = code - code_offset;
1733
1734 charset = CHARSET_FROM_ID (id);
1735 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1736 break;
1737 }
1738 }
1739 else
1740 {
1741 char_index = CODE_POINT_TO_INDEX (charset, code);
1742 if (char_index < 0)
1743 return -1;
1744
1745 if (method == CHARSET_METHOD_MAP)
1746 {
1747 Lisp_Object decoder;
1748
1749 decoder = CHARSET_DECODER (charset);
1750 if (! VECTORP (decoder))
1751 {
1752 load_charset (charset, 1);
1753 decoder = CHARSET_DECODER (charset);
1754 }
1755 if (VECTORP (decoder))
1756 c = XINT (AREF (decoder, char_index));
1757 else
1758 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1759 }
1760 else /* method == CHARSET_METHOD_OFFSET */
1761 {
1762 c = char_index + CHARSET_CODE_OFFSET (charset);
1763 if (CHARSET_UNIFIED_P (charset)
1764 && c > MAX_UNICODE_CHAR)
1765 MAYBE_UNIFY_CHAR (c);
1766 }
1767 }
1768
1769 return c;
1770}
1771
1772/* Variable used temporarily by the macro ENCODE_CHAR. */
1773Lisp_Object charset_work;
1774
1775/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1776 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1777 use CHARSET's strict_max_char instead of max_char. */
1778
1779unsigned
1780encode_char (charset, c)
1781 struct charset *charset;
1782 int c;
1783{
1784 unsigned code;
1785 enum charset_method method = CHARSET_METHOD (charset);
1786
1787 if (CHARSET_UNIFIED_P (charset))
1788 {
1789 Lisp_Object deunifier, deunified;
1790 int code_index = -1;
1791
1792 deunifier = CHARSET_DEUNIFIER (charset);
1793 if (! CHAR_TABLE_P (deunifier))
1794 {
1795 load_charset (charset, 2);
1796 deunifier = CHARSET_DEUNIFIER (charset);
1797 }
1798 if (CHAR_TABLE_P (deunifier))
1799 {
1800 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1801
1802 if (INTEGERP (deunified))
1803 code_index = XINT (deunified);
1804 }
1805 else
1806 {
1807 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1808 }
1809 if (code_index >= 0)
1810 c = CHARSET_CODE_OFFSET (charset) + code_index;
1811 }
1812
1813 if (method == CHARSET_METHOD_SUBSET)
1814 {
1815 Lisp_Object subset_info;
1816 struct charset *this_charset;
1817
1818 subset_info = CHARSET_SUBSET (charset);
1819 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1820 code = ENCODE_CHAR (this_charset, c);
1821 if (code == CHARSET_INVALID_CODE (this_charset)
1822 || code < XFASTINT (AREF (subset_info, 1))
1823 || code > XFASTINT (AREF (subset_info, 2)))
1824 return CHARSET_INVALID_CODE (charset);
1825 code += XINT (AREF (subset_info, 3));
1826 return code;
1827 }
1828
1829 if (method == CHARSET_METHOD_SUPERSET)
1830 {
1831 Lisp_Object parents;
1832
1833 parents = CHARSET_SUPERSET (charset);
1834 for (; CONSP (parents); parents = XCDR (parents))
1835 {
1836 int id = XINT (XCAR (XCAR (parents)));
1837 int code_offset = XINT (XCDR (XCAR (parents)));
1838 struct charset *this_charset = CHARSET_FROM_ID (id);
1839
1840 code = ENCODE_CHAR (this_charset, c);
1841 if (code != CHARSET_INVALID_CODE (this_charset))
1842 return code + code_offset;
1843 }
1844 return CHARSET_INVALID_CODE (charset);
1845 }
1846
1847 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1848 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1849 return CHARSET_INVALID_CODE (charset);
1850
1851 if (method == CHARSET_METHOD_MAP)
1852 {
1853 Lisp_Object encoder;
1854 Lisp_Object val;
1855
1856 encoder = CHARSET_ENCODER (charset);
1857 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1858 load_charset (charset);
1859 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1860 {
1861 val = CHAR_TABLE_REF (encoder, c);
1862 if (NILP (val))
1863 return CHARSET_INVALID_CODE (charset);
1864 code = XINT (val);
1865 if (! CHARSET_COMPACT_CODES_P (charset))
1866 code = INDEX_TO_CODE_POINT (charset, code);
1867 }
1868 else
1869 {
1870 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1871 code = INDEX_TO_CODE_POINT (charset, code);
1872 }
1873 }
1874 else /* method == CHARSET_METHOD_OFFSET */
1875 {
1876 int code_index = c - CHARSET_CODE_OFFSET (charset);
1877
1878 code = INDEX_TO_CODE_POINT (charset, code_index);
1879 }
1880
1881 return code;
1882}
1883
1884
1885DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1886 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1887Return nil if CODE-POINT is not valid in CHARSET.
1888
1889CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1890
1891Optional argument RESTRICTION specifies a way to map the pair of CCS
1892and CODE-POINT to a character. Currently not supported and just ignored. */)
1893 (charset, code_point, restriction)
1894 Lisp_Object charset, code_point, restriction;
1895{
1896 int c, id;
1897 unsigned code;
1898 struct charset *charsetp;
1899
1900 CHECK_CHARSET_GET_ID (charset, id);
1901 if (CONSP (code_point))
1902 {
1903 CHECK_NATNUM_CAR (code_point);
1904 CHECK_NATNUM_CDR (code_point);
1905 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1906 }
1907 else
1908 {
1909 CHECK_NATNUM (code_point);
1910 code = XINT (code_point);
1911 }
1912 charsetp = CHARSET_FROM_ID (id);
1913 c = DECODE_CHAR (charsetp, code);
1914 return (c >= 0 ? make_number (c) : Qnil);
1915}
1916
1917
1918DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1919 doc: /* Encode the character CH into a code-point of CHARSET.
1920Return nil if CHARSET doesn't include CH.
1921
1922Optional argument RESTRICTION specifies a way to map CH to a
1923code-point in CCS. Currently not supported and just ignored. */)
1924 (ch, charset, restriction)
1925 Lisp_Object ch, charset, restriction;
1926{
1927 int id;
1928 unsigned code;
1929 struct charset *charsetp;
1930
1931 CHECK_CHARSET_GET_ID (charset, id);
1932 CHECK_NATNUM (ch);
1933 charsetp = CHARSET_FROM_ID (id);
1934 code = ENCODE_CHAR (charsetp, XINT (ch));
1935 if (code == CHARSET_INVALID_CODE (charsetp))
1936 return Qnil;
1937 if (code > 0x7FFFFFF)
1938 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1939 return make_number (code);
1940}
1941
1942
1943DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1944 doc:
1945 /* Return a character of CHARSET whose position codes are CODEn.
1946
1947CODE1 through CODE4 are optional, but if you don't supply sufficient
1948position codes, it is assumed that the minimum code in each dimension
1949is specified. */)
1950 (charset, code1, code2, code3, code4)
1951 Lisp_Object charset, code1, code2, code3, code4;
1952{
1953 int id, dimension;
1954 struct charset *charsetp;
1955 unsigned code;
1956 int c;
1957
1958 CHECK_CHARSET_GET_ID (charset, id);
1959 charsetp = CHARSET_FROM_ID (id);
1960
1961 dimension = CHARSET_DIMENSION (charsetp);
1962 if (NILP (code1))
1963 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1964 ? 0 : CHARSET_MIN_CODE (charsetp));
1965 else
1966 {
1967 CHECK_NATNUM (code1);
1968 if (XFASTINT (code1) >= 0x100)
1969 args_out_of_range (make_number (0xFF), code1);
1970 code = XFASTINT (code1);
1971
1972 if (dimension > 1)
1973 {
1974 code <<= 8;
1975 if (NILP (code2))
1976 code |= charsetp->code_space[(dimension - 2) * 4];
1977 else
1978 {
1979 CHECK_NATNUM (code2);
1980 if (XFASTINT (code2) >= 0x100)
1981 args_out_of_range (make_number (0xFF), code2);
1982 code |= XFASTINT (code2);
1983 }
1984
1985 if (dimension > 2)
1986 {
1987 code <<= 8;
1988 if (NILP (code3))
1989 code |= charsetp->code_space[(dimension - 3) * 4];
1990 else
1991 {
1992 CHECK_NATNUM (code3);
1993 if (XFASTINT (code3) >= 0x100)
1994 args_out_of_range (make_number (0xFF), code3);
1995 code |= XFASTINT (code3);
1996 }
1997
1998 if (dimension > 3)
1999 {
2000 code <<= 8;
2001 if (NILP (code4))
2002 code |= charsetp->code_space[0];
2003 else
2004 {
2005 CHECK_NATNUM (code4);
2006 if (XFASTINT (code4) >= 0x100)
2007 args_out_of_range (make_number (0xFF), code4);
2008 code |= XFASTINT (code4);
2009 }
2010 }
2011 }
2012 }
2013 }
2014
2015 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2016 code &= 0x7F7F7F7F;
2017 c = DECODE_CHAR (charsetp, code);
2018 if (c < 0)
2019 error ("Invalid code(s)");
2020 return make_number (c);
2021}
2022
2023
2024/* Return the first charset in CHARSET_LIST that contains C.
2025 CHARSET_LIST is a list of charset IDs. If it is nil, use
2026 Vcharset_ordered_list. */
2027
2028struct charset *
2029char_charset (c, charset_list, code_return)
2030 int c;
2031 Lisp_Object charset_list;
2032 unsigned *code_return;
2033{
2034 int maybe_null = 0;
2035
2036 if (NILP (charset_list))
2037 charset_list = Vcharset_ordered_list;
2038 else
2039 maybe_null = 1;
2040
2041 while (CONSP (charset_list))
2042 {
2043 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2044 unsigned code = ENCODE_CHAR (charset, c);
2045
2046 if (code != CHARSET_INVALID_CODE (charset))
2047 {
2048 if (code_return)
2049 *code_return = code;
2050 return charset;
2051 }
2052 charset_list = XCDR (charset_list);
2053 if (c <= MAX_UNICODE_CHAR
2054 && EQ (charset_list, Vcharset_non_preferred_head))
2055 return CHARSET_FROM_ID (charset_unicode);
2056 }
2057 return (maybe_null ? NULL
2058 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2059 : CHARSET_FROM_ID (charset_eight_bit));
2060}
2061
2062
2063DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2064 doc:
2065 /*Return list of charset and one to four position-codes of CH.
2066The charset is decided by the current priority order of charsets.
2067A position-code is a byte value of each dimension of the code-point of
2068CH in the charset. */)
2069 (ch)
2070 Lisp_Object ch;
2071{
2072 struct charset *charset;
2073 int c, dimension;
2074 unsigned code;
2075 Lisp_Object val;
2076
2077 CHECK_CHARACTER (ch);
2078 c = XFASTINT (ch);
2079 charset = CHAR_CHARSET (c);
2080 if (! charset)
2081 abort ();
2082 code = ENCODE_CHAR (charset, c);
2083 if (code == CHARSET_INVALID_CODE (charset))
2084 abort ();
2085 dimension = CHARSET_DIMENSION (charset);
2086 for (val = Qnil; dimension > 0; dimension--)
2087 {
2088 val = Fcons (make_number (code & 0xFF), val);
2089 code >>= 8;
2090 }
2091 return Fcons (CHARSET_NAME (charset), val);
2092}
2093
2094
2095DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
2096 doc: /* Return the charset of highest priority that contains CH. */)
2097 (ch)
2098 Lisp_Object ch;
2099{
2100 struct charset *charset;
2101
2102 CHECK_CHARACTER (ch);
2103 charset = CHAR_CHARSET (XINT (ch));
2104 return (CHARSET_NAME (charset));
2105}
2106
2107
2108DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2109 doc: /*
2110Return charset of a character in the current buffer at position POS.
2111If POS is nil, it defauls to the current point.
2112If POS is out of range, the value is nil. */)
2113 (pos)
2114 Lisp_Object pos;
2115{
2116 Lisp_Object ch;
2117 struct charset *charset;
2118
2119 ch = Fchar_after (pos);
2120 if (! INTEGERP (ch))
2121 return ch;
2122 charset = CHAR_CHARSET (XINT (ch));
2123 return (CHARSET_NAME (charset));
2124}
2125
2126
2127DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2128 doc: /*
2129Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2130
2131ISO 2022's designation sequence (escape sequence) distinguishes charsets
2132by their DIMENSION, CHARS, and FINAL-CHAR,
2133whereas Emacs distinguishes them by charset symbol.
2134See the documentation of the function `charset-info' for the meanings of
2135DIMENSION, CHARS, and FINAL-CHAR. */)
2136 (dimension, chars, final_char)
2137 Lisp_Object dimension, chars, final_char;
2138{
2139 int id;
2140 int chars_flag;
2141
2142 check_iso_charset_parameter (dimension, chars, final_char);
2143 chars_flag = XFASTINT (chars) == 96;
2144 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2145 XFASTINT (final_char));
2146 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2147}
2148
2149
2150DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2151 0, 0, 0,
2152 doc: /*
2153Internal use only.
2154Clear temporary charset mapping tables.
2155It should be called only from temacs invoked for dumping. */)
2156 ()
2157{
2158 int i;
2159 struct charset *charset;
2160 Lisp_Object attrs;
2161
2162 if (temp_charset_work)
2163 {
2164 free (temp_charset_work);
2165 temp_charset_work = NULL;
2166 }
2167
2168 if (CHAR_TABLE_P (Vchar_unify_table))
2169 Foptimize_char_table (Vchar_unify_table, Qnil);
2170
2171 return Qnil;
2172}
2173
2174DEFUN ("charset-priority-list", Fcharset_priority_list,
2175 Scharset_priority_list, 0, 1, 0,
2176 doc: /* Return the list of charsets ordered by priority.
2177HIGHESTP non-nil means just return the highest priority one. */)
2178 (highestp)
2179 Lisp_Object highestp;
2180{
2181 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2182
2183 if (!NILP (highestp))
2184 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2185
2186 while (!NILP (list))
2187 {
2188 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2189 list = XCDR (list);
2190 }
2191 return Fnreverse (val);
2192}
2193
2194DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2195 1, MANY, 0,
2196 doc: /* Assign higher priority to the charsets given as arguments.
2197usage: (set-charset-priority &rest charsets) */)
2198 (nargs, args)
2199 int nargs;
2200 Lisp_Object *args;
2201{
2202 Lisp_Object new_head, old_list, arglist[2];
2203 Lisp_Object list_2022, list_emacs_mule;
2204 int i, id;
2205
2206 old_list = Fcopy_sequence (Vcharset_ordered_list);
2207 new_head = Qnil;
2208 for (i = 0; i < nargs; i++)
2209 {
2210 CHECK_CHARSET_GET_ID (args[i], id);
2211 if (! NILP (Fmemq (make_number (id), old_list)))
2212 {
2213 old_list = Fdelq (make_number (id), old_list);
2214 new_head = Fcons (make_number (id), new_head);
2215 }
2216 }
2217 arglist[0] = Fnreverse (new_head);
2218 arglist[1] = Vcharset_non_preferred_head = old_list;
2219 Vcharset_ordered_list = Fnconc (2, arglist);
2220 charset_ordered_list_tick++;
2221
2222 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2223 CONSP (old_list); old_list = XCDR (old_list))
2224 {
2225 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2226 list_2022 = Fcons (XCAR (old_list), list_2022);
2227 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2228 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2229 }
2230 Viso_2022_charset_list = Fnreverse (list_2022);
2231 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2232
2233 return Qnil;
2234}
2235
2236DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2237 0, 1, 0,
2238 doc: /* Internal use only.
2239Return charset identification number of CHARSET. */)
2240 (charset)
2241 Lisp_Object charset;
2242{
2243 int id;
2244
2245 CHECK_CHARSET_GET_ID (charset, id);
2246 return make_number (id);
2247}
2248
2249\f
2250void
2251init_charset ()
2252{
2253 Lisp_Object tempdir;
2254 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2255 if (access (SDATA (tempdir), 0) < 0)
2256 {
2257 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2258Emacs will not function correctly without the character map files.\n\
2259Please check your installation!\n",
2260 tempdir);
2261 /* TODO should this be a fatal error? (Bug#909) */
2262 }
2263
2264 Vcharset_map_path = Fcons (tempdir, Qnil);
2265}
2266
2267
2268void
2269init_charset_once ()
2270{
2271 int i, j, k;
2272
2273 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2274 for (j = 0; j < ISO_MAX_CHARS; j++)
2275 for (k = 0; k < ISO_MAX_FINAL; k++)
2276 iso_charset_table[i][j][k] = -1;
2277
2278 for (i = 0; i < 256; i++)
2279 emacs_mule_charset[i] = NULL;
2280
2281 charset_jisx0201_roman = -1;
2282 charset_jisx0208_1978 = -1;
2283 charset_jisx0208 = -1;
2284
2285 for (i = 0; i < 128; i++)
2286 unibyte_to_multibyte_table[i] = i;
2287 for (; i < 256; i++)
2288 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2289}
2290
2291#ifdef emacs
2292
2293void
2294syms_of_charset ()
2295{
2296 DEFSYM (Qcharsetp, "charsetp");
2297
2298 DEFSYM (Qascii, "ascii");
2299 DEFSYM (Qunicode, "unicode");
2300 DEFSYM (Qemacs, "emacs");
2301 DEFSYM (Qeight_bit, "eight-bit");
2302 DEFSYM (Qiso_8859_1, "iso-8859-1");
2303
2304 DEFSYM (Qgl, "gl");
2305 DEFSYM (Qgr, "gr");
2306
2307 staticpro (&Vcharset_ordered_list);
2308 Vcharset_ordered_list = Qnil;
2309
2310 staticpro (&Viso_2022_charset_list);
2311 Viso_2022_charset_list = Qnil;
2312
2313 staticpro (&Vemacs_mule_charset_list);
2314 Vemacs_mule_charset_list = Qnil;
2315
2316 /* Don't staticpro them here. It's done in syms_of_fns. */
2317 QCtest = intern (":test");
2318 Qeq = intern ("eq");
2319
2320 staticpro (&Vcharset_hash_table);
2321 {
2322 Lisp_Object args[2];
2323 args[0] = QCtest;
2324 args[1] = Qeq;
2325 Vcharset_hash_table = Fmake_hash_table (2, args);
2326 }
2327
2328 charset_table_size = 128;
2329 charset_table = ((struct charset *)
2330 xmalloc (sizeof (struct charset) * charset_table_size));
2331 charset_table_used = 0;
2332
2333 defsubr (&Scharsetp);
2334 defsubr (&Smap_charset_chars);
2335 defsubr (&Sdefine_charset_internal);
2336 defsubr (&Sdefine_charset_alias);
2337 defsubr (&Scharset_plist);
2338 defsubr (&Sset_charset_plist);
2339 defsubr (&Sunify_charset);
2340 defsubr (&Sget_unused_iso_final_char);
2341 defsubr (&Sdeclare_equiv_charset);
2342 defsubr (&Sfind_charset_region);
2343 defsubr (&Sfind_charset_string);
2344 defsubr (&Sdecode_char);
2345 defsubr (&Sencode_char);
2346 defsubr (&Ssplit_char);
2347 defsubr (&Smake_char);
2348 defsubr (&Schar_charset);
2349 defsubr (&Scharset_after);
2350 defsubr (&Siso_charset);
2351 defsubr (&Sclear_charset_maps);
2352 defsubr (&Scharset_priority_list);
2353 defsubr (&Sset_charset_priority);
2354 defsubr (&Scharset_id_internal);
2355
2356 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2357 doc: /* *List of directories to search for charset map files. */);
2358 Vcharset_map_path = Qnil;
2359
2360 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2361 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2362 inhibit_load_charset_map = 0;
2363
2364 DEFVAR_LISP ("charset-list", &Vcharset_list,
2365 doc: /* List of all charsets ever defined. */);
2366 Vcharset_list = Qnil;
2367
2368 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2369 doc: /* ISO639 language mnemonic symbol for the current language environment.
2370If the current language environment is for multiple languages (e.g. "Latin-1"),
2371the value may be a list of mnemonics. */);
2372 Vcurrent_iso639_language = Qnil;
2373
2374 charset_ascii
2375 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2376 0, 127, 'B', -1, 0, 1, 0, 0);
2377 charset_iso_8859_1
2378 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2379 0, 255, -1, -1, -1, 1, 0, 0);
2380 charset_unicode
2381 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2382 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2383 charset_emacs
2384 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2385 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2386 charset_eight_bit
2387 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2388 128, 255, -1, 0, -1, 0, 1,
2389 MAX_5_BYTE_CHAR + 1);
2390}
2391
2392#endif /* emacs */
2393
2394/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2395 (do not change this comment) */