(CHECK_CHARSET_GET_ID): Use XINT on AREF result.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
04c2f2c5 51/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 52
3263d5a2 53 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 54 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
04c2f2c5 62 charset_table as a struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
3263d5a2
KH
66/* List of all charsets. This variable is used only from Emacs
67 Lisp. */
4ed46869
KH
68Lisp_Object Vcharset_list;
69
3263d5a2
KH
70/* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72Lisp_Object Vcharset_hash_table;
73
74/* Table of struct charset. */
75struct charset *charset_table;
76
77static int charset_table_size;
78int charset_table_used;
79
80Lisp_Object Qcharsetp;
81
82/* Special charset symbols. */
83Lisp_Object Qascii;
84Lisp_Object Qeight_bit_control;
85Lisp_Object Qeight_bit_graphic;
86Lisp_Object Qiso_8859_1;
87Lisp_Object Qunicode;
b0e3cf2b 88
3263d5a2
KH
89/* The corresponding charsets. */
90int charset_ascii;
91int charset_8_bit_control;
92int charset_8_bit_graphic;
93int charset_iso_8859_1;
94int charset_unicode;
c1a08b4c 95
7c7dceee
KH
96/* The other special charsets. */
97int charset_jisx0201_roman;
98int charset_jisx0208_1978;
99int charset_jisx0208;
100
3263d5a2
KH
101/* Value of charset attribute `charset-iso-plane'. */
102Lisp_Object Qgl, Qgr;
c1a08b4c 103
3263d5a2
KH
104/* The primary charset. It is a charset of unibyte characters. */
105int charset_primary;
106
107/* List of charsets ordered by the priority. */
108Lisp_Object Vcharset_ordered_list;
109
110/* List of iso-2022 charsets. */
111Lisp_Object Viso_2022_charset_list;
112
113/* List of emacs-mule charsets. */
114Lisp_Object Vemacs_mule_charset_list;
115
116struct charset *emacs_mule_charset[256];
4ed46869
KH
117
118/* Mapping table from ISO2022's charset (specified by DIMENSION,
119 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
120int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
121
122Lisp_Object Vcharset_map_directory;
123
124Lisp_Object Vchar_unified_charset_table;
125
69f8de5b
KH
126#define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
820ee249
KH
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
3263d5a2
KH
141 : -1)
142
143
144/* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
146
820ee249
KH
147#define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 << 8) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 << 16) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
159 << 24))))
160
4ed46869 161
3263d5a2 162\f
4ed46869 163
e9ce014c
KH
164/* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
3263d5a2 166int charset_map_loaded;
35e623fb 167
e9ce014c
KH
168struct charset_map_entries
169{
170 struct {
171 unsigned from, to;
172 int c;
173 } entry[0x10000];
174 struct charset_map_entries *next;
175};
176
177/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 178
3263d5a2 179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 180
3263d5a2
KH
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
93bcb785 183
3263d5a2
KH
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
186 setup it too. */
4ed46869 187
3263d5a2 188static void
e9ce014c 189load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 190 struct charset *charset;
e9ce014c
KH
191 struct charset_map_entries *entries;
192 int n_entries;
3263d5a2 193 int control_flag;
4ed46869 194{
3263d5a2 195 Lisp_Object vec, table;
3263d5a2
KH
196 unsigned max_code = CHARSET_MAX_CODE (charset);
197 int ascii_compatible_p = charset->ascii_compatible_p;
198 int min_char, max_char, nonascii_min_char;
3263d5a2 199 int i;
3263d5a2 200 unsigned char *fast_map = charset->fast_map;
99529c2c 201
e9ce014c
KH
202 if (n_entries <= 0)
203 return;
204
205 if (control_flag > 0)
8ac5a9cc 206 {
3263d5a2 207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 208
374c5cfd 209 table = Fmake_char_table (Qnil, Qnil);
3263d5a2
KH
210 if (control_flag == 1)
211 vec = Fmake_vector (make_number (n), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table))
374c5cfd 213 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
6662e69b 214
3263d5a2 215 charset_map_loaded = 1;
2e344af3 216 }
3263d5a2 217
e9ce014c 218 min_char = max_char = entries->entry[0].c;
3263d5a2 219 nonascii_min_char = MAX_CHAR;
e9ce014c 220 for (i = 0; i < n_entries; i++)
2e344af3 221 {
e9ce014c 222 unsigned from, to;
3b4f4446
KH
223 int from_index, to_index;
224 int from_c, to_c;
e9ce014c 225 int idx = i % 0x10000;
3263d5a2 226
e9ce014c
KH
227 if (i > 0 && idx == 0)
228 entries = entries->next;
229 from = entries->entry[idx].from;
230 to = entries->entry[idx].to;
3b4f4446
KH
231 from_c = entries->entry[idx].c;
232 from_index = CODE_POINT_TO_INDEX (charset, from);
233 if (from == to)
234 {
235 to_index = from_index;
236 to_c = from_c;
237 }
238 else
239 {
240 to_index = CODE_POINT_TO_INDEX (charset, to);
241 to_c = from_c + (to_index - from_index);
242 }
243 if (from_index < 0 || to_index < 0)
244 continue;
3263d5a2 245
3263d5a2
KH
246 if (control_flag < 2)
247 {
3b4f4446
KH
248 int c;
249
250 if (to_c > max_char)
251 max_char = to_c;
252 else if (from_c < min_char)
253 min_char = from_c;
254 if (ascii_compatible_p)
255 {
256 if (! ASCII_BYTE_P (from_c))
257 {
258 if (from_c < nonascii_min_char)
259 nonascii_min_char = from_c;
260 }
261 else if (! ASCII_BYTE_P (to_c))
262 {
263 nonascii_min_char = 0x80;
264 }
265 }
266
267 for (c = from_c; c <= to_c; c++)
268 CHARSET_FAST_MAP_SET (c, fast_map);
269
e9ce014c 270 if (control_flag == 1)
3263d5a2 271 {
e9ce014c 272 unsigned code = from;
e9ce014c 273
e9ce014c
KH
274 if (CHARSET_COMPACT_CODES_P (charset))
275 while (1)
276 {
3b4f4446
KH
277 ASET (vec, from_index, make_number (from_c));
278 CHAR_TABLE_SET (table, from_c, make_number (code));
e9ce014c
KH
279 if (from_index == to_index)
280 break;
3b4f4446 281 from_index++, from_c++;
e9ce014c
KH
282 code = INDEX_TO_CODE_POINT (charset, from_index);
283 }
284 else
3b4f4446 285 for (; from_index <= to_index; from_index++, from_c++)
e9ce014c 286 {
3b4f4446
KH
287 ASET (vec, from_index, make_number (from_c));
288 CHAR_TABLE_SET (table, from_c, make_number (from_index));
e9ce014c 289 }
3263d5a2 290 }
3263d5a2 291 }
e9ce014c 292 else
2e344af3 293 {
69f8de5b 294 unsigned code = from;
3b4f4446 295
69f8de5b
KH
296 while (1)
297 {
298 int c1 = DECODE_CHAR (charset, code);
299
3263d5a2
KH
300 if (c1 >= 0)
301 {
3b4f4446
KH
302 CHAR_TABLE_SET (table, from_c, make_number (c1));
303 CHAR_TABLE_SET (Vchar_unify_table, c1, from_c);
3263d5a2
KH
304 if (CHAR_TABLE_P (Vchar_unified_charset_table))
305 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
306 CHARSET_NAME (charset));
307 }
69f8de5b
KH
308 if (from_index == to_index)
309 break;
3b4f4446 310 from_index++, from_c++;
69f8de5b 311 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 312 }
2e344af3 313 }
8ac5a9cc 314 }
3263d5a2
KH
315
316 if (control_flag < 2)
4ed46869 317 {
3263d5a2
KH
318 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
319 ? nonascii_min_char : min_char);
320 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 321 if (control_flag == 1)
4ed46869 322 {
3263d5a2
KH
323 CHARSET_DECODER (charset) = vec;
324 CHARSET_ENCODER (charset) = table;
4ed46869
KH
325 }
326 }
2e344af3 327 else
3263d5a2 328 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
329}
330
12bcae05 331
3263d5a2
KH
332/* Read a hexadecimal number (preceded by "0x") from the file FP while
333 paying attention to comment charcter '#'. */
12bcae05 334
3263d5a2
KH
335static INLINE unsigned
336read_hex (fp, eof)
337 FILE *fp;
338 int *eof;
12bcae05 339{
3263d5a2
KH
340 int c;
341 unsigned n;
12bcae05 342
3263d5a2
KH
343 while ((c = getc (fp)) != EOF)
344 {
69f8de5b 345 if (c == '#')
3263d5a2
KH
346 {
347 while ((c = getc (fp)) != EOF && c != '\n');
348 }
349 else if (c == '0')
350 {
351 if ((c = getc (fp)) == EOF || c == 'x')
352 break;
353 }
354 }
355 if (c == EOF)
356 {
357 *eof = 1;
358 return 0;
359 }
360 *eof = 0;
361 n = 0;
362 if (c == 'x')
363 while ((c = getc (fp)) != EOF && isxdigit (c))
364 n = ((n << 4)
365 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
366 else
367 while ((c = getc (fp)) != EOF && isdigit (c))
368 n = (n * 10) + c - '0';
e9ce014c
KH
369 if (c != EOF)
370 ungetc (c, fp);
3263d5a2
KH
371 return n;
372}
12bcae05 373
537efd8d 374
3263d5a2 375/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
376 Each line of MAPFILE has this form
377 0xAAAA 0xCCCC
378 where 0xAAAA is a code-point and 0xCCCC is the corresponding
379 character code, or this form
380 0xAAAA-0xBBBB 0xCCCC
381 where 0xAAAA and 0xBBBB are code-points specifying a range, and
382 0xCCCC is the first character code of the range.
383
3263d5a2
KH
384 The returned vector has this form:
385 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
386 where CODE1 is a code-point or a cons of code-points specifying a
387 range. */
4ed46869 388
c449997d
KH
389extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
390
e9ce014c
KH
391static void
392load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
393 struct charset *charset;
394 Lisp_Object mapfile;
e9ce014c 395 int control_flag;
4ed46869 396{
e9ce014c
KH
397 unsigned min_code = CHARSET_MIN_CODE (charset);
398 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
399 int fd;
400 FILE *fp;
3263d5a2
KH
401 int eof;
402 Lisp_Object suffixes;
e9ce014c
KH
403 struct charset_map_entries *head, *entries;
404 int n_entries;
4ed46869 405
3263d5a2
KH
406 suffixes = Fcons (build_string (".map"),
407 Fcons (build_string (".TXT"), Qnil));
4ed46869 408
3263d5a2
KH
409 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
410 NULL, 0);
411 if (fd < 0
412 || ! (fp = fdopen (fd, "r")))
413 {
414 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 415 return;
3263d5a2 416 }
4ed46869 417
e9ce014c
KH
418 head = entries = ((struct charset_map_entries *)
419 alloca (sizeof (struct charset_map_entries)));
420 n_entries = 0;
3263d5a2
KH
421 eof = 0;
422 while (1)
423 {
e9ce014c
KH
424 unsigned from, to;
425 int c;
426 int idx;
4ed46869 427
e9ce014c 428 from = read_hex (fp, &eof);
3263d5a2
KH
429 if (eof)
430 break;
e9ce014c
KH
431 if (getc (fp) == '-')
432 to = read_hex (fp, &eof);
433 else
434 to = from;
435 c = (int) read_hex (fp, &eof);
436
437 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
438 continue;
439
440 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 441 {
e9ce014c
KH
442 entries->next = ((struct charset_map_entries *)
443 alloca (sizeof (struct charset_map_entries)));
444 entries = entries->next;
3263d5a2 445 }
e9ce014c
KH
446 idx = n_entries % 0x10000;
447 entries->entry[idx].from = from;
448 entries->entry[idx].to = to;
449 entries->entry[idx].c = c;
450 n_entries++;
3263d5a2
KH
451 }
452 fclose (fp);
453 close (fd);
4ed46869 454
e9ce014c
KH
455 load_charset_map (charset, head, n_entries, control_flag);
456}
457
458static void
459load_charset_map_from_vector (charset, vec, control_flag)
460 struct charset *charset;
461 Lisp_Object vec;
462 int control_flag;
463{
464 unsigned min_code = CHARSET_MIN_CODE (charset);
465 unsigned max_code = CHARSET_MAX_CODE (charset);
466 struct charset_map_entries *head, *entries;
467 int n_entries;
468 int len = ASIZE (vec);
469 int i;
470
471 if (len % 2 == 1)
3263d5a2 472 {
e9ce014c
KH
473 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
474 return;
3263d5a2 475 }
4ed46869 476
e9ce014c
KH
477 head = entries = ((struct charset_map_entries *)
478 alloca (sizeof (struct charset_map_entries)));
479 n_entries = 0;
480 for (i = 0; i < len; i += 2)
481 {
482 Lisp_Object val, val2;
483 unsigned from, to;
484 int c;
485 int idx;
3263d5a2 486
e9ce014c
KH
487 val = AREF (vec, i);
488 if (CONSP (val))
489 {
490 val2 = XCDR (val);
491 val = XCAR (val);
492 CHECK_NATNUM (val);
493 CHECK_NATNUM (val2);
494 from = XFASTINT (val);
495 to = XFASTINT (val2);
496 }
497 else
498 {
499 CHECK_NATNUM (val);
500 from = to = XFASTINT (val);
501 }
502 val = AREF (vec, i + 1);
503 CHECK_NATNUM (val);
504 c = XFASTINT (val);
505
506 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
507 continue;
508
509 if ((n_entries % 0x10000) == 0)
510 {
511 entries->next = ((struct charset_map_entries *)
512 alloca (sizeof (struct charset_map_entries)));
513 entries = entries->next;
514 }
515 idx = n_entries % 0x10000;
516 entries->entry[idx].from = from;
517 entries->entry[idx].to = to;
518 entries->entry[idx].c = c;
519 n_entries++;
520 }
521
522 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
523}
524
3263d5a2
KH
525static void
526load_charset (charset)
527 struct charset *charset;
ac4137cc 528{
3263d5a2
KH
529 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
530 {
531 Lisp_Object map;
ac4137cc 532
3263d5a2
KH
533 map = CHARSET_MAP (charset);
534 if (STRINGP (map))
e9ce014c
KH
535 load_charset_map_from_file (charset, map, 1);
536 else
537 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
538 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
539 }
4ed46869
KH
540}
541
3263d5a2
KH
542
543DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
544 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
545 (object)
546 Lisp_Object object;
23d2a7f1 547{
3263d5a2 548 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
549}
550
35e623fb 551
3263d5a2 552void
374c5cfd
KH
553map_charset_chars (c_function, function, arg,
554 charset, from, to)
555 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
556 Lisp_Object function, arg;
557 struct charset *charset;
558 unsigned from, to;
559
35e623fb 560{
3263d5a2 561 Lisp_Object range;
374c5cfd 562 int partial;
3263d5a2
KH
563
564 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
565 load_charset (charset);
566
374c5cfd
KH
567 partial = (from > CHARSET_MIN_CODE (charset)
568 || to < CHARSET_MAX_CODE (charset));
569
570 if (CHARSET_UNIFIED_P (charset)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
572 {
573 map_char_table_for_charset (c_function, function,
574 CHARSET_DEUNIFIER (charset), arg,
575 partial ? charset : NULL, from, to);
576 }
577
3263d5a2
KH
578 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
579 {
374c5cfd
KH
580 int from_idx = CODE_POINT_TO_INDEX (charset, from);
581 int to_idx = CODE_POINT_TO_INDEX (charset, to);
582 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
583 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
584
585 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 586 if (NILP (function))
374c5cfd 587 (*c_function) (range, arg);
3263d5a2
KH
588 else
589 call2 (function, range, arg);
590 }
591 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
592 {
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
594 return;
374c5cfd 595 if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
bbf12bb3 596 {
374c5cfd
KH
597 range = Fcons (make_number (from), make_number (to));
598 if (to >= 128)
599 XSETCAR (range, make_number (127));
600
3263d5a2 601 if (NILP (function))
374c5cfd 602 (*c_function) (range, arg);
3263d5a2
KH
603 else
604 call2 (function, range, arg);
bbf12bb3 605 }
374c5cfd
KH
606 map_char_table_for_charset (c_function, function,
607 CHARSET_ENCODER (charset), arg,
608 partial ? charset : NULL, from, to);
3263d5a2 609 }
374c5cfd 610 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
3263d5a2 611 {
374c5cfd
KH
612 Lisp_Object subset_info;
613 int offset;
614
615 subset_info = CHARSET_SUBSET (charset);
616 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
617 offset = XINT (AREF (subset_info, 3));
618 from -= offset;
619 if (from < XFASTINT (AREF (subset_info, 1)))
620 from = XFASTINT (AREF (subset_info, 1));
621 to -= offset;
622 if (to > XFASTINT (AREF (subset_info, 2)))
623 to = XFASTINT (AREF (subset_info, 2));
624 map_charset_chars (c_function, function, arg, charset, from, to);
625 }
626 else /* i.e. CHARSET_METHOD_SUPERSET */
627 {
628 Lisp_Object parents;
3263d5a2 629
374c5cfd
KH
630 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
631 parents = XCDR (parents))
bbf12bb3 632 {
374c5cfd
KH
633 int offset;
634 unsigned this_from, this_to;
635
636 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
637 offset = XINT (XCDR (XCAR (parents)));
638 this_from = from - offset;
639 this_to = to - offset;
640 if (this_from < CHARSET_MIN_CODE (charset))
641 this_from = CHARSET_MIN_CODE (charset);
642 if (this_to > CHARSET_MAX_CODE (charset))
643 this_to = CHARSET_MAX_CODE (charset);
644 map_charset_chars (c_function, function, arg, charset, from, to);
bbf12bb3 645 }
35e623fb 646 }
3263d5a2
KH
647}
648
374c5cfd
KH
649
650DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 651 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 652FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 653argument ARG.
6abd9323 654
374c5cfd
KH
655RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
656characters contained in CHARSET.
657
658The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
659range of code points of targer characters. */)
660 (function, charset, arg, from_code, to_code)
661 Lisp_Object function, charset, arg, from_code, to_code;
3263d5a2 662{
374c5cfd
KH
663 struct charset *cs;
664
665 CHECK_CHARSET_GET_CHARSET (charset, cs);
666 if (NILP (from_code))
667 from_code = 0;
668 if (from_code < CHARSET_MIN_CODE (cs))
669 from_code = CHARSET_MIN_CODE (cs);
670 if (NILP (to_code))
671 to_code = 0xFFFFFFFF;
672 if (to_code > CHARSET_MAX_CODE (cs))
673 to_code = CHARSET_MAX_CODE (cs);
674
675 map_charset_chars (NULL, function, arg, cs, from_code, to_code);
3263d5a2 676 return Qnil;
35e623fb 677}
76d7b829
KH
678
679
3263d5a2
KH
680/* Define a charset according to the arguments. The Nth argument is
681 the Nth attribute of the charset (the last attribute `charset-id'
682 is not included). See the docstring of `define-charset' for the
683 detail. */
76d7b829 684
3263d5a2
KH
685DEFUN ("define-charset-internal", Fdefine_charset_internal,
686 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
687 doc: /* For internal use only.
688usage: (define-charset-internal ...) */)
3263d5a2
KH
689 (nargs, args)
690 int nargs;
691 Lisp_Object *args;
76d7b829 692{
3263d5a2
KH
693 /* Charset attr vector. */
694 Lisp_Object attrs;
695 Lisp_Object val;
696 unsigned hash_code;
697 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 698 int i, j;
3263d5a2
KH
699 struct charset charset;
700 int id;
701 int dimension;
702 int new_definition_p;
703 int nchars;
704
705 if (nargs != charset_arg_max)
706 return Fsignal (Qwrong_number_of_arguments,
707 Fcons (intern ("define-charset-internal"),
708 make_number (nargs)));
709
710 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
711
712 CHECK_SYMBOL (args[charset_arg_name]);
713 ASET (attrs, charset_name, args[charset_arg_name]);
714
715 val = args[charset_arg_code_space];
716 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 717 {
3263d5a2
KH
718 int min_byte, max_byte;
719
720 min_byte = XINT (Faref (val, make_number (i * 2)));
721 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
722 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
723 error ("Invalid :code-space value");
724 charset.code_space[i * 4] = min_byte;
725 charset.code_space[i * 4 + 1] = max_byte;
726 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
727 nchars *= charset.code_space[i * 4 + 2];
728 charset.code_space[i * 4 + 3] = nchars;
729 if (max_byte > 0)
730 dimension = i + 1;
731 }
76d7b829 732
3263d5a2
KH
733 val = args[charset_arg_dimension];
734 if (NILP (val))
735 charset.dimension = dimension;
736 else
737 {
738 CHECK_NATNUM (val);
739 charset.dimension = XINT (val);
740 if (charset.dimension < 1 || charset.dimension > 4)
741 args_out_of_range_3 (val, make_number (1), make_number (4));
742 }
743
744 charset.code_linear_p
745 = (charset.dimension == 1
746 || (charset.code_space[2] == 256
747 && (charset.dimension == 2
748 || (charset.code_space[6] == 256
749 && (charset.dimension == 3
750 || charset.code_space[10] == 256)))));
751
69f8de5b
KH
752 if (! charset.code_linear_p)
753 {
754 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 755 bzero (charset.code_space_mask, 256);
69f8de5b
KH
756 for (i = 0; i < 4; i++)
757 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
758 j++)
759 charset.code_space_mask[j] |= (1 << i);
760 }
761
3263d5a2
KH
762 charset.iso_chars_96 = charset.code_space[2] == 96;
763
764 charset.min_code = (charset.code_space[0]
765 | (charset.code_space[4] << 8)
766 | (charset.code_space[8] << 16)
767 | (charset.code_space[12] << 24));
768 charset.max_code = (charset.code_space[1]
769 | (charset.code_space[5] << 8)
770 | (charset.code_space[9] << 16)
771 | (charset.code_space[13] << 24));
820ee249
KH
772 charset.char_index_offset = 0;
773
774 val = args[charset_arg_min_code];
775 if (! NILP (val))
776 {
777 unsigned code;
778
779 if (INTEGERP (val))
780 code = XINT (val);
781 else
782 {
783 CHECK_CONS (val);
784 CHECK_NUMBER (XCAR (val));
785 CHECK_NUMBER (XCDR (val));
786 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
787 }
788 if (code < charset.min_code
789 || code > charset.max_code)
790 args_out_of_range_3 (make_number (charset.min_code),
791 make_number (charset.max_code), val);
792 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
793 charset.min_code = code;
794 }
795
796 val = args[charset_arg_max_code];
797 if (! NILP (val))
798 {
799 unsigned code;
800
801 if (INTEGERP (val))
802 code = XINT (val);
803 else
804 {
805 CHECK_CONS (val);
806 CHECK_NUMBER (XCAR (val));
807 CHECK_NUMBER (XCDR (val));
808 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
809 }
810 if (code < charset.min_code
811 || code > charset.max_code)
812 args_out_of_range_3 (make_number (charset.min_code),
813 make_number (charset.max_code), val);
814 charset.max_code = code;
815 }
3263d5a2 816
e9ce014c
KH
817 charset.compact_codes_p = charset.max_code < 0x1000000;
818
3263d5a2
KH
819 val = args[charset_arg_invalid_code];
820 if (NILP (val))
821 {
822 if (charset.min_code > 0)
823 charset.invalid_code = 0;
bbf12bb3
KH
824 else
825 {
3263d5a2
KH
826 XSETINT (val, charset.max_code + 1);
827 if (XINT (val) == charset.max_code + 1)
828 charset.invalid_code = charset.max_code + 1;
829 else
830 error ("Attribute :invalid-code must be specified");
76d7b829 831 }
76d7b829 832 }
3263d5a2
KH
833 else
834 {
835 CHECK_NATNUM (val);
836 charset.invalid_code = XFASTINT (val);
837 }
76d7b829 838
3263d5a2
KH
839 val = args[charset_arg_iso_final];
840 if (NILP (val))
841 charset.iso_final = -1;
842 else
843 {
844 CHECK_NUMBER (val);
845 if (XINT (val) < '0' || XINT (val) > 127)
846 error ("Invalid iso-final-char: %d", XINT (val));
847 charset.iso_final = XINT (val);
848 }
849
850 val = args[charset_arg_iso_revision];
851 if (NILP (val))
852 charset.iso_revision = -1;
853 else
4ed46869 854 {
3263d5a2
KH
855 CHECK_NUMBER (val);
856 if (XINT (val) > 63)
857 args_out_of_range (make_number (63), val);
858 charset.iso_revision = XINT (val);
4ed46869 859 }
3263d5a2
KH
860
861 val = args[charset_arg_emacs_mule_id];
862 if (NILP (val))
863 charset.emacs_mule_id = -1;
4ed46869
KH
864 else
865 {
3263d5a2
KH
866 CHECK_NATNUM (val);
867 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
868 error ("Invalid emacs-mule-id: %d", XINT (val));
869 charset.emacs_mule_id = XINT (val);
c83ef371 870 }
6ef23ebb 871
3263d5a2 872 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 873
3263d5a2
KH
874 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
875
876 charset.unified_p = 0;
877
878 bzero (charset.fast_map, sizeof (charset.fast_map));
879
880 if (! NILP (args[charset_arg_code_offset]))
881 {
882 val = args[charset_arg_code_offset];
883 CHECK_NUMBER (val);
884
885 charset.method = CHARSET_METHOD_OFFSET;
886 charset.code_offset = XINT (val);
887
888 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
889 charset.min_char = i + charset.code_offset;
890 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
891 charset.max_char = i + charset.code_offset;
892 if (charset.max_char > MAX_CHAR)
893 error ("Unsupported max char: %d", charset.max_char);
894
895 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
896 i += 128)
897 CHARSET_FAST_MAP_SET (i, charset.fast_map);
898 for (; i <= charset.max_char; i += 0x1000)
899 CHARSET_FAST_MAP_SET (i, charset.fast_map);
900 }
901 else if (! NILP (args[charset_arg_map]))
902 {
903 val = args[charset_arg_map];
904 ASET (attrs, charset_map, val);
905 if (STRINGP (val))
e9ce014c
KH
906 load_charset_map_from_file (&charset, val, 0);
907 else
908 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
909 charset.method = CHARSET_METHOD_MAP_DEFERRED;
910 }
374c5cfd 911 else if (! NILP (args[charset_arg_subset]))
3263d5a2 912 {
374c5cfd
KH
913 Lisp_Object parent;
914 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
915 struct charset *parent_charset;
916
917 val = args[charset_arg_subset];
918 parent = Fcar (val);
919 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
920 parent_min_code = Fnth (make_number (1), val);
921 CHECK_NATNUM (parent_min_code);
922 parent_max_code = Fnth (make_number (2), val);
923 CHECK_NATNUM (parent_max_code);
924 parent_code_offset = Fnth (make_number (3), val);
925 CHECK_NUMBER (parent_code_offset);
926 val = Fmake_vector (make_number (4), Qnil);
927 ASET (val, 0, make_number (parent_charset->id));
928 ASET (val, 1, parent_min_code);
929 ASET (val, 2, parent_max_code);
930 ASET (val, 3, parent_code_offset);
931 ASET (attrs, charset_subset, val);
932
933 charset.method = CHARSET_METHOD_SUBSET;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i = 0; i < 190; i++)
938 charset.fast_map[i] = parent_charset->fast_map[i];
939
940 /* We also copy these for parents. */
941 charset.min_char = parent_charset->min_char;
942 charset.max_char = parent_charset->max_char;
943 }
944 else if (! NILP (args[charset_arg_superset]))
945 {
946 val = args[charset_arg_superset];
947 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 948 val = Fcopy_sequence (val);
374c5cfd 949 ASET (attrs, charset_superset, val);
3263d5a2
KH
950
951 charset.min_char = MAX_CHAR;
952 charset.max_char = 0;
953 for (; ! NILP (val); val = Fcdr (val))
4ed46869 954 {
3263d5a2
KH
955 Lisp_Object elt, car_part, cdr_part;
956 int this_id, offset;
957 struct charset *this_charset;
958
959 elt = Fcar (val);
960 if (CONSP (elt))
961 {
962 car_part = XCAR (elt);
963 cdr_part = XCDR (elt);
964 CHECK_CHARSET_GET_ID (car_part, this_id);
965 CHECK_NUMBER (cdr_part);
966 offset = XINT (cdr_part);
967 }
968 else
4ed46869 969 {
3263d5a2
KH
970 CHECK_CHARSET_GET_ID (elt, this_id);
971 offset = 0;
4ed46869 972 }
3263d5a2
KH
973 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
974
975 this_charset = CHARSET_FROM_ID (this_id);
976 if (charset.min_char > this_charset->min_char)
977 charset.min_char = this_charset->min_char;
978 if (charset.max_char < this_charset->max_char)
979 charset.max_char = this_charset->max_char;
980 for (i = 0; i < 190; i++)
981 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 982 }
4ed46869 983 }
3263d5a2
KH
984 else
985 error ("None of :code-offset, :map, :parents are specified");
4ed46869 986
3263d5a2
KH
987 val = args[charset_arg_unify_map];
988 if (! NILP (val) && !STRINGP (val))
989 CHECK_VECTOR (val);
990 ASET (attrs, charset_unify_map, val);
4ed46869 991
3263d5a2
KH
992 CHECK_LIST (args[charset_arg_plist]);
993 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 994
3263d5a2
KH
995 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
996 &hash_code);
997 if (charset.hash_index >= 0)
998 {
999 new_definition_p = 0;
4f65af01 1000 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1001 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1002 }
1a45ff10 1003 else
3263d5a2
KH
1004 {
1005 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1006 hash_code);
1007 if (charset_table_used == charset_table_size)
1008 {
1009 charset_table_size += 256;
1010 charset_table
1011 = ((struct charset *)
1012 xrealloc (charset_table,
1013 sizeof (struct charset) * charset_table_size));
1014 }
1015 id = charset_table_used++;
3263d5a2
KH
1016 new_definition_p = 1;
1017 }
4ed46869 1018
4f65af01 1019 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1020 charset.id = id;
1021 charset_table[id] = charset;
1022
1023 if (charset.iso_final >= 0)
4ed46869 1024 {
3263d5a2
KH
1025 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1026 charset.iso_final) = id;
1027 if (new_definition_p)
1028 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1029 Fcons (make_number (id), Qnil));
7c7dceee
KH
1030 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1031 charset_jisx0201_roman = id;
1032 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1033 charset_jisx0208_1978 = id;
1034 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1035 charset_jisx0208 = id;
4ed46869 1036 }
3263d5a2
KH
1037
1038 if (charset.emacs_mule_id >= 0)
4ed46869 1039 {
3263d5a2 1040 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1041 if (charset.emacs_mule_id < 0xA0)
1042 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3263d5a2
KH
1043 if (new_definition_p)
1044 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1045 Fcons (make_number (id), Qnil));
4ed46869
KH
1046 }
1047
3263d5a2
KH
1048 if (new_definition_p)
1049 {
1050 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1051 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1052 Fcons (make_number (id), Qnil));
1053 }
4ed46869 1054
3263d5a2 1055 return Qnil;
4ed46869
KH
1056}
1057
3263d5a2
KH
1058DEFUN ("define-charset-alias", Fdefine_charset_alias,
1059 Sdefine_charset_alias, 2, 2, 0,
1060 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1061 (alias, charset)
1062 Lisp_Object alias, charset;
4ed46869 1063{
3263d5a2
KH
1064 Lisp_Object attr;
1065
1066 CHECK_CHARSET_GET_ATTR (charset, attr);
1067 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1068 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1069 return Qnil;
1070}
4ed46869 1071
4ed46869 1072
3263d5a2 1073DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
56a46d1d 1074 doc: /* Return the primary charset (set by `set-primary-charset'). */)
3263d5a2
KH
1075 ()
1076{
1077 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
1078}
4ed46869 1079
4ed46869 1080
3263d5a2
KH
1081DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
1082 1, 1, 0,
56a46d1d
DL
1083 doc: /* Set the primary charset to CHARSET.
1084This determines how unibyte/multibyte conversion is done. See also
1085function `primary-charset'. */)
3263d5a2
KH
1086 (charset)
1087 Lisp_Object charset;
1088{
1089 int id;
1090
1091 CHECK_CHARSET_GET_ID (charset, id);
1092 charset_primary = id;
4ed46869
KH
1093 return Qnil;
1094}
1095
3263d5a2
KH
1096
1097DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1098 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1099 (charset)
1100 Lisp_Object charset;
1101{
1102 Lisp_Object attrs;
1103
1104 CHECK_CHARSET_GET_ATTR (charset, attrs);
1105 return CHARSET_ATTR_PLIST (attrs);
1106}
1107
1108
1109DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1110 doc: /* Set CHARSET's property list to PLIST. */)
1111 (charset, plist)
1112 Lisp_Object charset, plist;
1113{
1114 Lisp_Object attrs;
1115
1116 CHECK_CHARSET_GET_ATTR (charset, attrs);
1117 CHARSET_ATTR_PLIST (attrs) = plist;
1118 return plist;
1119}
1120
1121
1122DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
56a46d1d
DL
1123 doc: /* Unify characters of CHARSET with Unicode.
1124This means reading the relevant file and installing the table defined
1125by CHARSET's `:unify-map' property. */)
3263d5a2
KH
1126 (charset, unify_map)
1127 Lisp_Object charset, unify_map;
8a73a704 1128{
3263d5a2
KH
1129 int id;
1130 struct charset *cs;
1131
1132 CHECK_CHARSET_GET_ID (charset, id);
1133 cs = CHARSET_FROM_ID (id);
1134 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1135 load_charset (cs);
1136 if (CHARSET_UNIFIED_P (cs)
1137 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs)))
1138 return Qnil;
1139 CHARSET_UNIFIED_P (cs) = 0;
1140 if (NILP (unify_map))
1141 unify_map = CHARSET_UNIFY_MAP (cs);
1142 if (STRINGP (unify_map))
e9ce014c 1143 load_charset_map_from_file (cs, unify_map, 2);
56a46d1d 1144 else if (VECTORP (unify_map))
e9ce014c 1145 load_charset_map_from_vector (cs, unify_map, 2);
56a46d1d
DL
1146 else if (NILP (unify_map))
1147 error ("No unify-map for charset");
1148 else
1149 error ("Bad unify-map arg");
3263d5a2
KH
1150 CHARSET_UNIFIED_P (cs) = 1;
1151 return Qnil;
8a73a704
KH
1152}
1153
3fac5a51
KH
1154DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1155 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1156 doc: /*
1721b6af 1157Return an unsed ISO final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1158DIMENSION is the number of bytes to represent a character: 1 or 2.
1159CHARS is the number of characters in a dimension: 94 or 96.
1160
1161This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1162If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1163return nil. */)
1164 (dimension, chars)
3fac5a51
KH
1165 Lisp_Object dimension, chars;
1166{
1167 int final_char;
1168
b7826503
PJ
1169 CHECK_NUMBER (dimension);
1170 CHECK_NUMBER (chars);
3263d5a2
KH
1171 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1172 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1173 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1174 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1175 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1176 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1177 break;
3fac5a51
KH
1178 return (final_char <= '?' ? make_number (final_char) : Qnil);
1179}
1180
3263d5a2
KH
1181static void
1182check_iso_charset_parameter (dimension, chars, final_char)
1183 Lisp_Object dimension, chars, final_char;
4ed46869 1184{
3263d5a2
KH
1185 CHECK_NATNUM (dimension);
1186 CHECK_NATNUM (chars);
1187 CHECK_NATNUM (final_char);
4ed46869 1188
3263d5a2
KH
1189 if (XINT (dimension) > 3)
1190 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1191 if (XINT (chars) != 94 && XINT (chars) != 96)
1192 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1193 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1194 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1195}
1196
1197
1198DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1199 4, 4, 0,
1200 doc: /*
1201Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
56a46d1d 1202CHARSET should be defined by `define-charset' in advance. */)
3263d5a2
KH
1203 (dimension, chars, final_char, charset)
1204 Lisp_Object dimension, chars, final_char, charset;
1205{
1206 int id;
4ed46869 1207
3263d5a2
KH
1208 CHECK_CHARSET_GET_ID (charset, id);
1209 check_iso_charset_parameter (dimension, chars, final_char);
1210
1211 ISO_CHARSET_TABLE (dimension, chars, final_char) = id;
4ed46869
KH
1212 return Qnil;
1213}
1214
3263d5a2 1215
2e344af3
KH
1216/* Return information about charsets in the text at PTR of NBYTES
1217 bytes, which are NCHARS characters. The value is:
f6302ac9 1218
cfe34140 1219 0: Each character is represented by one byte. This is always
3263d5a2
KH
1220 true for a unibyte string. For a multibyte string, true if
1221 it contains only ASCII characters.
1222
1223 1: No charsets other than ascii, eight-bit-control, and
1224 latin-1 are found.
1d67c29b 1225
3263d5a2
KH
1226 2: Otherwise.
1227*/
4ed46869
KH
1228
1229int
3263d5a2
KH
1230string_xstring_p (string)
1231 Lisp_Object string;
4ed46869 1232{
3263d5a2
KH
1233 unsigned char *p = XSTRING (string)->data;
1234 unsigned char *endp = p + STRING_BYTES (XSTRING (string));
1235 struct charset *charset;
1236
1237 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1238 return 0;
1239
1240 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1241 while (p < endp)
0282eb69 1242 {
3263d5a2 1243 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1244
3263d5a2
KH
1245 if (ENCODE_CHAR (charset, c) < 0)
1246 return 2;
0282eb69 1247 }
3263d5a2
KH
1248 return 1;
1249}
05505664 1250
05505664 1251
3263d5a2 1252/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1253
3263d5a2
KH
1254 CHARSETS is a vector. Each element is a cons of CHARSET and
1255 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1256 FOUND-FLAG t (or nil) means that the corresponding charset is
1257 already found (or not yet found).
2e344af3 1258
3263d5a2 1259 It may lookup a translation table TABLE if supplied. */
2e344af3 1260
3263d5a2
KH
1261static void
1262find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
1263 unsigned char *ptr;
1264 int nchars, nbytes;
1265 Lisp_Object charsets, table;
1266{
1267 unsigned char *pend = ptr + nbytes;
1268 int ncharsets = ASIZE (charsets);
1269
1270 if (nchars == nbytes)
1271 return;
1272
1273 while (ptr < pend)
1274 {
1275 int c = STRING_CHAR_ADVANCE (ptr);
1276 int i;
1277 int all_found = 1;
1278 Lisp_Object elt;
1279
1280 if (!NILP (table))
1281 c = translate_char (table, c);
1282 for (i = 0; i < ncharsets; i++)
1283 {
1284 elt = AREF (charsets, i);
1285 if (NILP (XCDR (elt)))
1286 {
1287 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1288
1289 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1290 XCDR (elt) = Qt;
1291 else
1292 all_found = 0;
1293 }
4ed46869 1294 }
3263d5a2
KH
1295 if (all_found)
1296 break;
4ed46869 1297 }
4ed46869
KH
1298}
1299
3263d5a2 1300
4ed46869 1301DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1302 2, 3, 0,
fdb82f93
PJ
1303 doc: /* Return a list of charsets in the region between BEG and END.
1304BEG and END are buffer positions.
1305Optional arg TABLE if non-nil is a translation table to look up.
1306
fdb82f93
PJ
1307If the current buffer is unibyte, the returned list may contain
1308only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1309 (beg, end, table)
23d2a7f1 1310 Lisp_Object beg, end, table;
4ed46869 1311{
3263d5a2 1312 Lisp_Object charsets;
6ae1f27e 1313 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1314 Lisp_Object val;
1315
1316 validate_region (&beg, &end);
1317 from = XFASTINT (beg);
1318 stop = to = XFASTINT (end);
6ae1f27e 1319
4ed46869 1320 if (from < GPT && GPT < to)
6ae1f27e
RS
1321 {
1322 stop = GPT;
1323 stop_byte = GPT_BYTE;
1324 }
1325 else
1326 stop_byte = CHAR_TO_BYTE (stop);
1327
1328 from_byte = CHAR_TO_BYTE (from);
1329
3263d5a2
KH
1330 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1331 for (i = 0; i < charset_table_used; i++)
1332 ASET (charsets, i, Fcons (make_number (i), Qnil));
1333
4ed46869
KH
1334 while (1)
1335 {
3263d5a2
KH
1336 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1337 stop_byte - from_byte, charsets, table);
4ed46869 1338 if (stop < to)
6ae1f27e
RS
1339 {
1340 from = stop, from_byte = stop_byte;
1341 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1342 }
4ed46869
KH
1343 else
1344 break;
1345 }
6ae1f27e 1346
4ed46869 1347 val = Qnil;
3263d5a2
KH
1348 for (i = charset_table_used - 1; i >= 0; i--)
1349 if (!NILP (XCDR (AREF (charsets, i))))
1350 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1351 return val;
1352}
1353
1354DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1355 1, 2, 0,
fdb82f93
PJ
1356 doc: /* Return a list of charsets in STR.
1357Optional arg TABLE if non-nil is a translation table to look up.
1358
fdb82f93 1359If STR is unibyte, the returned list may contain
3263d5a2 1360only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1361 (str, table)
23d2a7f1 1362 Lisp_Object str, table;
4ed46869 1363{
3263d5a2 1364 Lisp_Object charsets;
4ed46869
KH
1365 int i;
1366 Lisp_Object val;
1367
b7826503 1368 CHECK_STRING (str);
87b089ad 1369
3263d5a2 1370 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
dcd50550
KH
1371 for (i = 0; i < charset_table_used; i++)
1372 ASET (charsets, i, Fcons (make_number (i), Qnil));
3263d5a2
KH
1373 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1374 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1375
4ed46869 1376 val = Qnil;
3263d5a2
KH
1377 for (i = charset_table_used - 1; i >= 0; i--)
1378 if (!NILP (XCDR (AREF (charsets, i))))
1379 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1380 return val;
1381}
2e344af3 1382
4ed46869 1383\f
3263d5a2
KH
1384
1385/* Return a character correponding to the code-point CODE of
1386 CHARSET. */
1387
1388int
1389decode_char (charset, code)
1390 struct charset *charset;
1391 unsigned code;
4ed46869 1392{
3263d5a2
KH
1393 int c, char_index;
1394 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1395
3263d5a2
KH
1396 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1397 return -1;
4ed46869 1398
3263d5a2 1399 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1400 {
3263d5a2
KH
1401 load_charset (charset);
1402 method = CHARSET_METHOD (charset);
ac4137cc 1403 }
4ed46869 1404
374c5cfd
KH
1405 if (method == CHARSET_METHOD_SUBSET)
1406 {
1407 Lisp_Object subset_info;
1408
1409 subset_info = CHARSET_SUBSET (charset);
1410 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1411 code -= XINT (AREF (subset_info, 3));
1412 if (code < XFASTINT (AREF (subset_info, 1))
1413 || code > XFASTINT (AREF (subset_info, 2)))
1414 c = -1;
1415 else
1416 c = DECODE_CHAR (charset, code);
1417 }
1418 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1419 {
3263d5a2 1420 Lisp_Object parents;
4ed46869 1421
374c5cfd 1422 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1423 c = -1;
1424 for (; CONSP (parents); parents = XCDR (parents))
1425 {
1426 int id = XINT (XCAR (XCAR (parents)));
1427 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1428 unsigned this_code = code - code_offset;
9d3d8cba 1429
3263d5a2
KH
1430 charset = CHARSET_FROM_ID (id);
1431 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1432 break;
1433 }
1434 }
1435 else
ac4137cc 1436 {
3263d5a2 1437 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1438 if (char_index < 0)
1439 return -1;
3263d5a2
KH
1440
1441 if (method == CHARSET_METHOD_MAP)
ac4137cc 1442 {
3263d5a2
KH
1443 Lisp_Object decoder;
1444
1445 decoder = CHARSET_DECODER (charset);
1446 if (! VECTORP (decoder))
1447 return -1;
1448 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1449 }
1450 else
1451 {
3263d5a2 1452 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1453 }
1454 }
9d3d8cba 1455
3263d5a2
KH
1456 if (CHARSET_UNIFIED_P (charset)
1457 && c >= 0)
c449997d
KH
1458 {
1459 MAYBE_UNIFY_CHAR (c);
1460 }
d2665018 1461
3263d5a2 1462 return c;
d2665018
KH
1463}
1464
374c5cfd
KH
1465/* Variable used temporarily by the macro ENCODE_CHAR. */
1466Lisp_Object charset_work;
1bcc1567 1467
3263d5a2
KH
1468/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1469 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1bcc1567 1470
3263d5a2
KH
1471unsigned
1472encode_char (charset, c)
1473 struct charset *charset;
9b6a601f
KH
1474 int c;
1475{
3263d5a2
KH
1476 unsigned code;
1477 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1478
3263d5a2 1479 if (CHARSET_UNIFIED_P (charset))
4ed46869 1480 {
374c5cfd 1481 Lisp_Object deunifier, deunified;
4ed46869 1482
3263d5a2
KH
1483 deunifier = CHARSET_DEUNIFIER (charset);
1484 if (! CHAR_TABLE_P (deunifier))
1485 {
1486 Funify_charset (CHARSET_NAME (charset), Qnil);
1487 deunifier = CHARSET_DEUNIFIER (charset);
1488 }
374c5cfd
KH
1489 deunified = CHAR_TABLE_REF (deunifier, c);
1490 if (! NILP (deunified))
1491 c = XINT (deunified);
4ed46869 1492 }
beeedaad 1493
3263d5a2
KH
1494 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1495 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1496 return CHARSET_INVALID_CODE (charset);
beeedaad 1497
374c5cfd
KH
1498 if (method == CHARSET_METHOD_SUBSET)
1499 {
1500 Lisp_Object subset_info;
1501 struct charset *this_charset;
1502
1503 subset_info = CHARSET_SUBSET (charset);
1504 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1505 code = ENCODE_CHAR (this_charset, c);
1506 if (code == CHARSET_INVALID_CODE (this_charset)
1507 || code < XFASTINT (AREF (subset_info, 1))
1508 || code > XFASTINT (AREF (subset_info, 2)))
1509 return CHARSET_INVALID_CODE (charset);
1510 code += XINT (AREF (subset_info, 3));
1511 return code;
1512 }
1513
1514 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1515 {
3263d5a2 1516 Lisp_Object parents;
859f2b3c 1517
374c5cfd 1518 parents = CHARSET_SUPERSET (charset);
3263d5a2 1519 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1520 {
3263d5a2
KH
1521 int id = XINT (XCAR (XCAR (parents)));
1522 int code_offset = XINT (XCDR (XCAR (parents)));
1523 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1524
3263d5a2
KH
1525 code = ENCODE_CHAR (this_charset, c);
1526 if (code != CHARSET_INVALID_CODE (this_charset)
1527 && (code_offset < 0 || code >= code_offset))
1528 {
374c5cfd 1529 code += code_offset;
69f8de5b
KH
1530 if (code >= charset->min_code && code <= charset->max_code
1531 && CODE_POINT_TO_INDEX (charset, code) >= 0)
3263d5a2
KH
1532 return code;
1533 }
beeedaad 1534 }
3263d5a2
KH
1535 return CHARSET_INVALID_CODE (charset);
1536 }
99529c2c 1537
3263d5a2 1538 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1539 {
3263d5a2
KH
1540 load_charset (charset);
1541 method = CHARSET_METHOD (charset);
859f2b3c 1542 }
beeedaad 1543
3263d5a2 1544 if (method == CHARSET_METHOD_MAP)
3f62427c 1545 {
3263d5a2 1546 Lisp_Object encoder;
beeedaad 1547 Lisp_Object val;
beeedaad 1548
3263d5a2
KH
1549 encoder = CHARSET_ENCODER (charset);
1550 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1551 return CHARSET_INVALID_CODE (charset);
1552 val = CHAR_TABLE_REF (encoder, c);
374c5cfd
KH
1553 if (NILP (val))
1554 return CHARSET_INVALID_CODE (charset);
e9ce014c
KH
1555 code = XINT (val);
1556 if (! CHARSET_COMPACT_CODES_P (charset))
1557 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2 1558 }
820ee249 1559 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1560 {
3263d5a2
KH
1561 code = c - CHARSET_CODE_OFFSET (charset);
1562 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1563 }
beeedaad 1564
3263d5a2 1565 return code;
3f62427c
KH
1566}
1567
4ed46869 1568
3263d5a2
KH
1569DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1570 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1571Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1572
3263d5a2
KH
1573CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1574
1575Optional argument RESTRICTION specifies a way to map the pair of CCS
1576and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1577 (charset, code_point, restriction)
1578 Lisp_Object charset, code_point, restriction;
4ed46869 1579{
3263d5a2
KH
1580 int c, id;
1581 unsigned code;
1582 struct charset *charsetp;
4ed46869 1583
3263d5a2
KH
1584 CHECK_CHARSET_GET_ID (charset, id);
1585 if (CONSP (code_point))
1586 {
1587 CHECK_NATNUM (XCAR (code_point));
1588 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1589 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1590 }
1591 else
1592 {
1593 CHECK_NATNUM (code_point);
1594 code = XINT (code_point);
1595 }
1596 charsetp = CHARSET_FROM_ID (id);
1597 c = DECODE_CHAR (charsetp, code);
1598 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1599}
1600
046b1f03 1601
3263d5a2
KH
1602DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1603 doc: /* Encode the character CH into a code-point of CHARSET.
1604Return nil if CHARSET doesn't include CH.
17e7ef1b 1605
3263d5a2
KH
1606Optional argument RESTRICTION specifies a way to map CHAR to a
1607code-point in CCS. Currently not supported and just ignored. */)
1608 (ch, charset, restriction)
1609 Lisp_Object ch, charset, restriction;
1610{
1611 int c, id;
1612 unsigned code;
1613 struct charset *charsetp;
046b1f03 1614
3263d5a2
KH
1615 CHECK_CHARSET_GET_ID (charset, id);
1616 CHECK_NATNUM (ch);
1617 c = XINT (ch);
1618 charsetp = CHARSET_FROM_ID (id);
1619 code = ENCODE_CHAR (charsetp, ch);
1620 if (code == CHARSET_INVALID_CODE (charsetp))
1621 return Qnil;
1622 if (code > 0x7FFFFFF)
1623 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1624 return make_number (code);
6ae1f27e 1625}
9036eb45 1626
87b089ad 1627
b121a744
KH
1628DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1629 doc:
1630 /* Return a character of CHARSET whose position codes are CODEn.
1631
1632CODE1 through CODE4 are optional, but if you don't supply sufficient
1633position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1634is specified. */)
b121a744
KH
1635 (charset, code1, code2, code3, code4)
1636 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1637{
3263d5a2
KH
1638 int id, dimension;
1639 struct charset *charsetp;
b121a744
KH
1640 unsigned code;
1641 int c;
87b089ad 1642
3263d5a2
KH
1643 CHECK_CHARSET_GET_ID (charset, id);
1644 charsetp = CHARSET_FROM_ID (id);
87b089ad 1645
b121a744
KH
1646 dimension = CHARSET_DIMENSION (charsetp);
1647 if (NILP (code1))
d47073ca
KH
1648 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1649 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1650 else
87b089ad 1651 {
b121a744
KH
1652 CHECK_NATNUM (code1);
1653 if (XFASTINT (code1) >= 0x100)
1654 args_out_of_range (make_number (0xFF), code1);
1655 code = XFASTINT (code1);
2e344af3 1656
b0a1e45e 1657 if (dimension > 1)
b121a744
KH
1658 {
1659 code <<= 8;
b0a1e45e
KH
1660 if (NILP (code2))
1661 code |= charsetp->code_space[(dimension - 2) * 4];
b121a744
KH
1662 else
1663 {
b0a1e45e
KH
1664 CHECK_NATNUM (code2);
1665 if (XFASTINT (code2) >= 0x100)
1666 args_out_of_range (make_number (0xFF), code2);
1667 code |= XFASTINT (code2);
b121a744
KH
1668 }
1669
b0a1e45e 1670 if (dimension > 2)
b121a744
KH
1671 {
1672 code <<= 8;
b0a1e45e
KH
1673 if (NILP (code3))
1674 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1675 else
1676 {
b0a1e45e
KH
1677 CHECK_NATNUM (code3);
1678 if (XFASTINT (code3) >= 0x100)
1679 args_out_of_range (make_number (0xFF), code3);
1680 code |= XFASTINT (code3);
1681 }
1682
1683 if (dimension > 3)
1684 {
1685 code <<= 8;
1686 if (NILP (code4))
1687 code |= charsetp->code_space[0];
1688 else
1689 {
1690 CHECK_NATNUM (code4);
1691 if (XFASTINT (code4) >= 0x100)
1692 args_out_of_range (make_number (0xFF), code4);
1693 code |= XFASTINT (code4);
1694 }
b121a744
KH
1695 }
1696 }
1697 }
1698 }
3263d5a2 1699
b121a744
KH
1700 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1701 code &= 0x7F7F7F7F;
1702 c = DECODE_CHAR (charsetp, code);
1703 if (c < 0)
1704 error ("Invalid code(s)");
3263d5a2 1705 return make_number (c);
2e344af3
KH
1706}
1707
3263d5a2
KH
1708
1709/* Return the first charset in CHARSET_LIST that contains C.
1710 CHARSET_LIST is a list of charset IDs. If it is nil, use
1711 Vcharset_ordered_list. */
1712
1713struct charset *
1714char_charset (c, charset_list, code_return)
1715 int c;
1716 Lisp_Object charset_list;
1717 unsigned *code_return;
2e344af3 1718{
3263d5a2
KH
1719 if (NILP (charset_list))
1720 charset_list = Vcharset_ordered_list;
2e344af3 1721
3263d5a2 1722 while (CONSP (charset_list))
2e344af3 1723 {
3263d5a2
KH
1724 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1725 unsigned code = ENCODE_CHAR (charset, c);
1726
1727 if (code != CHARSET_INVALID_CODE (charset))
1728 {
1729 if (code_return)
1730 *code_return = code;
1731 return charset;
1732 }
1733 charset_list = XCDR (charset_list);
2e344af3 1734 }
3263d5a2 1735 return NULL;
2e344af3
KH
1736}
1737
2e344af3 1738
56a46d1d 1739/* Fixme: `unknown' can't happen now? */
3263d5a2 1740DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
04c2f2c5 1741 doc: /*Return list of charset and one to three position-codes of CHAR.
3263d5a2
KH
1742If CHAR is invalid as a character code,
1743return a list of symbol `unknown' and CHAR. */)
1744 (ch)
1745 Lisp_Object ch;
2e344af3 1746{
3263d5a2
KH
1747 struct charset *charset;
1748 int c, dimension;
1749 unsigned code;
1750 Lisp_Object val;
1751
1752 CHECK_CHARACTER (ch);
1753 c = XFASTINT (ch);
1754 charset = CHAR_CHARSET (c);
1755 if (! charset)
1756 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1757
1758 code = ENCODE_CHAR (charset, c);
1759 if (code == CHARSET_INVALID_CODE (charset))
1760 abort ();
1761 dimension = CHARSET_DIMENSION (charset);
1762 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1763 : dimension == 2 ? Fcons (make_number (code >> 8),
1764 Fcons (make_number (code & 0xFF), Qnil))
1765 : Fcons (make_number (code >> 16),
1766 Fcons (make_number ((code >> 8) & 0xFF),
1767 Fcons (make_number (code & 0xFF), Qnil))));
1768 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1769}
87b089ad 1770
740f080d 1771
3263d5a2
KH
1772DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1773 doc: /* Return the charset of highest priority that contains CHAR. */)
1774 (ch)
1775 Lisp_Object ch;
740f080d 1776{
3263d5a2 1777 struct charset *charset;
740f080d 1778
3263d5a2
KH
1779 CHECK_CHARACTER (ch);
1780 charset = CHAR_CHARSET (XINT (ch));
1781 return (CHARSET_NAME (charset));
740f080d
KH
1782}
1783
2e344af3 1784
3263d5a2
KH
1785DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1786 doc: /*
1787Return charset of a character in the current buffer at position POS.
1788If POS is nil, it defauls to the current point.
1789If POS is out of range, the value is nil. */)
1790 (pos)
1791 Lisp_Object pos;
2e344af3 1792{
3263d5a2
KH
1793 Lisp_Object ch;
1794 struct charset *charset;
1795
1796 ch = Fchar_after (pos);
1797 if (! INTEGERP (ch))
1798 return ch;
1799 charset = CHAR_CHARSET (XINT (ch));
1800 return (CHARSET_NAME (charset));
87b089ad
RS
1801}
1802
2e344af3 1803
3263d5a2
KH
1804DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1805 doc: /*
1806Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1807
1808ISO 2022's designation sequence (escape sequence) distinguishes charsets
1809by their DIMENSION, CHARS, and FINAL-CHAR,
1810where as Emacs distinguishes them by charset symbol.
1811See the documentation of the function `charset-info' for the meanings of
1812DIMENSION, CHARS, and FINAL-CHAR. */)
1813 (dimension, chars, final_char)
1814 Lisp_Object dimension, chars, final_char;
2e344af3 1815{
3263d5a2 1816 int id;
2e344af3 1817
3263d5a2
KH
1818 check_iso_charset_parameter (dimension, chars, final_char);
1819 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1820 XFASTINT (final_char));
1821 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1822}
1823
3263d5a2
KH
1824
1825DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1826 0, 0, 0,
1827 doc: /*
1828Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1829 ()
4ed46869 1830{
53316e55 1831 int i;
3263d5a2
KH
1832 struct charset *charset;
1833 Lisp_Object attrs;
4ed46869 1834
3263d5a2 1835 for (i = 0; i < charset_table_used; i++)
4ed46869 1836 {
3263d5a2
KH
1837 charset = CHARSET_FROM_ID (i);
1838 attrs = CHARSET_ATTRIBUTES (charset);
1839
1840 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1841 {
1842 CHARSET_ATTR_DECODER (attrs) = Qnil;
1843 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1844 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1845 }
1846
1847 if (CHARSET_UNIFIED_P (charset))
1848 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1849 }
1850
3263d5a2 1851 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1852 {
3263d5a2
KH
1853 Foptimize_char_table (Vchar_unified_charset_table);
1854 Vchar_unify_table = Vchar_unified_charset_table;
1855 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1856 }
1857
3263d5a2 1858 return Qnil;
4ed46869
KH
1859}
1860
8ddf5e57
DL
1861DEFUN ("charset-priority-list", Fcharset_priority_list,
1862 Scharset_priority_list, 0, 1, 0,
1863 doc: /* Return the list of charsets ordered by priority.
1864HIGHESTP non-nil means just return the highest priority one. */)
1865 (highestp)
1866 Lisp_Object highestp;
1867{
1868 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1869
1870 if (!NILP (highestp))
1871 return CHARSET_NAME (CHARSET_FROM_ID (Fcar (list)));
1872
1873 while (!NILP (list))
1874 {
1875 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XCAR (list))), val);
1876 list = XCDR (list);
1877 }
1878 return Fnreverse (val);
1879}
1880
1881DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1882 1, MANY, 0,
1883 doc: /* Assign higher priority to the charsets given as arguments.
1884usage: (set-charset-priority &rest charsets) */)
1885 (nargs, args)
1886 int nargs;
1887 Lisp_Object *args;
1888{
1889 Lisp_Object new_head = Qnil, old_list, id, arglist[2];
1890 int i;
1891
1892 old_list = Fcopy_sequence (Vcharset_ordered_list);
1893 for (i = 0; i < nargs; i++)
1894 {
1895 CHECK_CHARSET_GET_ID (args[i], id);
1896 old_list = Fdelq (id, old_list);
1897 new_head = Fcons (id, new_head);
1898 }
1899 arglist[0] = Fnreverse (new_head);
1900 arglist[1] = old_list;
1901 Vcharset_ordered_list = Fnconc (2, arglist);
1902 return Qnil;
1903}
4ed46869 1904\f
3263d5a2
KH
1905void
1906init_charset ()
4ed46869 1907{
4ed46869 1908
4ed46869
KH
1909}
1910
4ed46869 1911
dfcf069d 1912void
4ed46869
KH
1913init_charset_once ()
1914{
1915 int i, j, k;
1916
3263d5a2
KH
1917 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1918 for (j = 0; j < ISO_MAX_CHARS; j++)
1919 for (k = 0; k < ISO_MAX_FINAL; k++)
1920 iso_charset_table[i][j][k] = -1;
1921
1922 for (i = 0; i < 255; i++)
1923 emacs_mule_charset[i] = NULL;
4ed46869 1924
7c7dceee
KH
1925 charset_jisx0201_roman = -1;
1926 charset_jisx0208_1978 = -1;
1927 charset_jisx0208 = -1;
1928
3263d5a2
KH
1929#if 0
1930 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1931 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1932
1933 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1934
1935 /* Intern this now in case it isn't already done.
1936 Setting this variable twice is harmless.
1937 But don't staticpro it here--that is done in alloc.c. */
1938 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1939
3263d5a2
KH
1940 /* Now we are ready to set up this property, so we can create syntax
1941 tables. */
1942 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1943#endif
4ed46869
KH
1944}
1945
1946#ifdef emacs
1947
dfcf069d 1948void
4ed46869
KH
1949syms_of_charset ()
1950{
3263d5a2
KH
1951 char *p;
1952
1953 DEFSYM (Qcharsetp, "charsetp");
1954
1955 DEFSYM (Qascii, "ascii");
1956 DEFSYM (Qunicode, "unicode");
1957 DEFSYM (Qeight_bit_control, "eight-bit-control");
1958 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1959 DEFSYM (Qiso_8859_1, "iso-8859-1");
1960
1961 DEFSYM (Qgl, "gl");
1962 DEFSYM (Qgr, "gr");
1963
1964 p = (char *) xmalloc (30000);
1965
1966 staticpro (&Vcharset_ordered_list);
1967 Vcharset_ordered_list = Qnil;
1968
1969 staticpro (&Viso_2022_charset_list);
1970 Viso_2022_charset_list = Qnil;
1971
1972 staticpro (&Vemacs_mule_charset_list);
1973 Vemacs_mule_charset_list = Qnil;
1974
1975 staticpro (&Vcharset_hash_table);
1976 Vcharset_hash_table = Fmakehash (Qeq);
1977
1978 charset_table_size = 128;
1979 charset_table = ((struct charset *)
1980 xmalloc (sizeof (struct charset) * charset_table_size));
1981 charset_table_used = 0;
1982
1983 staticpro (&Vchar_unified_charset_table);
1984 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
1985
1986 defsubr (&Scharsetp);
1987 defsubr (&Smap_charset_chars);
1988 defsubr (&Sdefine_charset_internal);
1989 defsubr (&Sdefine_charset_alias);
1990 defsubr (&Sprimary_charset);
1991 defsubr (&Sset_primary_charset);
1992 defsubr (&Scharset_plist);
1993 defsubr (&Sset_charset_plist);
1994 defsubr (&Sunify_charset);
3fac5a51 1995 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1996 defsubr (&Sdeclare_equiv_charset);
1997 defsubr (&Sfind_charset_region);
1998 defsubr (&Sfind_charset_string);
3263d5a2
KH
1999 defsubr (&Sdecode_char);
2000 defsubr (&Sencode_char);
4ed46869 2001 defsubr (&Ssplit_char);
3263d5a2 2002 defsubr (&Smake_char);
4ed46869 2003 defsubr (&Schar_charset);
90d7b74e 2004 defsubr (&Scharset_after);
4ed46869 2005 defsubr (&Siso_charset);
3263d5a2 2006 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2007 defsubr (&Scharset_priority_list);
2008 defsubr (&Sset_charset_priority);
3263d5a2
KH
2009
2010 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
2011 doc: /* Directory of charset map files that come with GNU Emacs.
04c2f2c5 2012The default value is sub-directory "charsets" of `data-directory'. */);
3263d5a2
KH
2013 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
2014 Vdata_directory);
4ed46869
KH
2015
2016 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2017 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2018 Vcharset_list = Qnil;
2019
2020 /* Make the prerequisite charset `ascii' and `unicode'. */
2021 {
2022 Lisp_Object args[charset_arg_max];
2023 Lisp_Object plist[14];
2024 Lisp_Object val;
2025
2026 plist[0] = intern (":name");
3263d5a2 2027 plist[2] = intern (":dimension");
3263d5a2 2028 plist[4] = intern (":code-space");
3263d5a2 2029 plist[6] = intern (":iso-final-char");
3263d5a2 2030 plist[8] = intern (":emacs-mule-id");
3263d5a2 2031 plist[10] = intern (":ascii-compatible-p");
820ee249
KH
2032 plist[12] = intern (":code-offset");
2033
2034 args[charset_arg_name] = Qascii;
2035 args[charset_arg_dimension] = make_number (1);
2036 val = Fmake_vector (make_number (8), make_number (0));
2037 ASET (val, 1, make_number (127));
2038 args[charset_arg_code_space] = val;
2039 args[charset_arg_min_code] = Qnil;
2040 args[charset_arg_max_code] = Qnil;
2041 args[charset_arg_iso_final] = make_number ('B');
2042 args[charset_arg_iso_revision] = Qnil;
2043 args[charset_arg_emacs_mule_id] = make_number (0);
2044 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2045 args[charset_arg_supplementary_p] = Qnil;
2046 args[charset_arg_invalid_code] = Qnil;
820ee249 2047 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2048 args[charset_arg_map] = Qnil;
374c5cfd
KH
2049 args[charset_arg_subset] = Qnil;
2050 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2051 args[charset_arg_unify_map] = Qnil;
2052 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2053 plist[1] = args[charset_arg_name];
2054 plist[3] = args[charset_arg_dimension];
2055 plist[5] = args[charset_arg_code_space];
2056 plist[7] = args[charset_arg_iso_final];
2057 plist[9] = args[charset_arg_emacs_mule_id];
2058 plist[11] = args[charset_arg_ascii_compatible_p];
2059 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2060 args[charset_arg_plist] = Flist (14, plist);
2061 Fdefine_charset_internal (charset_arg_max, args);
2062 charset_ascii = CHARSET_SYMBOL_ID (Qascii);
2063
820ee249
KH
2064 args[charset_arg_name] = Qunicode;
2065 args[charset_arg_dimension] = make_number (3);
3263d5a2
KH
2066 val = Fmake_vector (make_number (8), make_number (0));
2067 ASET (val, 1, make_number (255));
2068 ASET (val, 3, make_number (255));
2069 ASET (val, 5, make_number (16));
820ee249
KH
2070 args[charset_arg_code_space] = val;
2071 args[charset_arg_min_code] = Qnil;
2072 args[charset_arg_max_code] = Qnil;
2073 args[charset_arg_iso_final] = Qnil;
3263d5a2 2074 args[charset_arg_iso_revision] = Qnil;
820ee249
KH
2075 args[charset_arg_emacs_mule_id] = Qnil;
2076 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2077 args[charset_arg_supplementary_p] = Qnil;
2078 args[charset_arg_invalid_code] = Qnil;
820ee249 2079 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2080 args[charset_arg_map] = Qnil;
374c5cfd
KH
2081 args[charset_arg_subset] = Qnil;
2082 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2083 args[charset_arg_unify_map] = Qnil;
2084 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2085 plist[1] = args[charset_arg_name];
2086 plist[3] = args[charset_arg_dimension];
2087 plist[5] = args[charset_arg_code_space];
2088 plist[7] = args[charset_arg_iso_final];
2089 plist[9] = args[charset_arg_emacs_mule_id];
2090 plist[11] = args[charset_arg_ascii_compatible_p];
2091 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2092 args[charset_arg_plist] = Flist (14, plist);
2093 Fdefine_charset_internal (charset_arg_max, args);
2094 charset_unicode = CHARSET_SYMBOL_ID (Qunicode);
2095 }
4ed46869
KH
2096}
2097
2098#endif /* emacs */