(Fmap_charset_chars): Fix handling of default value for FROM_CODE
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
04c2f2c5 51/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 52
3263d5a2 53 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 54 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
04c2f2c5 62 charset_table as a struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
3263d5a2
KH
66/* List of all charsets. This variable is used only from Emacs
67 Lisp. */
4ed46869
KH
68Lisp_Object Vcharset_list;
69
3263d5a2
KH
70/* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72Lisp_Object Vcharset_hash_table;
73
74/* Table of struct charset. */
75struct charset *charset_table;
76
77static int charset_table_size;
78int charset_table_used;
79
80Lisp_Object Qcharsetp;
81
82/* Special charset symbols. */
83Lisp_Object Qascii;
84Lisp_Object Qeight_bit_control;
85Lisp_Object Qeight_bit_graphic;
86Lisp_Object Qiso_8859_1;
87Lisp_Object Qunicode;
b0e3cf2b 88
3263d5a2
KH
89/* The corresponding charsets. */
90int charset_ascii;
91int charset_8_bit_control;
92int charset_8_bit_graphic;
93int charset_iso_8859_1;
94int charset_unicode;
c1a08b4c 95
7c7dceee
KH
96/* The other special charsets. */
97int charset_jisx0201_roman;
98int charset_jisx0208_1978;
99int charset_jisx0208;
100
3263d5a2
KH
101/* Value of charset attribute `charset-iso-plane'. */
102Lisp_Object Qgl, Qgr;
c1a08b4c 103
3263d5a2
KH
104/* The primary charset. It is a charset of unibyte characters. */
105int charset_primary;
106
107/* List of charsets ordered by the priority. */
108Lisp_Object Vcharset_ordered_list;
109
dbbb237d
KH
110/* Incremented everytime we change Vcharset_ordered_list. This is
111 unsigned short so that it fits in Lisp_Int and never match with
112 -1. */
113unsigned short charset_ordered_list_tick;
114
3263d5a2
KH
115/* List of iso-2022 charsets. */
116Lisp_Object Viso_2022_charset_list;
117
118/* List of emacs-mule charsets. */
119Lisp_Object Vemacs_mule_charset_list;
120
121struct charset *emacs_mule_charset[256];
4ed46869
KH
122
123/* Mapping table from ISO2022's charset (specified by DIMENSION,
124 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
125int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
126
127Lisp_Object Vcharset_map_directory;
128
129Lisp_Object Vchar_unified_charset_table;
130
69f8de5b
KH
131#define CODE_POINT_TO_INDEX(charset, code) \
132 ((charset)->code_linear_p \
133 ? (code) - (charset)->min_code \
134 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
135 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
136 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
137 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
138 ? (((((code) >> 24) - (charset)->code_space[12]) \
139 * (charset)->code_space[11]) \
140 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
141 * (charset)->code_space[7]) \
142 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
143 * (charset)->code_space[3]) \
820ee249
KH
144 + (((code) & 0xFF) - (charset)->code_space[0]) \
145 - ((charset)->char_index_offset)) \
3263d5a2
KH
146 : -1)
147
148
149/* Convert the character index IDX to code-point CODE for CHARSET.
150 It is assumed that IDX is in a valid range. */
151
820ee249
KH
152#define INDEX_TO_CODE_POINT(charset, idx) \
153 ((charset)->code_linear_p \
154 ? (idx) + (charset)->min_code \
155 : (idx += (charset)->char_index_offset, \
156 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
157 | (((charset)->code_space[4] \
158 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
159 << 8) \
160 | (((charset)->code_space[8] \
161 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
162 << 16) \
163 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 << 24))))
165
4ed46869 166
3263d5a2 167\f
4ed46869 168
e9ce014c
KH
169/* Set to 1 to warn that a charset map is loaded and thus a buffer
170 text and a string data may be relocated. */
3263d5a2 171int charset_map_loaded;
35e623fb 172
e9ce014c
KH
173struct charset_map_entries
174{
175 struct {
176 unsigned from, to;
177 int c;
178 } entry[0x10000];
179 struct charset_map_entries *next;
180};
181
182/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 183
3263d5a2 184 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 185
3263d5a2
KH
186 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
187 CHARSET->decoder, and CHARSET->encoder.
93bcb785 188
3263d5a2
KH
189 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
190 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
191 setup it too. */
4ed46869 192
3263d5a2 193static void
e9ce014c 194load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 195 struct charset *charset;
e9ce014c
KH
196 struct charset_map_entries *entries;
197 int n_entries;
3263d5a2 198 int control_flag;
4ed46869 199{
3263d5a2 200 Lisp_Object vec, table;
3263d5a2
KH
201 unsigned max_code = CHARSET_MAX_CODE (charset);
202 int ascii_compatible_p = charset->ascii_compatible_p;
203 int min_char, max_char, nonascii_min_char;
3263d5a2 204 int i;
3263d5a2 205 unsigned char *fast_map = charset->fast_map;
99529c2c 206
e9ce014c
KH
207 if (n_entries <= 0)
208 return;
209
210 if (control_flag > 0)
8ac5a9cc 211 {
3263d5a2 212 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 213
374c5cfd 214 table = Fmake_char_table (Qnil, Qnil);
3263d5a2
KH
215 if (control_flag == 1)
216 vec = Fmake_vector (make_number (n), make_number (-1));
217 else if (! CHAR_TABLE_P (Vchar_unify_table))
374c5cfd 218 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
6662e69b 219
3263d5a2 220 charset_map_loaded = 1;
2e344af3 221 }
3263d5a2 222
e9ce014c 223 min_char = max_char = entries->entry[0].c;
3263d5a2 224 nonascii_min_char = MAX_CHAR;
e9ce014c 225 for (i = 0; i < n_entries; i++)
2e344af3 226 {
e9ce014c 227 unsigned from, to;
3b4f4446
KH
228 int from_index, to_index;
229 int from_c, to_c;
e9ce014c 230 int idx = i % 0x10000;
3263d5a2 231
e9ce014c
KH
232 if (i > 0 && idx == 0)
233 entries = entries->next;
234 from = entries->entry[idx].from;
235 to = entries->entry[idx].to;
3b4f4446
KH
236 from_c = entries->entry[idx].c;
237 from_index = CODE_POINT_TO_INDEX (charset, from);
238 if (from == to)
239 {
240 to_index = from_index;
241 to_c = from_c;
242 }
243 else
244 {
245 to_index = CODE_POINT_TO_INDEX (charset, to);
246 to_c = from_c + (to_index - from_index);
247 }
248 if (from_index < 0 || to_index < 0)
249 continue;
3263d5a2 250
3263d5a2
KH
251 if (control_flag < 2)
252 {
3b4f4446
KH
253 int c;
254
255 if (to_c > max_char)
256 max_char = to_c;
257 else if (from_c < min_char)
258 min_char = from_c;
259 if (ascii_compatible_p)
260 {
261 if (! ASCII_BYTE_P (from_c))
262 {
263 if (from_c < nonascii_min_char)
264 nonascii_min_char = from_c;
265 }
266 else if (! ASCII_BYTE_P (to_c))
267 {
268 nonascii_min_char = 0x80;
269 }
270 }
271
272 for (c = from_c; c <= to_c; c++)
273 CHARSET_FAST_MAP_SET (c, fast_map);
274
e9ce014c 275 if (control_flag == 1)
3263d5a2 276 {
e9ce014c 277 unsigned code = from;
e9ce014c 278
e9ce014c
KH
279 if (CHARSET_COMPACT_CODES_P (charset))
280 while (1)
281 {
3b4f4446
KH
282 ASET (vec, from_index, make_number (from_c));
283 CHAR_TABLE_SET (table, from_c, make_number (code));
e9ce014c
KH
284 if (from_index == to_index)
285 break;
3b4f4446 286 from_index++, from_c++;
e9ce014c
KH
287 code = INDEX_TO_CODE_POINT (charset, from_index);
288 }
289 else
3b4f4446 290 for (; from_index <= to_index; from_index++, from_c++)
e9ce014c 291 {
3b4f4446
KH
292 ASET (vec, from_index, make_number (from_c));
293 CHAR_TABLE_SET (table, from_c, make_number (from_index));
e9ce014c 294 }
3263d5a2 295 }
3263d5a2 296 }
e9ce014c 297 else
2e344af3 298 {
69f8de5b 299 unsigned code = from;
3b4f4446 300
69f8de5b
KH
301 while (1)
302 {
303 int c1 = DECODE_CHAR (charset, code);
304
3263d5a2
KH
305 if (c1 >= 0)
306 {
3b4f4446 307 CHAR_TABLE_SET (table, from_c, make_number (c1));
16fed1fc 308 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
3263d5a2
KH
309 if (CHAR_TABLE_P (Vchar_unified_charset_table))
310 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
311 CHARSET_NAME (charset));
312 }
69f8de5b
KH
313 if (from_index == to_index)
314 break;
3b4f4446 315 from_index++, from_c++;
69f8de5b 316 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 317 }
2e344af3 318 }
8ac5a9cc 319 }
3263d5a2
KH
320
321 if (control_flag < 2)
4ed46869 322 {
3263d5a2
KH
323 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
324 ? nonascii_min_char : min_char);
325 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 326 if (control_flag == 1)
4ed46869 327 {
3263d5a2
KH
328 CHARSET_DECODER (charset) = vec;
329 CHARSET_ENCODER (charset) = table;
4ed46869
KH
330 }
331 }
2e344af3 332 else
3263d5a2 333 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
334}
335
12bcae05 336
3263d5a2
KH
337/* Read a hexadecimal number (preceded by "0x") from the file FP while
338 paying attention to comment charcter '#'. */
12bcae05 339
3263d5a2
KH
340static INLINE unsigned
341read_hex (fp, eof)
342 FILE *fp;
343 int *eof;
12bcae05 344{
3263d5a2
KH
345 int c;
346 unsigned n;
12bcae05 347
3263d5a2
KH
348 while ((c = getc (fp)) != EOF)
349 {
69f8de5b 350 if (c == '#')
3263d5a2
KH
351 {
352 while ((c = getc (fp)) != EOF && c != '\n');
353 }
354 else if (c == '0')
355 {
356 if ((c = getc (fp)) == EOF || c == 'x')
357 break;
358 }
359 }
360 if (c == EOF)
361 {
362 *eof = 1;
363 return 0;
364 }
365 *eof = 0;
366 n = 0;
367 if (c == 'x')
368 while ((c = getc (fp)) != EOF && isxdigit (c))
369 n = ((n << 4)
370 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
371 else
372 while ((c = getc (fp)) != EOF && isdigit (c))
373 n = (n * 10) + c - '0';
e9ce014c
KH
374 if (c != EOF)
375 ungetc (c, fp);
3263d5a2
KH
376 return n;
377}
12bcae05 378
537efd8d 379
3263d5a2 380/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
381 Each line of MAPFILE has this form
382 0xAAAA 0xCCCC
383 where 0xAAAA is a code-point and 0xCCCC is the corresponding
384 character code, or this form
385 0xAAAA-0xBBBB 0xCCCC
386 where 0xAAAA and 0xBBBB are code-points specifying a range, and
387 0xCCCC is the first character code of the range.
388
3263d5a2
KH
389 The returned vector has this form:
390 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
391 where CODE1 is a code-point or a cons of code-points specifying a
392 range. */
4ed46869 393
c449997d
KH
394extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
395
e9ce014c
KH
396static void
397load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
398 struct charset *charset;
399 Lisp_Object mapfile;
e9ce014c 400 int control_flag;
4ed46869 401{
e9ce014c
KH
402 unsigned min_code = CHARSET_MIN_CODE (charset);
403 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
404 int fd;
405 FILE *fp;
3263d5a2
KH
406 int eof;
407 Lisp_Object suffixes;
e9ce014c
KH
408 struct charset_map_entries *head, *entries;
409 int n_entries;
4ed46869 410
3263d5a2
KH
411 suffixes = Fcons (build_string (".map"),
412 Fcons (build_string (".TXT"), Qnil));
4ed46869 413
3263d5a2
KH
414 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
415 NULL, 0);
416 if (fd < 0
417 || ! (fp = fdopen (fd, "r")))
418 {
419 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 420 return;
3263d5a2 421 }
4ed46869 422
e9ce014c
KH
423 head = entries = ((struct charset_map_entries *)
424 alloca (sizeof (struct charset_map_entries)));
425 n_entries = 0;
3263d5a2
KH
426 eof = 0;
427 while (1)
428 {
e9ce014c
KH
429 unsigned from, to;
430 int c;
431 int idx;
4ed46869 432
e9ce014c 433 from = read_hex (fp, &eof);
3263d5a2
KH
434 if (eof)
435 break;
e9ce014c
KH
436 if (getc (fp) == '-')
437 to = read_hex (fp, &eof);
438 else
439 to = from;
440 c = (int) read_hex (fp, &eof);
441
442 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
443 continue;
444
445 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 446 {
e9ce014c
KH
447 entries->next = ((struct charset_map_entries *)
448 alloca (sizeof (struct charset_map_entries)));
449 entries = entries->next;
3263d5a2 450 }
e9ce014c
KH
451 idx = n_entries % 0x10000;
452 entries->entry[idx].from = from;
453 entries->entry[idx].to = to;
454 entries->entry[idx].c = c;
455 n_entries++;
3263d5a2
KH
456 }
457 fclose (fp);
458 close (fd);
4ed46869 459
e9ce014c
KH
460 load_charset_map (charset, head, n_entries, control_flag);
461}
462
463static void
464load_charset_map_from_vector (charset, vec, control_flag)
465 struct charset *charset;
466 Lisp_Object vec;
467 int control_flag;
468{
469 unsigned min_code = CHARSET_MIN_CODE (charset);
470 unsigned max_code = CHARSET_MAX_CODE (charset);
471 struct charset_map_entries *head, *entries;
472 int n_entries;
473 int len = ASIZE (vec);
474 int i;
475
476 if (len % 2 == 1)
3263d5a2 477 {
e9ce014c
KH
478 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
479 return;
3263d5a2 480 }
4ed46869 481
e9ce014c
KH
482 head = entries = ((struct charset_map_entries *)
483 alloca (sizeof (struct charset_map_entries)));
484 n_entries = 0;
485 for (i = 0; i < len; i += 2)
486 {
487 Lisp_Object val, val2;
488 unsigned from, to;
489 int c;
490 int idx;
3263d5a2 491
e9ce014c
KH
492 val = AREF (vec, i);
493 if (CONSP (val))
494 {
495 val2 = XCDR (val);
496 val = XCAR (val);
497 CHECK_NATNUM (val);
498 CHECK_NATNUM (val2);
499 from = XFASTINT (val);
500 to = XFASTINT (val2);
501 }
502 else
503 {
504 CHECK_NATNUM (val);
505 from = to = XFASTINT (val);
506 }
507 val = AREF (vec, i + 1);
508 CHECK_NATNUM (val);
509 c = XFASTINT (val);
510
511 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
512 continue;
513
514 if ((n_entries % 0x10000) == 0)
515 {
516 entries->next = ((struct charset_map_entries *)
517 alloca (sizeof (struct charset_map_entries)));
518 entries = entries->next;
519 }
520 idx = n_entries % 0x10000;
521 entries->entry[idx].from = from;
522 entries->entry[idx].to = to;
523 entries->entry[idx].c = c;
524 n_entries++;
525 }
526
527 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
528}
529
3263d5a2
KH
530static void
531load_charset (charset)
532 struct charset *charset;
ac4137cc 533{
3263d5a2
KH
534 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
535 {
536 Lisp_Object map;
ac4137cc 537
3263d5a2
KH
538 map = CHARSET_MAP (charset);
539 if (STRINGP (map))
e9ce014c
KH
540 load_charset_map_from_file (charset, map, 1);
541 else
542 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
543 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
544 }
4ed46869
KH
545}
546
3263d5a2
KH
547
548DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
549 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
550 (object)
551 Lisp_Object object;
23d2a7f1 552{
3263d5a2 553 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
554}
555
35e623fb 556
3263d5a2 557void
374c5cfd
KH
558map_charset_chars (c_function, function, arg,
559 charset, from, to)
560 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
561 Lisp_Object function, arg;
562 struct charset *charset;
563 unsigned from, to;
564
35e623fb 565{
3263d5a2 566 Lisp_Object range;
374c5cfd 567 int partial;
3263d5a2
KH
568
569 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
570 load_charset (charset);
571
374c5cfd
KH
572 partial = (from > CHARSET_MIN_CODE (charset)
573 || to < CHARSET_MAX_CODE (charset));
574
575 if (CHARSET_UNIFIED_P (charset)
576 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
577 {
578 map_char_table_for_charset (c_function, function,
579 CHARSET_DEUNIFIER (charset), arg,
580 partial ? charset : NULL, from, to);
581 }
582
3263d5a2
KH
583 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
584 {
374c5cfd
KH
585 int from_idx = CODE_POINT_TO_INDEX (charset, from);
586 int to_idx = CODE_POINT_TO_INDEX (charset, to);
587 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
588 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
589
590 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 591 if (NILP (function))
374c5cfd 592 (*c_function) (range, arg);
3263d5a2
KH
593 else
594 call2 (function, range, arg);
595 }
596 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
597 {
598 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
599 return;
374c5cfd 600 if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
bbf12bb3 601 {
374c5cfd
KH
602 range = Fcons (make_number (from), make_number (to));
603 if (to >= 128)
604 XSETCAR (range, make_number (127));
605
3263d5a2 606 if (NILP (function))
374c5cfd 607 (*c_function) (range, arg);
3263d5a2
KH
608 else
609 call2 (function, range, arg);
bbf12bb3 610 }
374c5cfd
KH
611 map_char_table_for_charset (c_function, function,
612 CHARSET_ENCODER (charset), arg,
613 partial ? charset : NULL, from, to);
3263d5a2 614 }
374c5cfd 615 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
3263d5a2 616 {
374c5cfd
KH
617 Lisp_Object subset_info;
618 int offset;
619
620 subset_info = CHARSET_SUBSET (charset);
621 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
622 offset = XINT (AREF (subset_info, 3));
623 from -= offset;
624 if (from < XFASTINT (AREF (subset_info, 1)))
625 from = XFASTINT (AREF (subset_info, 1));
626 to -= offset;
627 if (to > XFASTINT (AREF (subset_info, 2)))
628 to = XFASTINT (AREF (subset_info, 2));
629 map_charset_chars (c_function, function, arg, charset, from, to);
630 }
631 else /* i.e. CHARSET_METHOD_SUPERSET */
632 {
633 Lisp_Object parents;
3263d5a2 634
374c5cfd
KH
635 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
636 parents = XCDR (parents))
bbf12bb3 637 {
374c5cfd
KH
638 int offset;
639 unsigned this_from, this_to;
640
641 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
642 offset = XINT (XCDR (XCAR (parents)));
643 this_from = from - offset;
644 this_to = to - offset;
645 if (this_from < CHARSET_MIN_CODE (charset))
646 this_from = CHARSET_MIN_CODE (charset);
647 if (this_to > CHARSET_MAX_CODE (charset))
648 this_to = CHARSET_MAX_CODE (charset);
649 map_charset_chars (c_function, function, arg, charset, from, to);
bbf12bb3 650 }
35e623fb 651 }
3263d5a2
KH
652}
653
374c5cfd
KH
654
655DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 656 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 657FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 658argument ARG.
6abd9323 659
374c5cfd
KH
660RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
661characters contained in CHARSET.
662
663The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
664range of code points of targer characters. */)
665 (function, charset, arg, from_code, to_code)
666 Lisp_Object function, charset, arg, from_code, to_code;
3263d5a2 667{
374c5cfd 668 struct charset *cs;
16fed1fc 669 unsigned from, to;
374c5cfd
KH
670
671 CHECK_CHARSET_GET_CHARSET (charset, cs);
672 if (NILP (from_code))
16fed1fc 673 from = CHARSET_MIN_CODE (cs);
970b7474
KH
674 else
675 {
676 CHECK_NATNUM (from_code);
677 from = XINT (from_code);
678 if (from < CHARSET_MIN_CODE (cs))
679 from = CHARSET_MIN_CODE (cs);
680 }
374c5cfd 681 if (NILP (to_code))
970b7474
KH
682 to = CHARSET_MAX_CODE (cs);
683 else
684 {
685 CHECK_NATNUM (to_code);
686 to = XINT (to_code);
687 if (to > CHARSET_MAX_CODE (cs))
688 to = CHARSET_MAX_CODE (cs);
689 }
16fed1fc 690 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 691 return Qnil;
35e623fb 692}
76d7b829
KH
693
694
3263d5a2
KH
695/* Define a charset according to the arguments. The Nth argument is
696 the Nth attribute of the charset (the last attribute `charset-id'
697 is not included). See the docstring of `define-charset' for the
698 detail. */
76d7b829 699
3263d5a2
KH
700DEFUN ("define-charset-internal", Fdefine_charset_internal,
701 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
702 doc: /* For internal use only.
703usage: (define-charset-internal ...) */)
3263d5a2
KH
704 (nargs, args)
705 int nargs;
706 Lisp_Object *args;
76d7b829 707{
3263d5a2
KH
708 /* Charset attr vector. */
709 Lisp_Object attrs;
710 Lisp_Object val;
711 unsigned hash_code;
712 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 713 int i, j;
3263d5a2
KH
714 struct charset charset;
715 int id;
716 int dimension;
717 int new_definition_p;
718 int nchars;
719
720 if (nargs != charset_arg_max)
721 return Fsignal (Qwrong_number_of_arguments,
722 Fcons (intern ("define-charset-internal"),
723 make_number (nargs)));
724
725 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
726
727 CHECK_SYMBOL (args[charset_arg_name]);
728 ASET (attrs, charset_name, args[charset_arg_name]);
729
730 val = args[charset_arg_code_space];
731 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 732 {
3263d5a2
KH
733 int min_byte, max_byte;
734
735 min_byte = XINT (Faref (val, make_number (i * 2)));
736 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
737 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
738 error ("Invalid :code-space value");
739 charset.code_space[i * 4] = min_byte;
740 charset.code_space[i * 4 + 1] = max_byte;
741 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
742 nchars *= charset.code_space[i * 4 + 2];
743 charset.code_space[i * 4 + 3] = nchars;
744 if (max_byte > 0)
745 dimension = i + 1;
746 }
76d7b829 747
3263d5a2
KH
748 val = args[charset_arg_dimension];
749 if (NILP (val))
750 charset.dimension = dimension;
751 else
752 {
753 CHECK_NATNUM (val);
754 charset.dimension = XINT (val);
755 if (charset.dimension < 1 || charset.dimension > 4)
756 args_out_of_range_3 (val, make_number (1), make_number (4));
757 }
758
759 charset.code_linear_p
760 = (charset.dimension == 1
761 || (charset.code_space[2] == 256
762 && (charset.dimension == 2
763 || (charset.code_space[6] == 256
764 && (charset.dimension == 3
765 || charset.code_space[10] == 256)))));
766
69f8de5b
KH
767 if (! charset.code_linear_p)
768 {
769 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 770 bzero (charset.code_space_mask, 256);
69f8de5b
KH
771 for (i = 0; i < 4; i++)
772 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
773 j++)
774 charset.code_space_mask[j] |= (1 << i);
775 }
776
3263d5a2
KH
777 charset.iso_chars_96 = charset.code_space[2] == 96;
778
779 charset.min_code = (charset.code_space[0]
780 | (charset.code_space[4] << 8)
781 | (charset.code_space[8] << 16)
782 | (charset.code_space[12] << 24));
783 charset.max_code = (charset.code_space[1]
784 | (charset.code_space[5] << 8)
785 | (charset.code_space[9] << 16)
786 | (charset.code_space[13] << 24));
820ee249
KH
787 charset.char_index_offset = 0;
788
789 val = args[charset_arg_min_code];
790 if (! NILP (val))
791 {
792 unsigned code;
793
794 if (INTEGERP (val))
795 code = XINT (val);
796 else
797 {
798 CHECK_CONS (val);
799 CHECK_NUMBER (XCAR (val));
800 CHECK_NUMBER (XCDR (val));
801 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
802 }
803 if (code < charset.min_code
804 || code > charset.max_code)
805 args_out_of_range_3 (make_number (charset.min_code),
806 make_number (charset.max_code), val);
807 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
808 charset.min_code = code;
809 }
810
811 val = args[charset_arg_max_code];
812 if (! NILP (val))
813 {
814 unsigned code;
815
816 if (INTEGERP (val))
817 code = XINT (val);
818 else
819 {
820 CHECK_CONS (val);
821 CHECK_NUMBER (XCAR (val));
822 CHECK_NUMBER (XCDR (val));
823 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
824 }
825 if (code < charset.min_code
826 || code > charset.max_code)
827 args_out_of_range_3 (make_number (charset.min_code),
828 make_number (charset.max_code), val);
829 charset.max_code = code;
830 }
3263d5a2 831
e9ce014c
KH
832 charset.compact_codes_p = charset.max_code < 0x1000000;
833
3263d5a2
KH
834 val = args[charset_arg_invalid_code];
835 if (NILP (val))
836 {
837 if (charset.min_code > 0)
838 charset.invalid_code = 0;
bbf12bb3
KH
839 else
840 {
3263d5a2
KH
841 XSETINT (val, charset.max_code + 1);
842 if (XINT (val) == charset.max_code + 1)
843 charset.invalid_code = charset.max_code + 1;
844 else
845 error ("Attribute :invalid-code must be specified");
76d7b829 846 }
76d7b829 847 }
3263d5a2
KH
848 else
849 {
850 CHECK_NATNUM (val);
851 charset.invalid_code = XFASTINT (val);
852 }
76d7b829 853
3263d5a2
KH
854 val = args[charset_arg_iso_final];
855 if (NILP (val))
856 charset.iso_final = -1;
857 else
858 {
859 CHECK_NUMBER (val);
860 if (XINT (val) < '0' || XINT (val) > 127)
861 error ("Invalid iso-final-char: %d", XINT (val));
862 charset.iso_final = XINT (val);
863 }
864
865 val = args[charset_arg_iso_revision];
866 if (NILP (val))
867 charset.iso_revision = -1;
868 else
4ed46869 869 {
3263d5a2
KH
870 CHECK_NUMBER (val);
871 if (XINT (val) > 63)
872 args_out_of_range (make_number (63), val);
873 charset.iso_revision = XINT (val);
4ed46869 874 }
3263d5a2
KH
875
876 val = args[charset_arg_emacs_mule_id];
877 if (NILP (val))
878 charset.emacs_mule_id = -1;
4ed46869
KH
879 else
880 {
3263d5a2
KH
881 CHECK_NATNUM (val);
882 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
883 error ("Invalid emacs-mule-id: %d", XINT (val));
884 charset.emacs_mule_id = XINT (val);
c83ef371 885 }
6ef23ebb 886
3263d5a2 887 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 888
3263d5a2
KH
889 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
890
891 charset.unified_p = 0;
892
893 bzero (charset.fast_map, sizeof (charset.fast_map));
894
895 if (! NILP (args[charset_arg_code_offset]))
896 {
897 val = args[charset_arg_code_offset];
898 CHECK_NUMBER (val);
899
900 charset.method = CHARSET_METHOD_OFFSET;
901 charset.code_offset = XINT (val);
902
903 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
904 charset.min_char = i + charset.code_offset;
905 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
906 charset.max_char = i + charset.code_offset;
907 if (charset.max_char > MAX_CHAR)
908 error ("Unsupported max char: %d", charset.max_char);
909
910 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
911 i += 128)
912 CHARSET_FAST_MAP_SET (i, charset.fast_map);
913 for (; i <= charset.max_char; i += 0x1000)
914 CHARSET_FAST_MAP_SET (i, charset.fast_map);
915 }
916 else if (! NILP (args[charset_arg_map]))
917 {
918 val = args[charset_arg_map];
919 ASET (attrs, charset_map, val);
920 if (STRINGP (val))
e9ce014c
KH
921 load_charset_map_from_file (&charset, val, 0);
922 else
923 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
924 charset.method = CHARSET_METHOD_MAP_DEFERRED;
925 }
374c5cfd 926 else if (! NILP (args[charset_arg_subset]))
3263d5a2 927 {
374c5cfd
KH
928 Lisp_Object parent;
929 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
930 struct charset *parent_charset;
931
932 val = args[charset_arg_subset];
933 parent = Fcar (val);
934 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
935 parent_min_code = Fnth (make_number (1), val);
936 CHECK_NATNUM (parent_min_code);
937 parent_max_code = Fnth (make_number (2), val);
938 CHECK_NATNUM (parent_max_code);
939 parent_code_offset = Fnth (make_number (3), val);
940 CHECK_NUMBER (parent_code_offset);
941 val = Fmake_vector (make_number (4), Qnil);
942 ASET (val, 0, make_number (parent_charset->id));
943 ASET (val, 1, parent_min_code);
944 ASET (val, 2, parent_max_code);
945 ASET (val, 3, parent_code_offset);
946 ASET (attrs, charset_subset, val);
947
948 charset.method = CHARSET_METHOD_SUBSET;
949 /* Here, we just copy the parent's fast_map. It's not accurate,
950 but at least it works for quickly detecting which character
951 DOESN'T belong to this charset. */
952 for (i = 0; i < 190; i++)
953 charset.fast_map[i] = parent_charset->fast_map[i];
954
955 /* We also copy these for parents. */
956 charset.min_char = parent_charset->min_char;
957 charset.max_char = parent_charset->max_char;
958 }
959 else if (! NILP (args[charset_arg_superset]))
960 {
961 val = args[charset_arg_superset];
962 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 963 val = Fcopy_sequence (val);
374c5cfd 964 ASET (attrs, charset_superset, val);
3263d5a2
KH
965
966 charset.min_char = MAX_CHAR;
967 charset.max_char = 0;
968 for (; ! NILP (val); val = Fcdr (val))
4ed46869 969 {
3263d5a2
KH
970 Lisp_Object elt, car_part, cdr_part;
971 int this_id, offset;
972 struct charset *this_charset;
973
974 elt = Fcar (val);
975 if (CONSP (elt))
976 {
977 car_part = XCAR (elt);
978 cdr_part = XCDR (elt);
979 CHECK_CHARSET_GET_ID (car_part, this_id);
980 CHECK_NUMBER (cdr_part);
981 offset = XINT (cdr_part);
982 }
983 else
4ed46869 984 {
3263d5a2
KH
985 CHECK_CHARSET_GET_ID (elt, this_id);
986 offset = 0;
4ed46869 987 }
3263d5a2
KH
988 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
989
990 this_charset = CHARSET_FROM_ID (this_id);
991 if (charset.min_char > this_charset->min_char)
992 charset.min_char = this_charset->min_char;
993 if (charset.max_char < this_charset->max_char)
994 charset.max_char = this_charset->max_char;
995 for (i = 0; i < 190; i++)
996 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 997 }
4ed46869 998 }
3263d5a2
KH
999 else
1000 error ("None of :code-offset, :map, :parents are specified");
4ed46869 1001
3263d5a2
KH
1002 val = args[charset_arg_unify_map];
1003 if (! NILP (val) && !STRINGP (val))
1004 CHECK_VECTOR (val);
1005 ASET (attrs, charset_unify_map, val);
4ed46869 1006
3263d5a2
KH
1007 CHECK_LIST (args[charset_arg_plist]);
1008 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1009
3263d5a2
KH
1010 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1011 &hash_code);
1012 if (charset.hash_index >= 0)
1013 {
1014 new_definition_p = 0;
4f65af01 1015 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1016 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1017 }
1a45ff10 1018 else
3263d5a2
KH
1019 {
1020 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1021 hash_code);
1022 if (charset_table_used == charset_table_size)
1023 {
1024 charset_table_size += 256;
1025 charset_table
1026 = ((struct charset *)
1027 xrealloc (charset_table,
1028 sizeof (struct charset) * charset_table_size));
1029 }
1030 id = charset_table_used++;
3263d5a2
KH
1031 new_definition_p = 1;
1032 }
4ed46869 1033
4f65af01 1034 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1035 charset.id = id;
1036 charset_table[id] = charset;
1037
1038 if (charset.iso_final >= 0)
4ed46869 1039 {
3263d5a2
KH
1040 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1041 charset.iso_final) = id;
1042 if (new_definition_p)
1043 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1044 Fcons (make_number (id), Qnil));
7c7dceee
KH
1045 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1046 charset_jisx0201_roman = id;
1047 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1048 charset_jisx0208_1978 = id;
1049 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1050 charset_jisx0208 = id;
4ed46869 1051 }
3263d5a2
KH
1052
1053 if (charset.emacs_mule_id >= 0)
4ed46869 1054 {
3263d5a2 1055 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1056 if (charset.emacs_mule_id < 0xA0)
1057 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3263d5a2
KH
1058 if (new_definition_p)
1059 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1060 Fcons (make_number (id), Qnil));
4ed46869
KH
1061 }
1062
3263d5a2
KH
1063 if (new_definition_p)
1064 {
1065 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1066 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1067 Fcons (make_number (id), Qnil));
dbbb237d 1068 charset_ordered_list_tick++;
3263d5a2 1069 }
4ed46869 1070
3263d5a2 1071 return Qnil;
4ed46869
KH
1072}
1073
3263d5a2
KH
1074DEFUN ("define-charset-alias", Fdefine_charset_alias,
1075 Sdefine_charset_alias, 2, 2, 0,
1076 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1077 (alias, charset)
1078 Lisp_Object alias, charset;
4ed46869 1079{
3263d5a2
KH
1080 Lisp_Object attr;
1081
1082 CHECK_CHARSET_GET_ATTR (charset, attr);
1083 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1084 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1085 return Qnil;
1086}
4ed46869 1087
4ed46869 1088
3263d5a2 1089DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
56a46d1d 1090 doc: /* Return the primary charset (set by `set-primary-charset'). */)
3263d5a2
KH
1091 ()
1092{
1093 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
1094}
4ed46869 1095
4ed46869 1096
3263d5a2
KH
1097DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
1098 1, 1, 0,
56a46d1d
DL
1099 doc: /* Set the primary charset to CHARSET.
1100This determines how unibyte/multibyte conversion is done. See also
1101function `primary-charset'. */)
3263d5a2
KH
1102 (charset)
1103 Lisp_Object charset;
1104{
1105 int id;
1106
1107 CHECK_CHARSET_GET_ID (charset, id);
1108 charset_primary = id;
4ed46869
KH
1109 return Qnil;
1110}
1111
3263d5a2
KH
1112
1113DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1114 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1115 (charset)
1116 Lisp_Object charset;
1117{
1118 Lisp_Object attrs;
1119
1120 CHECK_CHARSET_GET_ATTR (charset, attrs);
1121 return CHARSET_ATTR_PLIST (attrs);
1122}
1123
1124
1125DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1126 doc: /* Set CHARSET's property list to PLIST. */)
1127 (charset, plist)
1128 Lisp_Object charset, plist;
1129{
1130 Lisp_Object attrs;
1131
1132 CHECK_CHARSET_GET_ATTR (charset, attrs);
1133 CHARSET_ATTR_PLIST (attrs) = plist;
1134 return plist;
1135}
1136
1137
dbbb237d 1138DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1139 doc: /* Unify characters of CHARSET with Unicode.
1140This means reading the relevant file and installing the table defined
dbbb237d
KH
1141by CHARSET's `:unify-map' property.
1142
1143Optional second arg UNIFY-MAP a file name string or vector that has
1144the same meaning of the `:unify-map' attribute of the function
1145`define-charset' (which see).
1146
1147Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1148 (charset, unify_map, deunify)
1149 Lisp_Object charset, unify_map, deunify;
8a73a704 1150{
3263d5a2
KH
1151 int id;
1152 struct charset *cs;
1153
1154 CHECK_CHARSET_GET_ID (charset, id);
1155 cs = CHARSET_FROM_ID (id);
1156 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1157 load_charset (cs);
dbbb237d
KH
1158 if (NILP (deunify)
1159 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1160 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1161 return Qnil;
dbbb237d 1162
3263d5a2 1163 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1164 if (NILP (deunify))
1165 {
1166 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1167 error ("Can't unify charset: %s", XSYMBOL (charset)->name->data);
1168 if (NILP (unify_map))
1169 unify_map = CHARSET_UNIFY_MAP (cs);
1170 if (STRINGP (unify_map))
1171 load_charset_map_from_file (cs, unify_map, 2);
1172 else if (VECTORP (unify_map))
1173 load_charset_map_from_vector (cs, unify_map, 2);
1174 else if (NILP (unify_map))
1175 error ("No unify-map for charset");
1176 else
1177 error ("Bad unify-map arg");
1178 CHARSET_UNIFIED_P (cs) = 1;
1179 }
1180 else if (CHAR_TABLE_P (Vchar_unify_table))
1181 {
1182 int min_code = CHARSET_MIN_CODE (cs);
1183 int max_code = CHARSET_MAX_CODE (cs);
1184 int min_char = DECODE_CHAR (cs, min_code);
1185 int max_char = DECODE_CHAR (cs, max_code);
1186
1187 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1188 }
1189
3263d5a2 1190 return Qnil;
8a73a704
KH
1191}
1192
3fac5a51
KH
1193DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1194 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1195 doc: /*
1721b6af 1196Return an unsed ISO final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1197DIMENSION is the number of bytes to represent a character: 1 or 2.
1198CHARS is the number of characters in a dimension: 94 or 96.
1199
1200This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1201If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1202return nil. */)
1203 (dimension, chars)
3fac5a51
KH
1204 Lisp_Object dimension, chars;
1205{
1206 int final_char;
1207
b7826503
PJ
1208 CHECK_NUMBER (dimension);
1209 CHECK_NUMBER (chars);
3263d5a2
KH
1210 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1211 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1212 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1213 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1214 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1215 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1216 break;
3fac5a51
KH
1217 return (final_char <= '?' ? make_number (final_char) : Qnil);
1218}
1219
3263d5a2
KH
1220static void
1221check_iso_charset_parameter (dimension, chars, final_char)
1222 Lisp_Object dimension, chars, final_char;
4ed46869 1223{
3263d5a2
KH
1224 CHECK_NATNUM (dimension);
1225 CHECK_NATNUM (chars);
1226 CHECK_NATNUM (final_char);
4ed46869 1227
3263d5a2
KH
1228 if (XINT (dimension) > 3)
1229 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1230 if (XINT (chars) != 94 && XINT (chars) != 96)
1231 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1232 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1233 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1234}
1235
1236
1237DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1238 4, 4, 0,
1239 doc: /*
1240Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
56a46d1d 1241CHARSET should be defined by `define-charset' in advance. */)
3263d5a2
KH
1242 (dimension, chars, final_char, charset)
1243 Lisp_Object dimension, chars, final_char, charset;
1244{
1245 int id;
4ed46869 1246
3263d5a2
KH
1247 CHECK_CHARSET_GET_ID (charset, id);
1248 check_iso_charset_parameter (dimension, chars, final_char);
1249
16fed1fc 1250 ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), XINT (final_char)) = id;
4ed46869
KH
1251 return Qnil;
1252}
1253
3263d5a2 1254
2e344af3
KH
1255/* Return information about charsets in the text at PTR of NBYTES
1256 bytes, which are NCHARS characters. The value is:
f6302ac9 1257
cfe34140 1258 0: Each character is represented by one byte. This is always
3263d5a2
KH
1259 true for a unibyte string. For a multibyte string, true if
1260 it contains only ASCII characters.
1261
28c026cd
DL
1262 1: No charsets other than ascii, control-1, and latin-1 are
1263 found.
1d67c29b 1264
3263d5a2
KH
1265 2: Otherwise.
1266*/
4ed46869
KH
1267
1268int
3263d5a2
KH
1269string_xstring_p (string)
1270 Lisp_Object string;
4ed46869 1271{
dbbb237d
KH
1272 const unsigned char *p = XSTRING (string)->data;
1273 const unsigned char *endp = p + STRING_BYTES (XSTRING (string));
3263d5a2
KH
1274 struct charset *charset;
1275
1276 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1277 return 0;
1278
1279 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1280 while (p < endp)
0282eb69 1281 {
3263d5a2 1282 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1283
3263d5a2
KH
1284 if (ENCODE_CHAR (charset, c) < 0)
1285 return 2;
0282eb69 1286 }
3263d5a2
KH
1287 return 1;
1288}
05505664 1289
05505664 1290
3263d5a2 1291/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1292
3263d5a2
KH
1293 CHARSETS is a vector. Each element is a cons of CHARSET and
1294 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1295 FOUND-FLAG t (or nil) means that the corresponding charset is
1296 already found (or not yet found).
2e344af3 1297
3263d5a2 1298 It may lookup a translation table TABLE if supplied. */
2e344af3 1299
3263d5a2
KH
1300static void
1301find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
dbbb237d 1302 const unsigned char *ptr;
3263d5a2
KH
1303 int nchars, nbytes;
1304 Lisp_Object charsets, table;
1305{
dbbb237d 1306 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1307 int ncharsets = ASIZE (charsets);
1308
1309 if (nchars == nbytes)
1310 return;
1311
1312 while (ptr < pend)
1313 {
1314 int c = STRING_CHAR_ADVANCE (ptr);
1315 int i;
1316 int all_found = 1;
1317 Lisp_Object elt;
1318
1319 if (!NILP (table))
1320 c = translate_char (table, c);
1321 for (i = 0; i < ncharsets; i++)
1322 {
1323 elt = AREF (charsets, i);
1324 if (NILP (XCDR (elt)))
1325 {
1326 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1327
1328 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1329 XCDR (elt) = Qt;
1330 else
1331 all_found = 0;
1332 }
4ed46869 1333 }
3263d5a2
KH
1334 if (all_found)
1335 break;
4ed46869 1336 }
4ed46869
KH
1337}
1338
28c026cd 1339/* Fixme: returns nil for unibyte. */
4ed46869 1340DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1341 2, 3, 0,
fdb82f93
PJ
1342 doc: /* Return a list of charsets in the region between BEG and END.
1343BEG and END are buffer positions.
1344Optional arg TABLE if non-nil is a translation table to look up.
1345
fdb82f93
PJ
1346If the current buffer is unibyte, the returned list may contain
1347only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1348 (beg, end, table)
23d2a7f1 1349 Lisp_Object beg, end, table;
4ed46869 1350{
3263d5a2 1351 Lisp_Object charsets;
6ae1f27e 1352 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1353 Lisp_Object val;
1354
1355 validate_region (&beg, &end);
1356 from = XFASTINT (beg);
1357 stop = to = XFASTINT (end);
6ae1f27e 1358
4ed46869 1359 if (from < GPT && GPT < to)
6ae1f27e
RS
1360 {
1361 stop = GPT;
1362 stop_byte = GPT_BYTE;
1363 }
1364 else
1365 stop_byte = CHAR_TO_BYTE (stop);
1366
1367 from_byte = CHAR_TO_BYTE (from);
1368
3263d5a2
KH
1369 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1370 for (i = 0; i < charset_table_used; i++)
1371 ASET (charsets, i, Fcons (make_number (i), Qnil));
1372
4ed46869
KH
1373 while (1)
1374 {
3263d5a2
KH
1375 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1376 stop_byte - from_byte, charsets, table);
4ed46869 1377 if (stop < to)
6ae1f27e
RS
1378 {
1379 from = stop, from_byte = stop_byte;
1380 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1381 }
4ed46869
KH
1382 else
1383 break;
1384 }
6ae1f27e 1385
4ed46869 1386 val = Qnil;
3263d5a2
KH
1387 for (i = charset_table_used - 1; i >= 0; i--)
1388 if (!NILP (XCDR (AREF (charsets, i))))
1389 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1390 return val;
1391}
1392
28c026cd 1393/* Fixme: returns nil for unibyte. */
4ed46869 1394DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1395 1, 2, 0,
fdb82f93
PJ
1396 doc: /* Return a list of charsets in STR.
1397Optional arg TABLE if non-nil is a translation table to look up.
1398
fdb82f93 1399If STR is unibyte, the returned list may contain
3263d5a2 1400only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1401 (str, table)
23d2a7f1 1402 Lisp_Object str, table;
4ed46869 1403{
3263d5a2 1404 Lisp_Object charsets;
4ed46869
KH
1405 int i;
1406 Lisp_Object val;
1407
b7826503 1408 CHECK_STRING (str);
87b089ad 1409
3263d5a2 1410 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
dcd50550
KH
1411 for (i = 0; i < charset_table_used; i++)
1412 ASET (charsets, i, Fcons (make_number (i), Qnil));
3263d5a2
KH
1413 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1414 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1415
4ed46869 1416 val = Qnil;
3263d5a2
KH
1417 for (i = charset_table_used - 1; i >= 0; i--)
1418 if (!NILP (XCDR (AREF (charsets, i))))
1419 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1420 return val;
1421}
2e344af3 1422
4ed46869 1423\f
3263d5a2
KH
1424
1425/* Return a character correponding to the code-point CODE of
1426 CHARSET. */
1427
1428int
1429decode_char (charset, code)
1430 struct charset *charset;
1431 unsigned code;
4ed46869 1432{
3263d5a2
KH
1433 int c, char_index;
1434 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1435
3263d5a2
KH
1436 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1437 return -1;
4ed46869 1438
3263d5a2 1439 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1440 {
3263d5a2
KH
1441 load_charset (charset);
1442 method = CHARSET_METHOD (charset);
ac4137cc 1443 }
4ed46869 1444
374c5cfd
KH
1445 if (method == CHARSET_METHOD_SUBSET)
1446 {
1447 Lisp_Object subset_info;
1448
1449 subset_info = CHARSET_SUBSET (charset);
1450 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1451 code -= XINT (AREF (subset_info, 3));
1452 if (code < XFASTINT (AREF (subset_info, 1))
1453 || code > XFASTINT (AREF (subset_info, 2)))
1454 c = -1;
1455 else
1456 c = DECODE_CHAR (charset, code);
1457 }
1458 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1459 {
3263d5a2 1460 Lisp_Object parents;
4ed46869 1461
374c5cfd 1462 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1463 c = -1;
1464 for (; CONSP (parents); parents = XCDR (parents))
1465 {
1466 int id = XINT (XCAR (XCAR (parents)));
1467 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1468 unsigned this_code = code - code_offset;
9d3d8cba 1469
3263d5a2
KH
1470 charset = CHARSET_FROM_ID (id);
1471 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1472 break;
1473 }
1474 }
1475 else
ac4137cc 1476 {
3263d5a2 1477 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1478 if (char_index < 0)
1479 return -1;
3263d5a2
KH
1480
1481 if (method == CHARSET_METHOD_MAP)
ac4137cc 1482 {
3263d5a2
KH
1483 Lisp_Object decoder;
1484
1485 decoder = CHARSET_DECODER (charset);
1486 if (! VECTORP (decoder))
1487 return -1;
1488 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1489 }
1490 else
1491 {
3263d5a2 1492 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1493 }
1494 }
9d3d8cba 1495
3263d5a2
KH
1496 if (CHARSET_UNIFIED_P (charset)
1497 && c >= 0)
c449997d
KH
1498 {
1499 MAYBE_UNIFY_CHAR (c);
1500 }
d2665018 1501
3263d5a2 1502 return c;
d2665018
KH
1503}
1504
374c5cfd
KH
1505/* Variable used temporarily by the macro ENCODE_CHAR. */
1506Lisp_Object charset_work;
1bcc1567 1507
3263d5a2 1508/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1509 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1510 use CHARSET's strict_max_char instead of max_char. */
1bcc1567 1511
3263d5a2
KH
1512unsigned
1513encode_char (charset, c)
1514 struct charset *charset;
9b6a601f
KH
1515 int c;
1516{
3263d5a2
KH
1517 unsigned code;
1518 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1519
3263d5a2 1520 if (CHARSET_UNIFIED_P (charset))
4ed46869 1521 {
374c5cfd 1522 Lisp_Object deunifier, deunified;
4ed46869 1523
3263d5a2
KH
1524 deunifier = CHARSET_DEUNIFIER (charset);
1525 if (! CHAR_TABLE_P (deunifier))
1526 {
dbbb237d 1527 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
3263d5a2
KH
1528 deunifier = CHARSET_DEUNIFIER (charset);
1529 }
374c5cfd
KH
1530 deunified = CHAR_TABLE_REF (deunifier, c);
1531 if (! NILP (deunified))
1532 c = XINT (deunified);
4ed46869 1533 }
beeedaad 1534
3263d5a2
KH
1535 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1536 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1537 return CHARSET_INVALID_CODE (charset);
beeedaad 1538
374c5cfd
KH
1539 if (method == CHARSET_METHOD_SUBSET)
1540 {
1541 Lisp_Object subset_info;
1542 struct charset *this_charset;
1543
1544 subset_info = CHARSET_SUBSET (charset);
1545 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1546 code = ENCODE_CHAR (this_charset, c);
1547 if (code == CHARSET_INVALID_CODE (this_charset)
1548 || code < XFASTINT (AREF (subset_info, 1))
1549 || code > XFASTINT (AREF (subset_info, 2)))
1550 return CHARSET_INVALID_CODE (charset);
1551 code += XINT (AREF (subset_info, 3));
1552 return code;
1553 }
1554
1555 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1556 {
3263d5a2 1557 Lisp_Object parents;
859f2b3c 1558
374c5cfd 1559 parents = CHARSET_SUPERSET (charset);
3263d5a2 1560 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1561 {
3263d5a2
KH
1562 int id = XINT (XCAR (XCAR (parents)));
1563 int code_offset = XINT (XCDR (XCAR (parents)));
1564 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1565
3263d5a2 1566 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1567 if (code != CHARSET_INVALID_CODE (this_charset))
1568 return code + code_offset;
beeedaad 1569 }
3263d5a2
KH
1570 return CHARSET_INVALID_CODE (charset);
1571 }
99529c2c 1572
3263d5a2 1573 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1574 {
3263d5a2
KH
1575 load_charset (charset);
1576 method = CHARSET_METHOD (charset);
859f2b3c 1577 }
beeedaad 1578
3263d5a2 1579 if (method == CHARSET_METHOD_MAP)
3f62427c 1580 {
3263d5a2 1581 Lisp_Object encoder;
beeedaad 1582 Lisp_Object val;
beeedaad 1583
3263d5a2
KH
1584 encoder = CHARSET_ENCODER (charset);
1585 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1586 return CHARSET_INVALID_CODE (charset);
1587 val = CHAR_TABLE_REF (encoder, c);
374c5cfd
KH
1588 if (NILP (val))
1589 return CHARSET_INVALID_CODE (charset);
e9ce014c
KH
1590 code = XINT (val);
1591 if (! CHARSET_COMPACT_CODES_P (charset))
1592 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2 1593 }
820ee249 1594 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1595 {
3263d5a2
KH
1596 code = c - CHARSET_CODE_OFFSET (charset);
1597 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1598 }
beeedaad 1599
3263d5a2 1600 return code;
3f62427c
KH
1601}
1602
4ed46869 1603
3263d5a2
KH
1604DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1605 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1606Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1607
3263d5a2
KH
1608CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1609
1610Optional argument RESTRICTION specifies a way to map the pair of CCS
1611and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1612 (charset, code_point, restriction)
1613 Lisp_Object charset, code_point, restriction;
4ed46869 1614{
3263d5a2
KH
1615 int c, id;
1616 unsigned code;
1617 struct charset *charsetp;
4ed46869 1618
3263d5a2
KH
1619 CHECK_CHARSET_GET_ID (charset, id);
1620 if (CONSP (code_point))
1621 {
1622 CHECK_NATNUM (XCAR (code_point));
1623 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1624 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1625 }
1626 else
1627 {
1628 CHECK_NATNUM (code_point);
1629 code = XINT (code_point);
1630 }
1631 charsetp = CHARSET_FROM_ID (id);
1632 c = DECODE_CHAR (charsetp, code);
1633 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1634}
1635
046b1f03 1636
3263d5a2
KH
1637DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1638 doc: /* Encode the character CH into a code-point of CHARSET.
1639Return nil if CHARSET doesn't include CH.
17e7ef1b 1640
3263d5a2
KH
1641Optional argument RESTRICTION specifies a way to map CHAR to a
1642code-point in CCS. Currently not supported and just ignored. */)
1643 (ch, charset, restriction)
1644 Lisp_Object ch, charset, restriction;
1645{
16fed1fc 1646 int id;
3263d5a2
KH
1647 unsigned code;
1648 struct charset *charsetp;
046b1f03 1649
3263d5a2
KH
1650 CHECK_CHARSET_GET_ID (charset, id);
1651 CHECK_NATNUM (ch);
3263d5a2 1652 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1653 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1654 if (code == CHARSET_INVALID_CODE (charsetp))
1655 return Qnil;
1656 if (code > 0x7FFFFFF)
1657 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1658 return make_number (code);
6ae1f27e 1659}
9036eb45 1660
87b089ad 1661
b121a744
KH
1662DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1663 doc:
1664 /* Return a character of CHARSET whose position codes are CODEn.
1665
1666CODE1 through CODE4 are optional, but if you don't supply sufficient
1667position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1668is specified. */)
b121a744
KH
1669 (charset, code1, code2, code3, code4)
1670 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1671{
3263d5a2
KH
1672 int id, dimension;
1673 struct charset *charsetp;
b121a744
KH
1674 unsigned code;
1675 int c;
87b089ad 1676
3263d5a2
KH
1677 CHECK_CHARSET_GET_ID (charset, id);
1678 charsetp = CHARSET_FROM_ID (id);
87b089ad 1679
b121a744
KH
1680 dimension = CHARSET_DIMENSION (charsetp);
1681 if (NILP (code1))
d47073ca
KH
1682 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1683 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1684 else
87b089ad 1685 {
b121a744
KH
1686 CHECK_NATNUM (code1);
1687 if (XFASTINT (code1) >= 0x100)
1688 args_out_of_range (make_number (0xFF), code1);
1689 code = XFASTINT (code1);
2e344af3 1690
b0a1e45e 1691 if (dimension > 1)
b121a744
KH
1692 {
1693 code <<= 8;
b0a1e45e
KH
1694 if (NILP (code2))
1695 code |= charsetp->code_space[(dimension - 2) * 4];
b121a744
KH
1696 else
1697 {
b0a1e45e
KH
1698 CHECK_NATNUM (code2);
1699 if (XFASTINT (code2) >= 0x100)
1700 args_out_of_range (make_number (0xFF), code2);
1701 code |= XFASTINT (code2);
b121a744
KH
1702 }
1703
b0a1e45e 1704 if (dimension > 2)
b121a744
KH
1705 {
1706 code <<= 8;
b0a1e45e
KH
1707 if (NILP (code3))
1708 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1709 else
1710 {
b0a1e45e
KH
1711 CHECK_NATNUM (code3);
1712 if (XFASTINT (code3) >= 0x100)
1713 args_out_of_range (make_number (0xFF), code3);
1714 code |= XFASTINT (code3);
1715 }
1716
1717 if (dimension > 3)
1718 {
1719 code <<= 8;
1720 if (NILP (code4))
1721 code |= charsetp->code_space[0];
1722 else
1723 {
1724 CHECK_NATNUM (code4);
1725 if (XFASTINT (code4) >= 0x100)
1726 args_out_of_range (make_number (0xFF), code4);
1727 code |= XFASTINT (code4);
1728 }
b121a744
KH
1729 }
1730 }
1731 }
1732 }
3263d5a2 1733
b121a744
KH
1734 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1735 code &= 0x7F7F7F7F;
1736 c = DECODE_CHAR (charsetp, code);
1737 if (c < 0)
1738 error ("Invalid code(s)");
3263d5a2 1739 return make_number (c);
2e344af3
KH
1740}
1741
3263d5a2
KH
1742
1743/* Return the first charset in CHARSET_LIST that contains C.
1744 CHARSET_LIST is a list of charset IDs. If it is nil, use
1745 Vcharset_ordered_list. */
1746
1747struct charset *
1748char_charset (c, charset_list, code_return)
1749 int c;
1750 Lisp_Object charset_list;
1751 unsigned *code_return;
2e344af3 1752{
3263d5a2
KH
1753 if (NILP (charset_list))
1754 charset_list = Vcharset_ordered_list;
2e344af3 1755
3263d5a2 1756 while (CONSP (charset_list))
2e344af3 1757 {
3263d5a2
KH
1758 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1759 unsigned code = ENCODE_CHAR (charset, c);
1760
1761 if (code != CHARSET_INVALID_CODE (charset))
1762 {
1763 if (code_return)
1764 *code_return = code;
1765 return charset;
1766 }
1767 charset_list = XCDR (charset_list);
2e344af3 1768 }
3263d5a2 1769 return NULL;
2e344af3
KH
1770}
1771
2e344af3 1772
56a46d1d 1773/* Fixme: `unknown' can't happen now? */
3263d5a2 1774DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
04c2f2c5 1775 doc: /*Return list of charset and one to three position-codes of CHAR.
16fed1fc 1776If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */)
3263d5a2
KH
1777 (ch)
1778 Lisp_Object ch;
2e344af3 1779{
3263d5a2
KH
1780 struct charset *charset;
1781 int c, dimension;
1782 unsigned code;
1783 Lisp_Object val;
1784
1785 CHECK_CHARACTER (ch);
1786 c = XFASTINT (ch);
1787 charset = CHAR_CHARSET (c);
1788 if (! charset)
1789 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1790
1791 code = ENCODE_CHAR (charset, c);
1792 if (code == CHARSET_INVALID_CODE (charset))
1793 abort ();
1794 dimension = CHARSET_DIMENSION (charset);
1795 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1796 : dimension == 2 ? Fcons (make_number (code >> 8),
1797 Fcons (make_number (code & 0xFF), Qnil))
1798 : Fcons (make_number (code >> 16),
1799 Fcons (make_number ((code >> 8) & 0xFF),
1800 Fcons (make_number (code & 0xFF), Qnil))));
1801 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1802}
87b089ad 1803
740f080d 1804
3263d5a2
KH
1805DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1806 doc: /* Return the charset of highest priority that contains CHAR. */)
1807 (ch)
1808 Lisp_Object ch;
740f080d 1809{
3263d5a2 1810 struct charset *charset;
740f080d 1811
3263d5a2
KH
1812 CHECK_CHARACTER (ch);
1813 charset = CHAR_CHARSET (XINT (ch));
1814 return (CHARSET_NAME (charset));
740f080d
KH
1815}
1816
2e344af3 1817
3263d5a2
KH
1818DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1819 doc: /*
1820Return charset of a character in the current buffer at position POS.
1821If POS is nil, it defauls to the current point.
1822If POS is out of range, the value is nil. */)
1823 (pos)
1824 Lisp_Object pos;
2e344af3 1825{
3263d5a2
KH
1826 Lisp_Object ch;
1827 struct charset *charset;
1828
1829 ch = Fchar_after (pos);
1830 if (! INTEGERP (ch))
1831 return ch;
1832 charset = CHAR_CHARSET (XINT (ch));
1833 return (CHARSET_NAME (charset));
87b089ad
RS
1834}
1835
2e344af3 1836
3263d5a2
KH
1837DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1838 doc: /*
1839Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1840
1841ISO 2022's designation sequence (escape sequence) distinguishes charsets
1842by their DIMENSION, CHARS, and FINAL-CHAR,
1843where as Emacs distinguishes them by charset symbol.
1844See the documentation of the function `charset-info' for the meanings of
1845DIMENSION, CHARS, and FINAL-CHAR. */)
1846 (dimension, chars, final_char)
1847 Lisp_Object dimension, chars, final_char;
2e344af3 1848{
3263d5a2 1849 int id;
2e344af3 1850
3263d5a2
KH
1851 check_iso_charset_parameter (dimension, chars, final_char);
1852 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1853 XFASTINT (final_char));
1854 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1855}
1856
3263d5a2
KH
1857
1858DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1859 0, 0, 0,
1860 doc: /*
1861Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1862 ()
4ed46869 1863{
53316e55 1864 int i;
3263d5a2
KH
1865 struct charset *charset;
1866 Lisp_Object attrs;
4ed46869 1867
3263d5a2 1868 for (i = 0; i < charset_table_used; i++)
4ed46869 1869 {
3263d5a2
KH
1870 charset = CHARSET_FROM_ID (i);
1871 attrs = CHARSET_ATTRIBUTES (charset);
1872
1873 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1874 {
1875 CHARSET_ATTR_DECODER (attrs) = Qnil;
1876 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1877 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1878 }
1879
1880 if (CHARSET_UNIFIED_P (charset))
1881 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1882 }
1883
3263d5a2 1884 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1885 {
3263d5a2
KH
1886 Foptimize_char_table (Vchar_unified_charset_table);
1887 Vchar_unify_table = Vchar_unified_charset_table;
1888 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1889 }
1890
3263d5a2 1891 return Qnil;
4ed46869
KH
1892}
1893
8ddf5e57
DL
1894DEFUN ("charset-priority-list", Fcharset_priority_list,
1895 Scharset_priority_list, 0, 1, 0,
1896 doc: /* Return the list of charsets ordered by priority.
1897HIGHESTP non-nil means just return the highest priority one. */)
1898 (highestp)
1899 Lisp_Object highestp;
1900{
1901 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1902
1903 if (!NILP (highestp))
16fed1fc 1904 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
8ddf5e57
DL
1905
1906 while (!NILP (list))
1907 {
16fed1fc 1908 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57
DL
1909 list = XCDR (list);
1910 }
1911 return Fnreverse (val);
1912}
1913
1914DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1915 1, MANY, 0,
1916 doc: /* Assign higher priority to the charsets given as arguments.
1917usage: (set-charset-priority &rest charsets) */)
1918 (nargs, args)
1919 int nargs;
1920 Lisp_Object *args;
1921{
16fed1fc
DL
1922 Lisp_Object new_head = Qnil, old_list, arglist[2];
1923 int i, id;
8ddf5e57
DL
1924
1925 old_list = Fcopy_sequence (Vcharset_ordered_list);
1926 for (i = 0; i < nargs; i++)
1927 {
1928 CHECK_CHARSET_GET_ID (args[i], id);
16fed1fc
DL
1929 old_list = Fdelq (make_number (id), old_list);
1930 new_head = Fcons (make_number (id), new_head);
8ddf5e57
DL
1931 }
1932 arglist[0] = Fnreverse (new_head);
1933 arglist[1] = old_list;
1934 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 1935 charset_ordered_list_tick++;
8ddf5e57
DL
1936 return Qnil;
1937}
4ed46869 1938\f
3263d5a2
KH
1939void
1940init_charset ()
4ed46869 1941{
4ed46869 1942
4ed46869
KH
1943}
1944
4ed46869 1945
dfcf069d 1946void
4ed46869
KH
1947init_charset_once ()
1948{
1949 int i, j, k;
1950
3263d5a2
KH
1951 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1952 for (j = 0; j < ISO_MAX_CHARS; j++)
1953 for (k = 0; k < ISO_MAX_FINAL; k++)
1954 iso_charset_table[i][j][k] = -1;
1955
1956 for (i = 0; i < 255; i++)
1957 emacs_mule_charset[i] = NULL;
4ed46869 1958
7c7dceee
KH
1959 charset_jisx0201_roman = -1;
1960 charset_jisx0208_1978 = -1;
1961 charset_jisx0208 = -1;
1962
3263d5a2
KH
1963#if 0
1964 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1965 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1966
1967 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1968
1969 /* Intern this now in case it isn't already done.
1970 Setting this variable twice is harmless.
1971 But don't staticpro it here--that is done in alloc.c. */
1972 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1973
3263d5a2
KH
1974 /* Now we are ready to set up this property, so we can create syntax
1975 tables. */
1976 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1977#endif
4ed46869
KH
1978}
1979
1980#ifdef emacs
1981
dfcf069d 1982void
4ed46869
KH
1983syms_of_charset ()
1984{
3263d5a2
KH
1985 char *p;
1986
1987 DEFSYM (Qcharsetp, "charsetp");
1988
1989 DEFSYM (Qascii, "ascii");
1990 DEFSYM (Qunicode, "unicode");
1991 DEFSYM (Qeight_bit_control, "eight-bit-control");
1992 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1993 DEFSYM (Qiso_8859_1, "iso-8859-1");
1994
1995 DEFSYM (Qgl, "gl");
1996 DEFSYM (Qgr, "gr");
1997
1998 p = (char *) xmalloc (30000);
1999
2000 staticpro (&Vcharset_ordered_list);
2001 Vcharset_ordered_list = Qnil;
2002
2003 staticpro (&Viso_2022_charset_list);
2004 Viso_2022_charset_list = Qnil;
2005
2006 staticpro (&Vemacs_mule_charset_list);
2007 Vemacs_mule_charset_list = Qnil;
2008
2009 staticpro (&Vcharset_hash_table);
2010 Vcharset_hash_table = Fmakehash (Qeq);
2011
2012 charset_table_size = 128;
2013 charset_table = ((struct charset *)
2014 xmalloc (sizeof (struct charset) * charset_table_size));
2015 charset_table_used = 0;
2016
2017 staticpro (&Vchar_unified_charset_table);
2018 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2019
2020 defsubr (&Scharsetp);
2021 defsubr (&Smap_charset_chars);
2022 defsubr (&Sdefine_charset_internal);
2023 defsubr (&Sdefine_charset_alias);
2024 defsubr (&Sprimary_charset);
2025 defsubr (&Sset_primary_charset);
2026 defsubr (&Scharset_plist);
2027 defsubr (&Sset_charset_plist);
2028 defsubr (&Sunify_charset);
3fac5a51 2029 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2030 defsubr (&Sdeclare_equiv_charset);
2031 defsubr (&Sfind_charset_region);
2032 defsubr (&Sfind_charset_string);
3263d5a2
KH
2033 defsubr (&Sdecode_char);
2034 defsubr (&Sencode_char);
4ed46869 2035 defsubr (&Ssplit_char);
3263d5a2 2036 defsubr (&Smake_char);
4ed46869 2037 defsubr (&Schar_charset);
90d7b74e 2038 defsubr (&Scharset_after);
4ed46869 2039 defsubr (&Siso_charset);
3263d5a2 2040 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2041 defsubr (&Scharset_priority_list);
2042 defsubr (&Sset_charset_priority);
3263d5a2
KH
2043
2044 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
2045 doc: /* Directory of charset map files that come with GNU Emacs.
04c2f2c5 2046The default value is sub-directory "charsets" of `data-directory'. */);
3263d5a2
KH
2047 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
2048 Vdata_directory);
4ed46869
KH
2049
2050 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2051 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2052 Vcharset_list = Qnil;
2053
2054 /* Make the prerequisite charset `ascii' and `unicode'. */
2055 {
2056 Lisp_Object args[charset_arg_max];
2057 Lisp_Object plist[14];
2058 Lisp_Object val;
2059
2060 plist[0] = intern (":name");
3263d5a2 2061 plist[2] = intern (":dimension");
3263d5a2 2062 plist[4] = intern (":code-space");
3263d5a2 2063 plist[6] = intern (":iso-final-char");
3263d5a2 2064 plist[8] = intern (":emacs-mule-id");
3263d5a2 2065 plist[10] = intern (":ascii-compatible-p");
820ee249
KH
2066 plist[12] = intern (":code-offset");
2067
2068 args[charset_arg_name] = Qascii;
2069 args[charset_arg_dimension] = make_number (1);
2070 val = Fmake_vector (make_number (8), make_number (0));
2071 ASET (val, 1, make_number (127));
2072 args[charset_arg_code_space] = val;
2073 args[charset_arg_min_code] = Qnil;
2074 args[charset_arg_max_code] = Qnil;
2075 args[charset_arg_iso_final] = make_number ('B');
2076 args[charset_arg_iso_revision] = Qnil;
2077 args[charset_arg_emacs_mule_id] = make_number (0);
2078 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2079 args[charset_arg_supplementary_p] = Qnil;
2080 args[charset_arg_invalid_code] = Qnil;
820ee249 2081 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2082 args[charset_arg_map] = Qnil;
374c5cfd
KH
2083 args[charset_arg_subset] = Qnil;
2084 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2085 args[charset_arg_unify_map] = Qnil;
2086 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2087 plist[1] = args[charset_arg_name];
2088 plist[3] = args[charset_arg_dimension];
2089 plist[5] = args[charset_arg_code_space];
2090 plist[7] = args[charset_arg_iso_final];
2091 plist[9] = args[charset_arg_emacs_mule_id];
2092 plist[11] = args[charset_arg_ascii_compatible_p];
2093 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2094 args[charset_arg_plist] = Flist (14, plist);
2095 Fdefine_charset_internal (charset_arg_max, args);
16fed1fc 2096 charset_ascii = XINT (CHARSET_SYMBOL_ID (Qascii));
3263d5a2 2097
820ee249
KH
2098 args[charset_arg_name] = Qunicode;
2099 args[charset_arg_dimension] = make_number (3);
3263d5a2
KH
2100 val = Fmake_vector (make_number (8), make_number (0));
2101 ASET (val, 1, make_number (255));
2102 ASET (val, 3, make_number (255));
2103 ASET (val, 5, make_number (16));
820ee249
KH
2104 args[charset_arg_code_space] = val;
2105 args[charset_arg_min_code] = Qnil;
2106 args[charset_arg_max_code] = Qnil;
2107 args[charset_arg_iso_final] = Qnil;
3263d5a2 2108 args[charset_arg_iso_revision] = Qnil;
820ee249
KH
2109 args[charset_arg_emacs_mule_id] = Qnil;
2110 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2111 args[charset_arg_supplementary_p] = Qnil;
2112 args[charset_arg_invalid_code] = Qnil;
820ee249 2113 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2114 args[charset_arg_map] = Qnil;
374c5cfd
KH
2115 args[charset_arg_subset] = Qnil;
2116 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2117 args[charset_arg_unify_map] = Qnil;
2118 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2119 plist[1] = args[charset_arg_name];
2120 plist[3] = args[charset_arg_dimension];
2121 plist[5] = args[charset_arg_code_space];
2122 plist[7] = args[charset_arg_iso_final];
2123 plist[9] = args[charset_arg_emacs_mule_id];
2124 plist[11] = args[charset_arg_ascii_compatible_p];
2125 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2126 args[charset_arg_plist] = Flist (14, plist);
2127 Fdefine_charset_internal (charset_arg_max, args);
16fed1fc 2128 charset_unicode = XINT (CHARSET_SYMBOL_ID (Qunicode));
3263d5a2 2129 }
4ed46869
KH
2130}
2131
2132#endif /* emacs */