*** empty log message ***
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
04c2f2c5 51/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 52
3263d5a2 53 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 54 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
04c2f2c5 62 charset_table as a struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
3263d5a2
KH
66/* List of all charsets. This variable is used only from Emacs
67 Lisp. */
4ed46869
KH
68Lisp_Object Vcharset_list;
69
3263d5a2
KH
70/* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72Lisp_Object Vcharset_hash_table;
73
74/* Table of struct charset. */
75struct charset *charset_table;
76
77static int charset_table_size;
78int charset_table_used;
79
80Lisp_Object Qcharsetp;
81
82/* Special charset symbols. */
83Lisp_Object Qascii;
84Lisp_Object Qeight_bit_control;
85Lisp_Object Qeight_bit_graphic;
86Lisp_Object Qiso_8859_1;
87Lisp_Object Qunicode;
b0e3cf2b 88
3263d5a2
KH
89/* The corresponding charsets. */
90int charset_ascii;
91int charset_8_bit_control;
92int charset_8_bit_graphic;
93int charset_iso_8859_1;
94int charset_unicode;
c1a08b4c 95
7c7dceee
KH
96/* The other special charsets. */
97int charset_jisx0201_roman;
98int charset_jisx0208_1978;
99int charset_jisx0208;
100
3263d5a2
KH
101/* Value of charset attribute `charset-iso-plane'. */
102Lisp_Object Qgl, Qgr;
c1a08b4c 103
3263d5a2
KH
104/* The primary charset. It is a charset of unibyte characters. */
105int charset_primary;
106
107/* List of charsets ordered by the priority. */
108Lisp_Object Vcharset_ordered_list;
109
dbbb237d
KH
110/* Incremented everytime we change Vcharset_ordered_list. This is
111 unsigned short so that it fits in Lisp_Int and never match with
112 -1. */
113unsigned short charset_ordered_list_tick;
114
3263d5a2
KH
115/* List of iso-2022 charsets. */
116Lisp_Object Viso_2022_charset_list;
117
118/* List of emacs-mule charsets. */
119Lisp_Object Vemacs_mule_charset_list;
120
121struct charset *emacs_mule_charset[256];
4ed46869
KH
122
123/* Mapping table from ISO2022's charset (specified by DIMENSION,
124 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
125int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
126
127Lisp_Object Vcharset_map_directory;
128
129Lisp_Object Vchar_unified_charset_table;
130
69f8de5b
KH
131#define CODE_POINT_TO_INDEX(charset, code) \
132 ((charset)->code_linear_p \
133 ? (code) - (charset)->min_code \
134 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
135 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
136 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
137 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
138 ? (((((code) >> 24) - (charset)->code_space[12]) \
139 * (charset)->code_space[11]) \
140 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
141 * (charset)->code_space[7]) \
142 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
143 * (charset)->code_space[3]) \
820ee249
KH
144 + (((code) & 0xFF) - (charset)->code_space[0]) \
145 - ((charset)->char_index_offset)) \
3263d5a2
KH
146 : -1)
147
148
149/* Convert the character index IDX to code-point CODE for CHARSET.
150 It is assumed that IDX is in a valid range. */
151
820ee249
KH
152#define INDEX_TO_CODE_POINT(charset, idx) \
153 ((charset)->code_linear_p \
154 ? (idx) + (charset)->min_code \
155 : (idx += (charset)->char_index_offset, \
156 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
157 | (((charset)->code_space[4] \
158 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
159 << 8) \
160 | (((charset)->code_space[8] \
161 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
162 << 16) \
163 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 << 24))))
165
4ed46869 166
3263d5a2 167\f
4ed46869 168
e9ce014c
KH
169/* Set to 1 to warn that a charset map is loaded and thus a buffer
170 text and a string data may be relocated. */
3263d5a2 171int charset_map_loaded;
35e623fb 172
e9ce014c
KH
173struct charset_map_entries
174{
175 struct {
176 unsigned from, to;
177 int c;
178 } entry[0x10000];
179 struct charset_map_entries *next;
180};
181
182/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 183
3263d5a2 184 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 185
3263d5a2
KH
186 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
187 CHARSET->decoder, and CHARSET->encoder.
93bcb785 188
3263d5a2
KH
189 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
190 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
191 setup it too. */
4ed46869 192
3263d5a2 193static void
e9ce014c 194load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 195 struct charset *charset;
e9ce014c
KH
196 struct charset_map_entries *entries;
197 int n_entries;
3263d5a2 198 int control_flag;
4ed46869 199{
3263d5a2 200 Lisp_Object vec, table;
3263d5a2
KH
201 unsigned max_code = CHARSET_MAX_CODE (charset);
202 int ascii_compatible_p = charset->ascii_compatible_p;
203 int min_char, max_char, nonascii_min_char;
3263d5a2 204 int i;
3263d5a2 205 unsigned char *fast_map = charset->fast_map;
99529c2c 206
e9ce014c
KH
207 if (n_entries <= 0)
208 return;
209
210 if (control_flag > 0)
8ac5a9cc 211 {
3263d5a2 212 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 213
374c5cfd 214 table = Fmake_char_table (Qnil, Qnil);
3263d5a2
KH
215 if (control_flag == 1)
216 vec = Fmake_vector (make_number (n), make_number (-1));
217 else if (! CHAR_TABLE_P (Vchar_unify_table))
374c5cfd 218 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
6662e69b 219
3263d5a2 220 charset_map_loaded = 1;
2e344af3 221 }
3263d5a2 222
e9ce014c 223 min_char = max_char = entries->entry[0].c;
3263d5a2 224 nonascii_min_char = MAX_CHAR;
e9ce014c 225 for (i = 0; i < n_entries; i++)
2e344af3 226 {
e9ce014c 227 unsigned from, to;
3b4f4446
KH
228 int from_index, to_index;
229 int from_c, to_c;
e9ce014c 230 int idx = i % 0x10000;
3263d5a2 231
e9ce014c
KH
232 if (i > 0 && idx == 0)
233 entries = entries->next;
234 from = entries->entry[idx].from;
235 to = entries->entry[idx].to;
3b4f4446
KH
236 from_c = entries->entry[idx].c;
237 from_index = CODE_POINT_TO_INDEX (charset, from);
238 if (from == to)
239 {
240 to_index = from_index;
241 to_c = from_c;
242 }
243 else
244 {
245 to_index = CODE_POINT_TO_INDEX (charset, to);
246 to_c = from_c + (to_index - from_index);
247 }
248 if (from_index < 0 || to_index < 0)
249 continue;
3263d5a2 250
3263d5a2
KH
251 if (control_flag < 2)
252 {
3b4f4446
KH
253 int c;
254
255 if (to_c > max_char)
256 max_char = to_c;
257 else if (from_c < min_char)
258 min_char = from_c;
259 if (ascii_compatible_p)
260 {
261 if (! ASCII_BYTE_P (from_c))
262 {
263 if (from_c < nonascii_min_char)
264 nonascii_min_char = from_c;
265 }
266 else if (! ASCII_BYTE_P (to_c))
267 {
268 nonascii_min_char = 0x80;
269 }
270 }
271
272 for (c = from_c; c <= to_c; c++)
273 CHARSET_FAST_MAP_SET (c, fast_map);
274
e9ce014c 275 if (control_flag == 1)
3263d5a2 276 {
e9ce014c 277 unsigned code = from;
e9ce014c 278
e9ce014c
KH
279 if (CHARSET_COMPACT_CODES_P (charset))
280 while (1)
281 {
3b4f4446
KH
282 ASET (vec, from_index, make_number (from_c));
283 CHAR_TABLE_SET (table, from_c, make_number (code));
e9ce014c
KH
284 if (from_index == to_index)
285 break;
3b4f4446 286 from_index++, from_c++;
e9ce014c
KH
287 code = INDEX_TO_CODE_POINT (charset, from_index);
288 }
289 else
3b4f4446 290 for (; from_index <= to_index; from_index++, from_c++)
e9ce014c 291 {
3b4f4446
KH
292 ASET (vec, from_index, make_number (from_c));
293 CHAR_TABLE_SET (table, from_c, make_number (from_index));
e9ce014c 294 }
3263d5a2 295 }
3263d5a2 296 }
e9ce014c 297 else
2e344af3 298 {
69f8de5b 299 unsigned code = from;
3b4f4446 300
69f8de5b
KH
301 while (1)
302 {
303 int c1 = DECODE_CHAR (charset, code);
304
3263d5a2
KH
305 if (c1 >= 0)
306 {
3b4f4446 307 CHAR_TABLE_SET (table, from_c, make_number (c1));
16fed1fc 308 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
3263d5a2
KH
309 if (CHAR_TABLE_P (Vchar_unified_charset_table))
310 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
311 CHARSET_NAME (charset));
312 }
69f8de5b
KH
313 if (from_index == to_index)
314 break;
3b4f4446 315 from_index++, from_c++;
69f8de5b 316 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 317 }
2e344af3 318 }
8ac5a9cc 319 }
3263d5a2
KH
320
321 if (control_flag < 2)
4ed46869 322 {
3263d5a2
KH
323 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
324 ? nonascii_min_char : min_char);
325 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 326 if (control_flag == 1)
4ed46869 327 {
3263d5a2
KH
328 CHARSET_DECODER (charset) = vec;
329 CHARSET_ENCODER (charset) = table;
4ed46869
KH
330 }
331 }
2e344af3 332 else
3263d5a2 333 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
334}
335
12bcae05 336
3263d5a2
KH
337/* Read a hexadecimal number (preceded by "0x") from the file FP while
338 paying attention to comment charcter '#'. */
12bcae05 339
3263d5a2
KH
340static INLINE unsigned
341read_hex (fp, eof)
342 FILE *fp;
343 int *eof;
12bcae05 344{
3263d5a2
KH
345 int c;
346 unsigned n;
12bcae05 347
3263d5a2
KH
348 while ((c = getc (fp)) != EOF)
349 {
69f8de5b 350 if (c == '#')
3263d5a2
KH
351 {
352 while ((c = getc (fp)) != EOF && c != '\n');
353 }
354 else if (c == '0')
355 {
356 if ((c = getc (fp)) == EOF || c == 'x')
357 break;
358 }
359 }
360 if (c == EOF)
361 {
362 *eof = 1;
363 return 0;
364 }
365 *eof = 0;
366 n = 0;
367 if (c == 'x')
368 while ((c = getc (fp)) != EOF && isxdigit (c))
369 n = ((n << 4)
370 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
371 else
372 while ((c = getc (fp)) != EOF && isdigit (c))
373 n = (n * 10) + c - '0';
e9ce014c
KH
374 if (c != EOF)
375 ungetc (c, fp);
3263d5a2
KH
376 return n;
377}
12bcae05 378
537efd8d 379
3263d5a2 380/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
381 Each line of MAPFILE has this form
382 0xAAAA 0xCCCC
383 where 0xAAAA is a code-point and 0xCCCC is the corresponding
384 character code, or this form
385 0xAAAA-0xBBBB 0xCCCC
386 where 0xAAAA and 0xBBBB are code-points specifying a range, and
387 0xCCCC is the first character code of the range.
388
3263d5a2
KH
389 The returned vector has this form:
390 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
391 where CODE1 is a code-point or a cons of code-points specifying a
392 range. */
4ed46869 393
c449997d
KH
394extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
395
e9ce014c
KH
396static void
397load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
398 struct charset *charset;
399 Lisp_Object mapfile;
e9ce014c 400 int control_flag;
4ed46869 401{
e9ce014c
KH
402 unsigned min_code = CHARSET_MIN_CODE (charset);
403 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
404 int fd;
405 FILE *fp;
3263d5a2
KH
406 int eof;
407 Lisp_Object suffixes;
e9ce014c
KH
408 struct charset_map_entries *head, *entries;
409 int n_entries;
4ed46869 410
3263d5a2
KH
411 suffixes = Fcons (build_string (".map"),
412 Fcons (build_string (".TXT"), Qnil));
4ed46869 413
3263d5a2
KH
414 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
415 NULL, 0);
416 if (fd < 0
417 || ! (fp = fdopen (fd, "r")))
418 {
419 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 420 return;
3263d5a2 421 }
4ed46869 422
e9ce014c
KH
423 head = entries = ((struct charset_map_entries *)
424 alloca (sizeof (struct charset_map_entries)));
425 n_entries = 0;
3263d5a2
KH
426 eof = 0;
427 while (1)
428 {
e9ce014c
KH
429 unsigned from, to;
430 int c;
431 int idx;
4ed46869 432
e9ce014c 433 from = read_hex (fp, &eof);
3263d5a2
KH
434 if (eof)
435 break;
e9ce014c
KH
436 if (getc (fp) == '-')
437 to = read_hex (fp, &eof);
438 else
439 to = from;
440 c = (int) read_hex (fp, &eof);
441
442 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
443 continue;
444
445 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 446 {
e9ce014c
KH
447 entries->next = ((struct charset_map_entries *)
448 alloca (sizeof (struct charset_map_entries)));
449 entries = entries->next;
3263d5a2 450 }
e9ce014c
KH
451 idx = n_entries % 0x10000;
452 entries->entry[idx].from = from;
453 entries->entry[idx].to = to;
454 entries->entry[idx].c = c;
455 n_entries++;
3263d5a2
KH
456 }
457 fclose (fp);
458 close (fd);
4ed46869 459
e9ce014c
KH
460 load_charset_map (charset, head, n_entries, control_flag);
461}
462
463static void
464load_charset_map_from_vector (charset, vec, control_flag)
465 struct charset *charset;
466 Lisp_Object vec;
467 int control_flag;
468{
469 unsigned min_code = CHARSET_MIN_CODE (charset);
470 unsigned max_code = CHARSET_MAX_CODE (charset);
471 struct charset_map_entries *head, *entries;
472 int n_entries;
473 int len = ASIZE (vec);
474 int i;
475
476 if (len % 2 == 1)
3263d5a2 477 {
e9ce014c
KH
478 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
479 return;
3263d5a2 480 }
4ed46869 481
e9ce014c
KH
482 head = entries = ((struct charset_map_entries *)
483 alloca (sizeof (struct charset_map_entries)));
484 n_entries = 0;
485 for (i = 0; i < len; i += 2)
486 {
487 Lisp_Object val, val2;
488 unsigned from, to;
489 int c;
490 int idx;
3263d5a2 491
e9ce014c
KH
492 val = AREF (vec, i);
493 if (CONSP (val))
494 {
495 val2 = XCDR (val);
496 val = XCAR (val);
497 CHECK_NATNUM (val);
498 CHECK_NATNUM (val2);
499 from = XFASTINT (val);
500 to = XFASTINT (val2);
501 }
502 else
503 {
504 CHECK_NATNUM (val);
505 from = to = XFASTINT (val);
506 }
507 val = AREF (vec, i + 1);
508 CHECK_NATNUM (val);
509 c = XFASTINT (val);
510
511 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
512 continue;
513
514 if ((n_entries % 0x10000) == 0)
515 {
516 entries->next = ((struct charset_map_entries *)
517 alloca (sizeof (struct charset_map_entries)));
518 entries = entries->next;
519 }
520 idx = n_entries % 0x10000;
521 entries->entry[idx].from = from;
522 entries->entry[idx].to = to;
523 entries->entry[idx].c = c;
524 n_entries++;
525 }
526
527 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
528}
529
3263d5a2
KH
530static void
531load_charset (charset)
532 struct charset *charset;
ac4137cc 533{
3263d5a2
KH
534 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
535 {
536 Lisp_Object map;
ac4137cc 537
3263d5a2
KH
538 map = CHARSET_MAP (charset);
539 if (STRINGP (map))
e9ce014c
KH
540 load_charset_map_from_file (charset, map, 1);
541 else
542 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
543 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
544 }
4ed46869
KH
545}
546
3263d5a2
KH
547
548DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
549 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
550 (object)
551 Lisp_Object object;
23d2a7f1 552{
3263d5a2 553 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
554}
555
35e623fb 556
3263d5a2 557void
374c5cfd
KH
558map_charset_chars (c_function, function, arg,
559 charset, from, to)
560 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
561 Lisp_Object function, arg;
562 struct charset *charset;
563 unsigned from, to;
564
35e623fb 565{
3263d5a2 566 Lisp_Object range;
374c5cfd 567 int partial;
3263d5a2
KH
568
569 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
570 load_charset (charset);
571
374c5cfd
KH
572 partial = (from > CHARSET_MIN_CODE (charset)
573 || to < CHARSET_MAX_CODE (charset));
574
575 if (CHARSET_UNIFIED_P (charset)
576 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
577 {
578 map_char_table_for_charset (c_function, function,
579 CHARSET_DEUNIFIER (charset), arg,
580 partial ? charset : NULL, from, to);
581 }
582
3263d5a2
KH
583 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
584 {
374c5cfd
KH
585 int from_idx = CODE_POINT_TO_INDEX (charset, from);
586 int to_idx = CODE_POINT_TO_INDEX (charset, to);
587 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
588 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
589
590 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 591 if (NILP (function))
374c5cfd 592 (*c_function) (range, arg);
3263d5a2
KH
593 else
594 call2 (function, range, arg);
595 }
596 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
597 {
598 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
599 return;
374c5cfd 600 if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
bbf12bb3 601 {
374c5cfd
KH
602 range = Fcons (make_number (from), make_number (to));
603 if (to >= 128)
604 XSETCAR (range, make_number (127));
605
3263d5a2 606 if (NILP (function))
374c5cfd 607 (*c_function) (range, arg);
3263d5a2
KH
608 else
609 call2 (function, range, arg);
bbf12bb3 610 }
374c5cfd
KH
611 map_char_table_for_charset (c_function, function,
612 CHARSET_ENCODER (charset), arg,
613 partial ? charset : NULL, from, to);
3263d5a2 614 }
374c5cfd 615 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
3263d5a2 616 {
374c5cfd
KH
617 Lisp_Object subset_info;
618 int offset;
619
620 subset_info = CHARSET_SUBSET (charset);
621 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
622 offset = XINT (AREF (subset_info, 3));
623 from -= offset;
624 if (from < XFASTINT (AREF (subset_info, 1)))
625 from = XFASTINT (AREF (subset_info, 1));
626 to -= offset;
627 if (to > XFASTINT (AREF (subset_info, 2)))
628 to = XFASTINT (AREF (subset_info, 2));
629 map_charset_chars (c_function, function, arg, charset, from, to);
630 }
631 else /* i.e. CHARSET_METHOD_SUPERSET */
632 {
633 Lisp_Object parents;
3263d5a2 634
374c5cfd
KH
635 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
636 parents = XCDR (parents))
bbf12bb3 637 {
374c5cfd
KH
638 int offset;
639 unsigned this_from, this_to;
640
641 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
642 offset = XINT (XCDR (XCAR (parents)));
643 this_from = from - offset;
644 this_to = to - offset;
645 if (this_from < CHARSET_MIN_CODE (charset))
646 this_from = CHARSET_MIN_CODE (charset);
647 if (this_to > CHARSET_MAX_CODE (charset))
648 this_to = CHARSET_MAX_CODE (charset);
649 map_charset_chars (c_function, function, arg, charset, from, to);
bbf12bb3 650 }
35e623fb 651 }
3263d5a2
KH
652}
653
374c5cfd
KH
654
655DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 656 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 657FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 658argument ARG.
6abd9323 659
374c5cfd
KH
660RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
661characters contained in CHARSET.
662
663The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
664range of code points of targer characters. */)
665 (function, charset, arg, from_code, to_code)
666 Lisp_Object function, charset, arg, from_code, to_code;
3263d5a2 667{
374c5cfd 668 struct charset *cs;
16fed1fc 669 unsigned from, to;
374c5cfd
KH
670
671 CHECK_CHARSET_GET_CHARSET (charset, cs);
672 if (NILP (from_code))
16fed1fc
DL
673 from_code = make_number (0);
674 CHECK_NATNUM (from_code);
675 from = XINT (from_code);
676 if (from < CHARSET_MIN_CODE (cs))
677 from = CHARSET_MIN_CODE (cs);
374c5cfd 678 if (NILP (to_code))
16fed1fc
DL
679 to_code = make_number (0xFFFFFFFF);
680 CHECK_NATNUM (from_code);
681 to = XINT (to_code);
682 if (to > CHARSET_MAX_CODE (cs))
683 to_code = make_number (CHARSET_MAX_CODE (cs));
374c5cfd 684
16fed1fc 685 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 686 return Qnil;
35e623fb 687}
76d7b829
KH
688
689
3263d5a2
KH
690/* Define a charset according to the arguments. The Nth argument is
691 the Nth attribute of the charset (the last attribute `charset-id'
692 is not included). See the docstring of `define-charset' for the
693 detail. */
76d7b829 694
3263d5a2
KH
695DEFUN ("define-charset-internal", Fdefine_charset_internal,
696 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
697 doc: /* For internal use only.
698usage: (define-charset-internal ...) */)
3263d5a2
KH
699 (nargs, args)
700 int nargs;
701 Lisp_Object *args;
76d7b829 702{
3263d5a2
KH
703 /* Charset attr vector. */
704 Lisp_Object attrs;
705 Lisp_Object val;
706 unsigned hash_code;
707 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 708 int i, j;
3263d5a2
KH
709 struct charset charset;
710 int id;
711 int dimension;
712 int new_definition_p;
713 int nchars;
714
715 if (nargs != charset_arg_max)
716 return Fsignal (Qwrong_number_of_arguments,
717 Fcons (intern ("define-charset-internal"),
718 make_number (nargs)));
719
720 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
721
722 CHECK_SYMBOL (args[charset_arg_name]);
723 ASET (attrs, charset_name, args[charset_arg_name]);
724
725 val = args[charset_arg_code_space];
726 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 727 {
3263d5a2
KH
728 int min_byte, max_byte;
729
730 min_byte = XINT (Faref (val, make_number (i * 2)));
731 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
732 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
733 error ("Invalid :code-space value");
734 charset.code_space[i * 4] = min_byte;
735 charset.code_space[i * 4 + 1] = max_byte;
736 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
737 nchars *= charset.code_space[i * 4 + 2];
738 charset.code_space[i * 4 + 3] = nchars;
739 if (max_byte > 0)
740 dimension = i + 1;
741 }
76d7b829 742
3263d5a2
KH
743 val = args[charset_arg_dimension];
744 if (NILP (val))
745 charset.dimension = dimension;
746 else
747 {
748 CHECK_NATNUM (val);
749 charset.dimension = XINT (val);
750 if (charset.dimension < 1 || charset.dimension > 4)
751 args_out_of_range_3 (val, make_number (1), make_number (4));
752 }
753
754 charset.code_linear_p
755 = (charset.dimension == 1
756 || (charset.code_space[2] == 256
757 && (charset.dimension == 2
758 || (charset.code_space[6] == 256
759 && (charset.dimension == 3
760 || charset.code_space[10] == 256)))));
761
69f8de5b
KH
762 if (! charset.code_linear_p)
763 {
764 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 765 bzero (charset.code_space_mask, 256);
69f8de5b
KH
766 for (i = 0; i < 4; i++)
767 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
768 j++)
769 charset.code_space_mask[j] |= (1 << i);
770 }
771
3263d5a2
KH
772 charset.iso_chars_96 = charset.code_space[2] == 96;
773
774 charset.min_code = (charset.code_space[0]
775 | (charset.code_space[4] << 8)
776 | (charset.code_space[8] << 16)
777 | (charset.code_space[12] << 24));
778 charset.max_code = (charset.code_space[1]
779 | (charset.code_space[5] << 8)
780 | (charset.code_space[9] << 16)
781 | (charset.code_space[13] << 24));
820ee249
KH
782 charset.char_index_offset = 0;
783
784 val = args[charset_arg_min_code];
785 if (! NILP (val))
786 {
787 unsigned code;
788
789 if (INTEGERP (val))
790 code = XINT (val);
791 else
792 {
793 CHECK_CONS (val);
794 CHECK_NUMBER (XCAR (val));
795 CHECK_NUMBER (XCDR (val));
796 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
797 }
798 if (code < charset.min_code
799 || code > charset.max_code)
800 args_out_of_range_3 (make_number (charset.min_code),
801 make_number (charset.max_code), val);
802 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
803 charset.min_code = code;
804 }
805
806 val = args[charset_arg_max_code];
807 if (! NILP (val))
808 {
809 unsigned code;
810
811 if (INTEGERP (val))
812 code = XINT (val);
813 else
814 {
815 CHECK_CONS (val);
816 CHECK_NUMBER (XCAR (val));
817 CHECK_NUMBER (XCDR (val));
818 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
819 }
820 if (code < charset.min_code
821 || code > charset.max_code)
822 args_out_of_range_3 (make_number (charset.min_code),
823 make_number (charset.max_code), val);
824 charset.max_code = code;
825 }
3263d5a2 826
e9ce014c
KH
827 charset.compact_codes_p = charset.max_code < 0x1000000;
828
3263d5a2
KH
829 val = args[charset_arg_invalid_code];
830 if (NILP (val))
831 {
832 if (charset.min_code > 0)
833 charset.invalid_code = 0;
bbf12bb3
KH
834 else
835 {
3263d5a2
KH
836 XSETINT (val, charset.max_code + 1);
837 if (XINT (val) == charset.max_code + 1)
838 charset.invalid_code = charset.max_code + 1;
839 else
840 error ("Attribute :invalid-code must be specified");
76d7b829 841 }
76d7b829 842 }
3263d5a2
KH
843 else
844 {
845 CHECK_NATNUM (val);
846 charset.invalid_code = XFASTINT (val);
847 }
76d7b829 848
3263d5a2
KH
849 val = args[charset_arg_iso_final];
850 if (NILP (val))
851 charset.iso_final = -1;
852 else
853 {
854 CHECK_NUMBER (val);
855 if (XINT (val) < '0' || XINT (val) > 127)
856 error ("Invalid iso-final-char: %d", XINT (val));
857 charset.iso_final = XINT (val);
858 }
859
860 val = args[charset_arg_iso_revision];
861 if (NILP (val))
862 charset.iso_revision = -1;
863 else
4ed46869 864 {
3263d5a2
KH
865 CHECK_NUMBER (val);
866 if (XINT (val) > 63)
867 args_out_of_range (make_number (63), val);
868 charset.iso_revision = XINT (val);
4ed46869 869 }
3263d5a2
KH
870
871 val = args[charset_arg_emacs_mule_id];
872 if (NILP (val))
873 charset.emacs_mule_id = -1;
4ed46869
KH
874 else
875 {
3263d5a2
KH
876 CHECK_NATNUM (val);
877 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
878 error ("Invalid emacs-mule-id: %d", XINT (val));
879 charset.emacs_mule_id = XINT (val);
c83ef371 880 }
6ef23ebb 881
3263d5a2 882 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 883
3263d5a2
KH
884 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
885
886 charset.unified_p = 0;
887
888 bzero (charset.fast_map, sizeof (charset.fast_map));
889
890 if (! NILP (args[charset_arg_code_offset]))
891 {
892 val = args[charset_arg_code_offset];
893 CHECK_NUMBER (val);
894
895 charset.method = CHARSET_METHOD_OFFSET;
896 charset.code_offset = XINT (val);
897
898 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
899 charset.min_char = i + charset.code_offset;
900 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
901 charset.max_char = i + charset.code_offset;
902 if (charset.max_char > MAX_CHAR)
903 error ("Unsupported max char: %d", charset.max_char);
904
905 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
906 i += 128)
907 CHARSET_FAST_MAP_SET (i, charset.fast_map);
908 for (; i <= charset.max_char; i += 0x1000)
909 CHARSET_FAST_MAP_SET (i, charset.fast_map);
910 }
911 else if (! NILP (args[charset_arg_map]))
912 {
913 val = args[charset_arg_map];
914 ASET (attrs, charset_map, val);
915 if (STRINGP (val))
e9ce014c
KH
916 load_charset_map_from_file (&charset, val, 0);
917 else
918 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
919 charset.method = CHARSET_METHOD_MAP_DEFERRED;
920 }
374c5cfd 921 else if (! NILP (args[charset_arg_subset]))
3263d5a2 922 {
374c5cfd
KH
923 Lisp_Object parent;
924 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
925 struct charset *parent_charset;
926
927 val = args[charset_arg_subset];
928 parent = Fcar (val);
929 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
930 parent_min_code = Fnth (make_number (1), val);
931 CHECK_NATNUM (parent_min_code);
932 parent_max_code = Fnth (make_number (2), val);
933 CHECK_NATNUM (parent_max_code);
934 parent_code_offset = Fnth (make_number (3), val);
935 CHECK_NUMBER (parent_code_offset);
936 val = Fmake_vector (make_number (4), Qnil);
937 ASET (val, 0, make_number (parent_charset->id));
938 ASET (val, 1, parent_min_code);
939 ASET (val, 2, parent_max_code);
940 ASET (val, 3, parent_code_offset);
941 ASET (attrs, charset_subset, val);
942
943 charset.method = CHARSET_METHOD_SUBSET;
944 /* Here, we just copy the parent's fast_map. It's not accurate,
945 but at least it works for quickly detecting which character
946 DOESN'T belong to this charset. */
947 for (i = 0; i < 190; i++)
948 charset.fast_map[i] = parent_charset->fast_map[i];
949
950 /* We also copy these for parents. */
951 charset.min_char = parent_charset->min_char;
952 charset.max_char = parent_charset->max_char;
953 }
954 else if (! NILP (args[charset_arg_superset]))
955 {
956 val = args[charset_arg_superset];
957 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 958 val = Fcopy_sequence (val);
374c5cfd 959 ASET (attrs, charset_superset, val);
3263d5a2
KH
960
961 charset.min_char = MAX_CHAR;
962 charset.max_char = 0;
963 for (; ! NILP (val); val = Fcdr (val))
4ed46869 964 {
3263d5a2
KH
965 Lisp_Object elt, car_part, cdr_part;
966 int this_id, offset;
967 struct charset *this_charset;
968
969 elt = Fcar (val);
970 if (CONSP (elt))
971 {
972 car_part = XCAR (elt);
973 cdr_part = XCDR (elt);
974 CHECK_CHARSET_GET_ID (car_part, this_id);
975 CHECK_NUMBER (cdr_part);
976 offset = XINT (cdr_part);
977 }
978 else
4ed46869 979 {
3263d5a2
KH
980 CHECK_CHARSET_GET_ID (elt, this_id);
981 offset = 0;
4ed46869 982 }
3263d5a2
KH
983 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
984
985 this_charset = CHARSET_FROM_ID (this_id);
986 if (charset.min_char > this_charset->min_char)
987 charset.min_char = this_charset->min_char;
988 if (charset.max_char < this_charset->max_char)
989 charset.max_char = this_charset->max_char;
990 for (i = 0; i < 190; i++)
991 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 992 }
4ed46869 993 }
3263d5a2
KH
994 else
995 error ("None of :code-offset, :map, :parents are specified");
4ed46869 996
3263d5a2
KH
997 val = args[charset_arg_unify_map];
998 if (! NILP (val) && !STRINGP (val))
999 CHECK_VECTOR (val);
1000 ASET (attrs, charset_unify_map, val);
4ed46869 1001
3263d5a2
KH
1002 CHECK_LIST (args[charset_arg_plist]);
1003 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1004
3263d5a2
KH
1005 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1006 &hash_code);
1007 if (charset.hash_index >= 0)
1008 {
1009 new_definition_p = 0;
4f65af01 1010 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1011 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1012 }
1a45ff10 1013 else
3263d5a2
KH
1014 {
1015 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1016 hash_code);
1017 if (charset_table_used == charset_table_size)
1018 {
1019 charset_table_size += 256;
1020 charset_table
1021 = ((struct charset *)
1022 xrealloc (charset_table,
1023 sizeof (struct charset) * charset_table_size));
1024 }
1025 id = charset_table_used++;
3263d5a2
KH
1026 new_definition_p = 1;
1027 }
4ed46869 1028
4f65af01 1029 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1030 charset.id = id;
1031 charset_table[id] = charset;
1032
1033 if (charset.iso_final >= 0)
4ed46869 1034 {
3263d5a2
KH
1035 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1036 charset.iso_final) = id;
1037 if (new_definition_p)
1038 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1039 Fcons (make_number (id), Qnil));
7c7dceee
KH
1040 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1041 charset_jisx0201_roman = id;
1042 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1043 charset_jisx0208_1978 = id;
1044 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1045 charset_jisx0208 = id;
4ed46869 1046 }
3263d5a2
KH
1047
1048 if (charset.emacs_mule_id >= 0)
4ed46869 1049 {
3263d5a2 1050 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1051 if (charset.emacs_mule_id < 0xA0)
1052 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3263d5a2
KH
1053 if (new_definition_p)
1054 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1055 Fcons (make_number (id), Qnil));
4ed46869
KH
1056 }
1057
3263d5a2
KH
1058 if (new_definition_p)
1059 {
1060 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1061 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1062 Fcons (make_number (id), Qnil));
dbbb237d 1063 charset_ordered_list_tick++;
3263d5a2 1064 }
4ed46869 1065
3263d5a2 1066 return Qnil;
4ed46869
KH
1067}
1068
3263d5a2
KH
1069DEFUN ("define-charset-alias", Fdefine_charset_alias,
1070 Sdefine_charset_alias, 2, 2, 0,
1071 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1072 (alias, charset)
1073 Lisp_Object alias, charset;
4ed46869 1074{
3263d5a2
KH
1075 Lisp_Object attr;
1076
1077 CHECK_CHARSET_GET_ATTR (charset, attr);
1078 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1079 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1080 return Qnil;
1081}
4ed46869 1082
4ed46869 1083
3263d5a2 1084DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
56a46d1d 1085 doc: /* Return the primary charset (set by `set-primary-charset'). */)
3263d5a2
KH
1086 ()
1087{
1088 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
1089}
4ed46869 1090
4ed46869 1091
3263d5a2
KH
1092DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
1093 1, 1, 0,
56a46d1d
DL
1094 doc: /* Set the primary charset to CHARSET.
1095This determines how unibyte/multibyte conversion is done. See also
1096function `primary-charset'. */)
3263d5a2
KH
1097 (charset)
1098 Lisp_Object charset;
1099{
1100 int id;
1101
1102 CHECK_CHARSET_GET_ID (charset, id);
1103 charset_primary = id;
4ed46869
KH
1104 return Qnil;
1105}
1106
3263d5a2
KH
1107
1108DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1109 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1110 (charset)
1111 Lisp_Object charset;
1112{
1113 Lisp_Object attrs;
1114
1115 CHECK_CHARSET_GET_ATTR (charset, attrs);
1116 return CHARSET_ATTR_PLIST (attrs);
1117}
1118
1119
1120DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1121 doc: /* Set CHARSET's property list to PLIST. */)
1122 (charset, plist)
1123 Lisp_Object charset, plist;
1124{
1125 Lisp_Object attrs;
1126
1127 CHECK_CHARSET_GET_ATTR (charset, attrs);
1128 CHARSET_ATTR_PLIST (attrs) = plist;
1129 return plist;
1130}
1131
1132
dbbb237d 1133DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1134 doc: /* Unify characters of CHARSET with Unicode.
1135This means reading the relevant file and installing the table defined
dbbb237d
KH
1136by CHARSET's `:unify-map' property.
1137
1138Optional second arg UNIFY-MAP a file name string or vector that has
1139the same meaning of the `:unify-map' attribute of the function
1140`define-charset' (which see).
1141
1142Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1143 (charset, unify_map, deunify)
1144 Lisp_Object charset, unify_map, deunify;
8a73a704 1145{
3263d5a2
KH
1146 int id;
1147 struct charset *cs;
1148
1149 CHECK_CHARSET_GET_ID (charset, id);
1150 cs = CHARSET_FROM_ID (id);
1151 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1152 load_charset (cs);
dbbb237d
KH
1153 if (NILP (deunify)
1154 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1155 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1156 return Qnil;
dbbb237d 1157
3263d5a2 1158 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1159 if (NILP (deunify))
1160 {
1161 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1162 error ("Can't unify charset: %s", XSYMBOL (charset)->name->data);
1163 if (NILP (unify_map))
1164 unify_map = CHARSET_UNIFY_MAP (cs);
1165 if (STRINGP (unify_map))
1166 load_charset_map_from_file (cs, unify_map, 2);
1167 else if (VECTORP (unify_map))
1168 load_charset_map_from_vector (cs, unify_map, 2);
1169 else if (NILP (unify_map))
1170 error ("No unify-map for charset");
1171 else
1172 error ("Bad unify-map arg");
1173 CHARSET_UNIFIED_P (cs) = 1;
1174 }
1175 else if (CHAR_TABLE_P (Vchar_unify_table))
1176 {
1177 int min_code = CHARSET_MIN_CODE (cs);
1178 int max_code = CHARSET_MAX_CODE (cs);
1179 int min_char = DECODE_CHAR (cs, min_code);
1180 int max_char = DECODE_CHAR (cs, max_code);
1181
1182 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1183 }
1184
3263d5a2 1185 return Qnil;
8a73a704
KH
1186}
1187
3fac5a51
KH
1188DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1189 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1190 doc: /*
1721b6af 1191Return an unsed ISO final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1192DIMENSION is the number of bytes to represent a character: 1 or 2.
1193CHARS is the number of characters in a dimension: 94 or 96.
1194
1195This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1196If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1197return nil. */)
1198 (dimension, chars)
3fac5a51
KH
1199 Lisp_Object dimension, chars;
1200{
1201 int final_char;
1202
b7826503
PJ
1203 CHECK_NUMBER (dimension);
1204 CHECK_NUMBER (chars);
3263d5a2
KH
1205 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1206 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1207 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1208 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1209 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1210 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1211 break;
3fac5a51
KH
1212 return (final_char <= '?' ? make_number (final_char) : Qnil);
1213}
1214
3263d5a2
KH
1215static void
1216check_iso_charset_parameter (dimension, chars, final_char)
1217 Lisp_Object dimension, chars, final_char;
4ed46869 1218{
3263d5a2
KH
1219 CHECK_NATNUM (dimension);
1220 CHECK_NATNUM (chars);
1221 CHECK_NATNUM (final_char);
4ed46869 1222
3263d5a2
KH
1223 if (XINT (dimension) > 3)
1224 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1225 if (XINT (chars) != 94 && XINT (chars) != 96)
1226 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1227 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1228 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1229}
1230
1231
1232DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1233 4, 4, 0,
1234 doc: /*
1235Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
56a46d1d 1236CHARSET should be defined by `define-charset' in advance. */)
3263d5a2
KH
1237 (dimension, chars, final_char, charset)
1238 Lisp_Object dimension, chars, final_char, charset;
1239{
1240 int id;
4ed46869 1241
3263d5a2
KH
1242 CHECK_CHARSET_GET_ID (charset, id);
1243 check_iso_charset_parameter (dimension, chars, final_char);
1244
16fed1fc 1245 ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), XINT (final_char)) = id;
4ed46869
KH
1246 return Qnil;
1247}
1248
3263d5a2 1249
2e344af3
KH
1250/* Return information about charsets in the text at PTR of NBYTES
1251 bytes, which are NCHARS characters. The value is:
f6302ac9 1252
cfe34140 1253 0: Each character is represented by one byte. This is always
3263d5a2
KH
1254 true for a unibyte string. For a multibyte string, true if
1255 it contains only ASCII characters.
1256
28c026cd
DL
1257 1: No charsets other than ascii, control-1, and latin-1 are
1258 found.
1d67c29b 1259
3263d5a2
KH
1260 2: Otherwise.
1261*/
4ed46869
KH
1262
1263int
3263d5a2
KH
1264string_xstring_p (string)
1265 Lisp_Object string;
4ed46869 1266{
dbbb237d
KH
1267 const unsigned char *p = XSTRING (string)->data;
1268 const unsigned char *endp = p + STRING_BYTES (XSTRING (string));
3263d5a2
KH
1269 struct charset *charset;
1270
1271 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1272 return 0;
1273
1274 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1275 while (p < endp)
0282eb69 1276 {
3263d5a2 1277 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1278
3263d5a2
KH
1279 if (ENCODE_CHAR (charset, c) < 0)
1280 return 2;
0282eb69 1281 }
3263d5a2
KH
1282 return 1;
1283}
05505664 1284
05505664 1285
3263d5a2 1286/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1287
3263d5a2
KH
1288 CHARSETS is a vector. Each element is a cons of CHARSET and
1289 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1290 FOUND-FLAG t (or nil) means that the corresponding charset is
1291 already found (or not yet found).
2e344af3 1292
3263d5a2 1293 It may lookup a translation table TABLE if supplied. */
2e344af3 1294
3263d5a2
KH
1295static void
1296find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
dbbb237d 1297 const unsigned char *ptr;
3263d5a2
KH
1298 int nchars, nbytes;
1299 Lisp_Object charsets, table;
1300{
dbbb237d 1301 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1302 int ncharsets = ASIZE (charsets);
1303
1304 if (nchars == nbytes)
1305 return;
1306
1307 while (ptr < pend)
1308 {
1309 int c = STRING_CHAR_ADVANCE (ptr);
1310 int i;
1311 int all_found = 1;
1312 Lisp_Object elt;
1313
1314 if (!NILP (table))
1315 c = translate_char (table, c);
1316 for (i = 0; i < ncharsets; i++)
1317 {
1318 elt = AREF (charsets, i);
1319 if (NILP (XCDR (elt)))
1320 {
1321 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1322
1323 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1324 XCDR (elt) = Qt;
1325 else
1326 all_found = 0;
1327 }
4ed46869 1328 }
3263d5a2
KH
1329 if (all_found)
1330 break;
4ed46869 1331 }
4ed46869
KH
1332}
1333
28c026cd 1334/* Fixme: returns nil for unibyte. */
4ed46869 1335DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1336 2, 3, 0,
fdb82f93
PJ
1337 doc: /* Return a list of charsets in the region between BEG and END.
1338BEG and END are buffer positions.
1339Optional arg TABLE if non-nil is a translation table to look up.
1340
fdb82f93
PJ
1341If the current buffer is unibyte, the returned list may contain
1342only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1343 (beg, end, table)
23d2a7f1 1344 Lisp_Object beg, end, table;
4ed46869 1345{
3263d5a2 1346 Lisp_Object charsets;
6ae1f27e 1347 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1348 Lisp_Object val;
1349
1350 validate_region (&beg, &end);
1351 from = XFASTINT (beg);
1352 stop = to = XFASTINT (end);
6ae1f27e 1353
4ed46869 1354 if (from < GPT && GPT < to)
6ae1f27e
RS
1355 {
1356 stop = GPT;
1357 stop_byte = GPT_BYTE;
1358 }
1359 else
1360 stop_byte = CHAR_TO_BYTE (stop);
1361
1362 from_byte = CHAR_TO_BYTE (from);
1363
3263d5a2
KH
1364 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1365 for (i = 0; i < charset_table_used; i++)
1366 ASET (charsets, i, Fcons (make_number (i), Qnil));
1367
4ed46869
KH
1368 while (1)
1369 {
3263d5a2
KH
1370 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1371 stop_byte - from_byte, charsets, table);
4ed46869 1372 if (stop < to)
6ae1f27e
RS
1373 {
1374 from = stop, from_byte = stop_byte;
1375 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1376 }
4ed46869
KH
1377 else
1378 break;
1379 }
6ae1f27e 1380
4ed46869 1381 val = Qnil;
3263d5a2
KH
1382 for (i = charset_table_used - 1; i >= 0; i--)
1383 if (!NILP (XCDR (AREF (charsets, i))))
1384 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1385 return val;
1386}
1387
28c026cd 1388/* Fixme: returns nil for unibyte. */
4ed46869 1389DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1390 1, 2, 0,
fdb82f93
PJ
1391 doc: /* Return a list of charsets in STR.
1392Optional arg TABLE if non-nil is a translation table to look up.
1393
fdb82f93 1394If STR is unibyte, the returned list may contain
3263d5a2 1395only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1396 (str, table)
23d2a7f1 1397 Lisp_Object str, table;
4ed46869 1398{
3263d5a2 1399 Lisp_Object charsets;
4ed46869
KH
1400 int i;
1401 Lisp_Object val;
1402
b7826503 1403 CHECK_STRING (str);
87b089ad 1404
3263d5a2 1405 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
dcd50550
KH
1406 for (i = 0; i < charset_table_used; i++)
1407 ASET (charsets, i, Fcons (make_number (i), Qnil));
3263d5a2
KH
1408 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1409 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1410
4ed46869 1411 val = Qnil;
3263d5a2
KH
1412 for (i = charset_table_used - 1; i >= 0; i--)
1413 if (!NILP (XCDR (AREF (charsets, i))))
1414 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1415 return val;
1416}
2e344af3 1417
4ed46869 1418\f
3263d5a2
KH
1419
1420/* Return a character correponding to the code-point CODE of
1421 CHARSET. */
1422
1423int
1424decode_char (charset, code)
1425 struct charset *charset;
1426 unsigned code;
4ed46869 1427{
3263d5a2
KH
1428 int c, char_index;
1429 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1430
3263d5a2
KH
1431 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1432 return -1;
4ed46869 1433
3263d5a2 1434 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1435 {
3263d5a2
KH
1436 load_charset (charset);
1437 method = CHARSET_METHOD (charset);
ac4137cc 1438 }
4ed46869 1439
374c5cfd
KH
1440 if (method == CHARSET_METHOD_SUBSET)
1441 {
1442 Lisp_Object subset_info;
1443
1444 subset_info = CHARSET_SUBSET (charset);
1445 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1446 code -= XINT (AREF (subset_info, 3));
1447 if (code < XFASTINT (AREF (subset_info, 1))
1448 || code > XFASTINT (AREF (subset_info, 2)))
1449 c = -1;
1450 else
1451 c = DECODE_CHAR (charset, code);
1452 }
1453 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1454 {
3263d5a2 1455 Lisp_Object parents;
4ed46869 1456
374c5cfd 1457 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1458 c = -1;
1459 for (; CONSP (parents); parents = XCDR (parents))
1460 {
1461 int id = XINT (XCAR (XCAR (parents)));
1462 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1463 unsigned this_code = code - code_offset;
9d3d8cba 1464
3263d5a2
KH
1465 charset = CHARSET_FROM_ID (id);
1466 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1467 break;
1468 }
1469 }
1470 else
ac4137cc 1471 {
3263d5a2 1472 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1473 if (char_index < 0)
1474 return -1;
3263d5a2
KH
1475
1476 if (method == CHARSET_METHOD_MAP)
ac4137cc 1477 {
3263d5a2
KH
1478 Lisp_Object decoder;
1479
1480 decoder = CHARSET_DECODER (charset);
1481 if (! VECTORP (decoder))
1482 return -1;
1483 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1484 }
1485 else
1486 {
3263d5a2 1487 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1488 }
1489 }
9d3d8cba 1490
3263d5a2
KH
1491 if (CHARSET_UNIFIED_P (charset)
1492 && c >= 0)
c449997d
KH
1493 {
1494 MAYBE_UNIFY_CHAR (c);
1495 }
d2665018 1496
3263d5a2 1497 return c;
d2665018
KH
1498}
1499
374c5cfd
KH
1500/* Variable used temporarily by the macro ENCODE_CHAR. */
1501Lisp_Object charset_work;
1bcc1567 1502
3263d5a2 1503/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1504 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1505 use CHARSET's strict_max_char instead of max_char. */
1bcc1567 1506
3263d5a2
KH
1507unsigned
1508encode_char (charset, c)
1509 struct charset *charset;
9b6a601f
KH
1510 int c;
1511{
3263d5a2
KH
1512 unsigned code;
1513 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1514
3263d5a2 1515 if (CHARSET_UNIFIED_P (charset))
4ed46869 1516 {
374c5cfd 1517 Lisp_Object deunifier, deunified;
4ed46869 1518
3263d5a2
KH
1519 deunifier = CHARSET_DEUNIFIER (charset);
1520 if (! CHAR_TABLE_P (deunifier))
1521 {
dbbb237d 1522 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
3263d5a2
KH
1523 deunifier = CHARSET_DEUNIFIER (charset);
1524 }
374c5cfd
KH
1525 deunified = CHAR_TABLE_REF (deunifier, c);
1526 if (! NILP (deunified))
1527 c = XINT (deunified);
4ed46869 1528 }
beeedaad 1529
3263d5a2
KH
1530 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1531 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1532 return CHARSET_INVALID_CODE (charset);
beeedaad 1533
374c5cfd
KH
1534 if (method == CHARSET_METHOD_SUBSET)
1535 {
1536 Lisp_Object subset_info;
1537 struct charset *this_charset;
1538
1539 subset_info = CHARSET_SUBSET (charset);
1540 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1541 code = ENCODE_CHAR (this_charset, c);
1542 if (code == CHARSET_INVALID_CODE (this_charset)
1543 || code < XFASTINT (AREF (subset_info, 1))
1544 || code > XFASTINT (AREF (subset_info, 2)))
1545 return CHARSET_INVALID_CODE (charset);
1546 code += XINT (AREF (subset_info, 3));
1547 return code;
1548 }
1549
1550 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1551 {
3263d5a2 1552 Lisp_Object parents;
859f2b3c 1553
374c5cfd 1554 parents = CHARSET_SUPERSET (charset);
3263d5a2 1555 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1556 {
3263d5a2
KH
1557 int id = XINT (XCAR (XCAR (parents)));
1558 int code_offset = XINT (XCDR (XCAR (parents)));
1559 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1560
3263d5a2 1561 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1562 if (code != CHARSET_INVALID_CODE (this_charset))
1563 return code + code_offset;
beeedaad 1564 }
3263d5a2
KH
1565 return CHARSET_INVALID_CODE (charset);
1566 }
99529c2c 1567
3263d5a2 1568 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1569 {
3263d5a2
KH
1570 load_charset (charset);
1571 method = CHARSET_METHOD (charset);
859f2b3c 1572 }
beeedaad 1573
3263d5a2 1574 if (method == CHARSET_METHOD_MAP)
3f62427c 1575 {
3263d5a2 1576 Lisp_Object encoder;
beeedaad 1577 Lisp_Object val;
beeedaad 1578
3263d5a2
KH
1579 encoder = CHARSET_ENCODER (charset);
1580 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1581 return CHARSET_INVALID_CODE (charset);
1582 val = CHAR_TABLE_REF (encoder, c);
374c5cfd
KH
1583 if (NILP (val))
1584 return CHARSET_INVALID_CODE (charset);
e9ce014c
KH
1585 code = XINT (val);
1586 if (! CHARSET_COMPACT_CODES_P (charset))
1587 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2 1588 }
820ee249 1589 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1590 {
3263d5a2
KH
1591 code = c - CHARSET_CODE_OFFSET (charset);
1592 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1593 }
beeedaad 1594
3263d5a2 1595 return code;
3f62427c
KH
1596}
1597
4ed46869 1598
3263d5a2
KH
1599DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1600 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1601Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1602
3263d5a2
KH
1603CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1604
1605Optional argument RESTRICTION specifies a way to map the pair of CCS
1606and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1607 (charset, code_point, restriction)
1608 Lisp_Object charset, code_point, restriction;
4ed46869 1609{
3263d5a2
KH
1610 int c, id;
1611 unsigned code;
1612 struct charset *charsetp;
4ed46869 1613
3263d5a2
KH
1614 CHECK_CHARSET_GET_ID (charset, id);
1615 if (CONSP (code_point))
1616 {
1617 CHECK_NATNUM (XCAR (code_point));
1618 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1619 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1620 }
1621 else
1622 {
1623 CHECK_NATNUM (code_point);
1624 code = XINT (code_point);
1625 }
1626 charsetp = CHARSET_FROM_ID (id);
1627 c = DECODE_CHAR (charsetp, code);
1628 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1629}
1630
046b1f03 1631
3263d5a2
KH
1632DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1633 doc: /* Encode the character CH into a code-point of CHARSET.
1634Return nil if CHARSET doesn't include CH.
17e7ef1b 1635
3263d5a2
KH
1636Optional argument RESTRICTION specifies a way to map CHAR to a
1637code-point in CCS. Currently not supported and just ignored. */)
1638 (ch, charset, restriction)
1639 Lisp_Object ch, charset, restriction;
1640{
16fed1fc 1641 int id;
3263d5a2
KH
1642 unsigned code;
1643 struct charset *charsetp;
046b1f03 1644
3263d5a2
KH
1645 CHECK_CHARSET_GET_ID (charset, id);
1646 CHECK_NATNUM (ch);
3263d5a2 1647 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1648 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1649 if (code == CHARSET_INVALID_CODE (charsetp))
1650 return Qnil;
1651 if (code > 0x7FFFFFF)
1652 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1653 return make_number (code);
6ae1f27e 1654}
9036eb45 1655
87b089ad 1656
b121a744
KH
1657DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1658 doc:
1659 /* Return a character of CHARSET whose position codes are CODEn.
1660
1661CODE1 through CODE4 are optional, but if you don't supply sufficient
1662position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1663is specified. */)
b121a744
KH
1664 (charset, code1, code2, code3, code4)
1665 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1666{
3263d5a2
KH
1667 int id, dimension;
1668 struct charset *charsetp;
b121a744
KH
1669 unsigned code;
1670 int c;
87b089ad 1671
3263d5a2
KH
1672 CHECK_CHARSET_GET_ID (charset, id);
1673 charsetp = CHARSET_FROM_ID (id);
87b089ad 1674
b121a744
KH
1675 dimension = CHARSET_DIMENSION (charsetp);
1676 if (NILP (code1))
d47073ca
KH
1677 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1678 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1679 else
87b089ad 1680 {
b121a744
KH
1681 CHECK_NATNUM (code1);
1682 if (XFASTINT (code1) >= 0x100)
1683 args_out_of_range (make_number (0xFF), code1);
1684 code = XFASTINT (code1);
2e344af3 1685
b0a1e45e 1686 if (dimension > 1)
b121a744
KH
1687 {
1688 code <<= 8;
b0a1e45e
KH
1689 if (NILP (code2))
1690 code |= charsetp->code_space[(dimension - 2) * 4];
b121a744
KH
1691 else
1692 {
b0a1e45e
KH
1693 CHECK_NATNUM (code2);
1694 if (XFASTINT (code2) >= 0x100)
1695 args_out_of_range (make_number (0xFF), code2);
1696 code |= XFASTINT (code2);
b121a744
KH
1697 }
1698
b0a1e45e 1699 if (dimension > 2)
b121a744
KH
1700 {
1701 code <<= 8;
b0a1e45e
KH
1702 if (NILP (code3))
1703 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1704 else
1705 {
b0a1e45e
KH
1706 CHECK_NATNUM (code3);
1707 if (XFASTINT (code3) >= 0x100)
1708 args_out_of_range (make_number (0xFF), code3);
1709 code |= XFASTINT (code3);
1710 }
1711
1712 if (dimension > 3)
1713 {
1714 code <<= 8;
1715 if (NILP (code4))
1716 code |= charsetp->code_space[0];
1717 else
1718 {
1719 CHECK_NATNUM (code4);
1720 if (XFASTINT (code4) >= 0x100)
1721 args_out_of_range (make_number (0xFF), code4);
1722 code |= XFASTINT (code4);
1723 }
b121a744
KH
1724 }
1725 }
1726 }
1727 }
3263d5a2 1728
b121a744
KH
1729 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1730 code &= 0x7F7F7F7F;
1731 c = DECODE_CHAR (charsetp, code);
1732 if (c < 0)
1733 error ("Invalid code(s)");
3263d5a2 1734 return make_number (c);
2e344af3
KH
1735}
1736
3263d5a2
KH
1737
1738/* Return the first charset in CHARSET_LIST that contains C.
1739 CHARSET_LIST is a list of charset IDs. If it is nil, use
1740 Vcharset_ordered_list. */
1741
1742struct charset *
1743char_charset (c, charset_list, code_return)
1744 int c;
1745 Lisp_Object charset_list;
1746 unsigned *code_return;
2e344af3 1747{
3263d5a2
KH
1748 if (NILP (charset_list))
1749 charset_list = Vcharset_ordered_list;
2e344af3 1750
3263d5a2 1751 while (CONSP (charset_list))
2e344af3 1752 {
3263d5a2
KH
1753 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1754 unsigned code = ENCODE_CHAR (charset, c);
1755
1756 if (code != CHARSET_INVALID_CODE (charset))
1757 {
1758 if (code_return)
1759 *code_return = code;
1760 return charset;
1761 }
1762 charset_list = XCDR (charset_list);
2e344af3 1763 }
3263d5a2 1764 return NULL;
2e344af3
KH
1765}
1766
2e344af3 1767
56a46d1d 1768/* Fixme: `unknown' can't happen now? */
3263d5a2 1769DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
04c2f2c5 1770 doc: /*Return list of charset and one to three position-codes of CHAR.
16fed1fc 1771If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */)
3263d5a2
KH
1772 (ch)
1773 Lisp_Object ch;
2e344af3 1774{
3263d5a2
KH
1775 struct charset *charset;
1776 int c, dimension;
1777 unsigned code;
1778 Lisp_Object val;
1779
1780 CHECK_CHARACTER (ch);
1781 c = XFASTINT (ch);
1782 charset = CHAR_CHARSET (c);
1783 if (! charset)
1784 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1785
1786 code = ENCODE_CHAR (charset, c);
1787 if (code == CHARSET_INVALID_CODE (charset))
1788 abort ();
1789 dimension = CHARSET_DIMENSION (charset);
1790 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1791 : dimension == 2 ? Fcons (make_number (code >> 8),
1792 Fcons (make_number (code & 0xFF), Qnil))
1793 : Fcons (make_number (code >> 16),
1794 Fcons (make_number ((code >> 8) & 0xFF),
1795 Fcons (make_number (code & 0xFF), Qnil))));
1796 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1797}
87b089ad 1798
740f080d 1799
3263d5a2
KH
1800DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1801 doc: /* Return the charset of highest priority that contains CHAR. */)
1802 (ch)
1803 Lisp_Object ch;
740f080d 1804{
3263d5a2 1805 struct charset *charset;
740f080d 1806
3263d5a2
KH
1807 CHECK_CHARACTER (ch);
1808 charset = CHAR_CHARSET (XINT (ch));
1809 return (CHARSET_NAME (charset));
740f080d
KH
1810}
1811
2e344af3 1812
3263d5a2
KH
1813DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1814 doc: /*
1815Return charset of a character in the current buffer at position POS.
1816If POS is nil, it defauls to the current point.
1817If POS is out of range, the value is nil. */)
1818 (pos)
1819 Lisp_Object pos;
2e344af3 1820{
3263d5a2
KH
1821 Lisp_Object ch;
1822 struct charset *charset;
1823
1824 ch = Fchar_after (pos);
1825 if (! INTEGERP (ch))
1826 return ch;
1827 charset = CHAR_CHARSET (XINT (ch));
1828 return (CHARSET_NAME (charset));
87b089ad
RS
1829}
1830
2e344af3 1831
3263d5a2
KH
1832DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1833 doc: /*
1834Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1835
1836ISO 2022's designation sequence (escape sequence) distinguishes charsets
1837by their DIMENSION, CHARS, and FINAL-CHAR,
1838where as Emacs distinguishes them by charset symbol.
1839See the documentation of the function `charset-info' for the meanings of
1840DIMENSION, CHARS, and FINAL-CHAR. */)
1841 (dimension, chars, final_char)
1842 Lisp_Object dimension, chars, final_char;
2e344af3 1843{
3263d5a2 1844 int id;
2e344af3 1845
3263d5a2
KH
1846 check_iso_charset_parameter (dimension, chars, final_char);
1847 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1848 XFASTINT (final_char));
1849 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1850}
1851
3263d5a2
KH
1852
1853DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1854 0, 0, 0,
1855 doc: /*
1856Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1857 ()
4ed46869 1858{
53316e55 1859 int i;
3263d5a2
KH
1860 struct charset *charset;
1861 Lisp_Object attrs;
4ed46869 1862
3263d5a2 1863 for (i = 0; i < charset_table_used; i++)
4ed46869 1864 {
3263d5a2
KH
1865 charset = CHARSET_FROM_ID (i);
1866 attrs = CHARSET_ATTRIBUTES (charset);
1867
1868 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1869 {
1870 CHARSET_ATTR_DECODER (attrs) = Qnil;
1871 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1872 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1873 }
1874
1875 if (CHARSET_UNIFIED_P (charset))
1876 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1877 }
1878
3263d5a2 1879 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1880 {
3263d5a2
KH
1881 Foptimize_char_table (Vchar_unified_charset_table);
1882 Vchar_unify_table = Vchar_unified_charset_table;
1883 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1884 }
1885
3263d5a2 1886 return Qnil;
4ed46869
KH
1887}
1888
8ddf5e57
DL
1889DEFUN ("charset-priority-list", Fcharset_priority_list,
1890 Scharset_priority_list, 0, 1, 0,
1891 doc: /* Return the list of charsets ordered by priority.
1892HIGHESTP non-nil means just return the highest priority one. */)
1893 (highestp)
1894 Lisp_Object highestp;
1895{
1896 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1897
1898 if (!NILP (highestp))
16fed1fc 1899 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
8ddf5e57
DL
1900
1901 while (!NILP (list))
1902 {
16fed1fc 1903 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57
DL
1904 list = XCDR (list);
1905 }
1906 return Fnreverse (val);
1907}
1908
1909DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1910 1, MANY, 0,
1911 doc: /* Assign higher priority to the charsets given as arguments.
1912usage: (set-charset-priority &rest charsets) */)
1913 (nargs, args)
1914 int nargs;
1915 Lisp_Object *args;
1916{
16fed1fc
DL
1917 Lisp_Object new_head = Qnil, old_list, arglist[2];
1918 int i, id;
8ddf5e57
DL
1919
1920 old_list = Fcopy_sequence (Vcharset_ordered_list);
1921 for (i = 0; i < nargs; i++)
1922 {
1923 CHECK_CHARSET_GET_ID (args[i], id);
16fed1fc
DL
1924 old_list = Fdelq (make_number (id), old_list);
1925 new_head = Fcons (make_number (id), new_head);
8ddf5e57
DL
1926 }
1927 arglist[0] = Fnreverse (new_head);
1928 arglist[1] = old_list;
1929 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 1930 charset_ordered_list_tick++;
8ddf5e57
DL
1931 return Qnil;
1932}
4ed46869 1933\f
3263d5a2
KH
1934void
1935init_charset ()
4ed46869 1936{
4ed46869 1937
4ed46869
KH
1938}
1939
4ed46869 1940
dfcf069d 1941void
4ed46869
KH
1942init_charset_once ()
1943{
1944 int i, j, k;
1945
3263d5a2
KH
1946 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1947 for (j = 0; j < ISO_MAX_CHARS; j++)
1948 for (k = 0; k < ISO_MAX_FINAL; k++)
1949 iso_charset_table[i][j][k] = -1;
1950
1951 for (i = 0; i < 255; i++)
1952 emacs_mule_charset[i] = NULL;
4ed46869 1953
7c7dceee
KH
1954 charset_jisx0201_roman = -1;
1955 charset_jisx0208_1978 = -1;
1956 charset_jisx0208 = -1;
1957
3263d5a2
KH
1958#if 0
1959 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1960 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1961
1962 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1963
1964 /* Intern this now in case it isn't already done.
1965 Setting this variable twice is harmless.
1966 But don't staticpro it here--that is done in alloc.c. */
1967 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1968
3263d5a2
KH
1969 /* Now we are ready to set up this property, so we can create syntax
1970 tables. */
1971 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1972#endif
4ed46869
KH
1973}
1974
1975#ifdef emacs
1976
dfcf069d 1977void
4ed46869
KH
1978syms_of_charset ()
1979{
3263d5a2
KH
1980 char *p;
1981
1982 DEFSYM (Qcharsetp, "charsetp");
1983
1984 DEFSYM (Qascii, "ascii");
1985 DEFSYM (Qunicode, "unicode");
1986 DEFSYM (Qeight_bit_control, "eight-bit-control");
1987 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1988 DEFSYM (Qiso_8859_1, "iso-8859-1");
1989
1990 DEFSYM (Qgl, "gl");
1991 DEFSYM (Qgr, "gr");
1992
1993 p = (char *) xmalloc (30000);
1994
1995 staticpro (&Vcharset_ordered_list);
1996 Vcharset_ordered_list = Qnil;
1997
1998 staticpro (&Viso_2022_charset_list);
1999 Viso_2022_charset_list = Qnil;
2000
2001 staticpro (&Vemacs_mule_charset_list);
2002 Vemacs_mule_charset_list = Qnil;
2003
2004 staticpro (&Vcharset_hash_table);
2005 Vcharset_hash_table = Fmakehash (Qeq);
2006
2007 charset_table_size = 128;
2008 charset_table = ((struct charset *)
2009 xmalloc (sizeof (struct charset) * charset_table_size));
2010 charset_table_used = 0;
2011
2012 staticpro (&Vchar_unified_charset_table);
2013 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2014
2015 defsubr (&Scharsetp);
2016 defsubr (&Smap_charset_chars);
2017 defsubr (&Sdefine_charset_internal);
2018 defsubr (&Sdefine_charset_alias);
2019 defsubr (&Sprimary_charset);
2020 defsubr (&Sset_primary_charset);
2021 defsubr (&Scharset_plist);
2022 defsubr (&Sset_charset_plist);
2023 defsubr (&Sunify_charset);
3fac5a51 2024 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2025 defsubr (&Sdeclare_equiv_charset);
2026 defsubr (&Sfind_charset_region);
2027 defsubr (&Sfind_charset_string);
3263d5a2
KH
2028 defsubr (&Sdecode_char);
2029 defsubr (&Sencode_char);
4ed46869 2030 defsubr (&Ssplit_char);
3263d5a2 2031 defsubr (&Smake_char);
4ed46869 2032 defsubr (&Schar_charset);
90d7b74e 2033 defsubr (&Scharset_after);
4ed46869 2034 defsubr (&Siso_charset);
3263d5a2 2035 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2036 defsubr (&Scharset_priority_list);
2037 defsubr (&Sset_charset_priority);
3263d5a2
KH
2038
2039 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
2040 doc: /* Directory of charset map files that come with GNU Emacs.
04c2f2c5 2041The default value is sub-directory "charsets" of `data-directory'. */);
3263d5a2
KH
2042 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
2043 Vdata_directory);
4ed46869
KH
2044
2045 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2046 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2047 Vcharset_list = Qnil;
2048
2049 /* Make the prerequisite charset `ascii' and `unicode'. */
2050 {
2051 Lisp_Object args[charset_arg_max];
2052 Lisp_Object plist[14];
2053 Lisp_Object val;
2054
2055 plist[0] = intern (":name");
3263d5a2 2056 plist[2] = intern (":dimension");
3263d5a2 2057 plist[4] = intern (":code-space");
3263d5a2 2058 plist[6] = intern (":iso-final-char");
3263d5a2 2059 plist[8] = intern (":emacs-mule-id");
3263d5a2 2060 plist[10] = intern (":ascii-compatible-p");
820ee249
KH
2061 plist[12] = intern (":code-offset");
2062
2063 args[charset_arg_name] = Qascii;
2064 args[charset_arg_dimension] = make_number (1);
2065 val = Fmake_vector (make_number (8), make_number (0));
2066 ASET (val, 1, make_number (127));
2067 args[charset_arg_code_space] = val;
2068 args[charset_arg_min_code] = Qnil;
2069 args[charset_arg_max_code] = Qnil;
2070 args[charset_arg_iso_final] = make_number ('B');
2071 args[charset_arg_iso_revision] = Qnil;
2072 args[charset_arg_emacs_mule_id] = make_number (0);
2073 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2074 args[charset_arg_supplementary_p] = Qnil;
2075 args[charset_arg_invalid_code] = Qnil;
820ee249 2076 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2077 args[charset_arg_map] = Qnil;
374c5cfd
KH
2078 args[charset_arg_subset] = Qnil;
2079 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2080 args[charset_arg_unify_map] = Qnil;
2081 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2082 plist[1] = args[charset_arg_name];
2083 plist[3] = args[charset_arg_dimension];
2084 plist[5] = args[charset_arg_code_space];
2085 plist[7] = args[charset_arg_iso_final];
2086 plist[9] = args[charset_arg_emacs_mule_id];
2087 plist[11] = args[charset_arg_ascii_compatible_p];
2088 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2089 args[charset_arg_plist] = Flist (14, plist);
2090 Fdefine_charset_internal (charset_arg_max, args);
16fed1fc 2091 charset_ascii = XINT (CHARSET_SYMBOL_ID (Qascii));
3263d5a2 2092
820ee249
KH
2093 args[charset_arg_name] = Qunicode;
2094 args[charset_arg_dimension] = make_number (3);
3263d5a2
KH
2095 val = Fmake_vector (make_number (8), make_number (0));
2096 ASET (val, 1, make_number (255));
2097 ASET (val, 3, make_number (255));
2098 ASET (val, 5, make_number (16));
820ee249
KH
2099 args[charset_arg_code_space] = val;
2100 args[charset_arg_min_code] = Qnil;
2101 args[charset_arg_max_code] = Qnil;
2102 args[charset_arg_iso_final] = Qnil;
3263d5a2 2103 args[charset_arg_iso_revision] = Qnil;
820ee249
KH
2104 args[charset_arg_emacs_mule_id] = Qnil;
2105 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2106 args[charset_arg_supplementary_p] = Qnil;
2107 args[charset_arg_invalid_code] = Qnil;
820ee249 2108 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2109 args[charset_arg_map] = Qnil;
374c5cfd
KH
2110 args[charset_arg_subset] = Qnil;
2111 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2112 args[charset_arg_unify_map] = Qnil;
2113 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2114 plist[1] = args[charset_arg_name];
2115 plist[3] = args[charset_arg_dimension];
2116 plist[5] = args[charset_arg_code_space];
2117 plist[7] = args[charset_arg_iso_final];
2118 plist[9] = args[charset_arg_emacs_mule_id];
2119 plist[11] = args[charset_arg_ascii_compatible_p];
2120 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2121 args[charset_arg_plist] = Flist (14, plist);
2122 Fdefine_charset_internal (charset_arg_max, args);
16fed1fc 2123 charset_unicode = XINT (CHARSET_SYMBOL_ID (Qunicode));
3263d5a2 2124 }
4ed46869
KH
2125}
2126
2127#endif /* emacs */